Examples of PosTag

org.apache.stanbol.enhancer.nlp.pos.PosTag
A POS (part-of-speech) tag, typically assigned by a POS tagger (an NLP component) to a {@link Token} by using the {@link POS#POS_ANNOTATION}.
The only required field is {@link #getTag()} - the string tag assigned by the POS tagger.
PosTags can be mapped to a {@link LexicalCategory} and be part of a {@link TagSet}. NOTE that the {@link TagSet} is set by the {@link TagSet#addTag(PosTag)} method.
seekfeel.utilities.stanfordwrapper.PosTag

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

        if(jValue.has("pos")){
            tagInfo.pos = JsonUtils.parseEnum(jValue, "pos", Pos.class);
        } else {
            tagInfo.pos = EnumSet.noneOf(Pos.class);
        }
        PosTag posTag = posTagCache.get(tagInfo);
        if(posTag == null){
            posTag = new PosTag(tagInfo.tag,tagInfo.categories,tagInfo.pos);
            posTagCache.put(tagInfo, posTag);
        }
        return posTag;
    }

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

            "cities such as Paris and people such as Bob Marley.");
        
        Token the = sent1.addToken(0, 3);
        expectedTokens.put(the, "The");
        the.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PREP",Pos.Preposition), 0.85));
        
        Token stanbol = sent1.addToken(4,11);
        expectedTokens.put(stanbol, "Stanbol");
        stanbol.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PN", Pos.ProperNoun),0.95));
        stanbol.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, Value.value(
            0.5));
        
        //use index to create Tokens
        int enhancerStart = sent1.getSpan().toString().indexOf("enhancer");
        Token enhancer = sent1.addToken(enhancerStart,enhancerStart+"enhancer".length());
        expectedTokens.put(enhancer, "enhancer");
        enhancer.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PN", Pos.ProperNoun),0.95));
        enhancer.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("N", LexicalCategory.Noun),0.87));
        MorphoFeatures morpho = new MorphoFeatures("enhance");
        morpho.addCase(new CaseTag("test-case-1",Case.Comitative));
        morpho.addCase(new CaseTag("test-case-2",Case.Abessive));
        morpho.addDefinitness(Definitness.Definite);
        morpho.addPerson(Person.First);
        morpho.addPos(new PosTag("PN", Pos.ProperNoun));
        morpho.addGender(new GenderTag("test-gender", Gender.Masculine));
        morpho.addNumber(new NumberTag("test-number", NumberFeature.Plural));
        morpho.addTense(new TenseTag("test-tense", Tense.Present));
        morpho.addVerbForm(new VerbMoodTag("test-verb-mood", VerbMood.ConditionalVerb));
        enhancer.addAnnotation(NlpAnnotations.MORPHO_ANNOTATION, Value.value(morpho));

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

        
        //Add some Tokens with POS annotations to test the usage of
        //existing POS annotations by the lemmatizer
        Token verbrachten = at.addToken(de_verbStart,de_verbStart+de_verb.length());
        verbrachten.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("V",LexicalCategory.Verb), de_verbProb));
        
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length()); 
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("ADJ",LexicalCategory.Adjective), de_adjectiveProb));
        
        Token urlaub = at.addToken(de_nounStart,de_nounStart+de_noun.length()); 
        urlaub.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("NC",LexicalCategory.Noun), de_nounProb));
        
        Assert.assertEquals("Can not enhance Test ContentItem",
            EnhancementEngine.ENHANCE_ASYNC,engine.canEnhance(ci));
        //compute the enhancements
        try {

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

            if(!process){ //check POS types
                Iterator<Value<PosTag>> posTags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION).iterator();
                boolean ignore = false;
                while(!ignore && !process && posTags.hasNext()) {
                    Value<PosTag> value = posTags.next();
                    PosTag tag = value.value();
                    boolean state = classifier.isAdjective(tag) || classifier.isNoun(tag);
                    ignore = !state && value.probability() >= minPOSConfidence;
                    process = state && value.probability() >= (minPOSConfidence/2.0);
                }
            } //else process all tokens ... no POS tag checking needed

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

            this.token = token;
            this.index = index;
            this.inChunk = chunk;
            this.hasAlphaNumeric = Utils.hasAlphaNumericChar(token.getSpan());


            PosTag selectedPosTag = null;
            boolean matchedPosTag = false; //matched any of the POS annotations
            
            //(1) check if this Token should be linked against the Vocabulary (isProcessable)
            boolean upperCase = index > 0 && //not a sentence start
                    token.getEnd() > token.getStart() && //not an empty token
                    Character.isUpperCase(token.getSpan().codePointAt(0)); //and upper case
            if(tpc.isLinkUpperCaseTokens() && upperCase){
                isProcessable = true;
            } else { //else use POS tag & token length
                for(Value<PosTag> posAnnotation : token.getAnnotations(POS_ANNOTATION)){
                    // check three possible match
                    //  1. the LexicalCategory matches
                    //  2. the Pos matches
                    //  3. the String tag matches
                    PosTag posTag = posAnnotation.value();
                    if((!disjoint(tpc.getLinkedLexicalCategories(), posTag.getCategories())) ||
                            (!disjoint(tpc.getLinkedPos(), posTag.getPosHierarchy())) ||
                            tpc.getLinkedPosTags().contains(posTag.getTag())){
                        if(posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
                            selectedPosTag = posTag;
                            isProcessable = true;
                            matchedPosTag = true;
                            break;
                        } // else probability to low for inclusion
                    } else if(posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){
                        selectedPosTag = posTag; //also rejected PosTags are selected
                        matchedPosTag = true;
                        isProcessable = false;
                        break;
                    } // else probability to low for exclusion
                }
                if(!matchedPosTag) { //not matched against a POS Tag ...
                    // ... fall back to the token length
                    isProcessable = token.getSpan().length() >= elc.getMinSearchTokenLength();
                }
            }
            
            //(2) check if this token should be considered to match labels of suggestions
            if(isProcessable){ //processable tokens are also matchable
                isMatchable = true;
            } else if(tpc.isMatchUpperCaseTokens() && upperCase){
                //match upper case tokens regardless of POS and length
                isMatchable = true;
            } else { //check POS and length to see if token is matchable
                matchedPosTag = false; //reset to false!
                for(Value<PosTag> posAnnotation : token.getAnnotations(POS_ANNOTATION)){
                    PosTag posTag = posAnnotation.value();
                    if(posTag.isMapped()){
                        if(!Collections.disjoint(tpc.getMatchedLexicalCategories(), 
                            posTag.getCategories())){
                            if(posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
                                //override selectedPosTag if present
                                selectedPosTag = posTag; //mark the matchable as selected PosTag
                                isMatchable = true;
                                matchedPosTag = true;
                                break;
                            } // else probability to low for inclusion
                        } else if(posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){
                            if(selectedPosTag == null){ //do not override existing values
                                selectedPosTag = posTag; //also rejected PosTags are selected
                            }
                            isMatchable = false;
                            matchedPosTag = true;
                            break;
                        } // else probability to low for exclusion
                    } //else not matched ... search next one
                }
                if(!matchedPosTag){ //not matched against POS tag ...
                    //fall back to the token length
                    isMatchable = token.getSpan().length() >= elc.getMinSearchTokenLength();    
                }
            }
            
            //(3) check for morpho analyses
            if(selectedPosTag == null){ //token is not processable or matchable
                //we need to set the selectedPoas tag to the first POS annotation
                Value<PosTag> posAnnotation = token.getAnnotation(POS_ANNOTATION);
                if(posAnnotation != null) {
                    selectedPosTag = posAnnotation.value();
                }
            }
            List<Value<MorphoFeatures>> morphoAnnotations = token.getAnnotations(NlpAnnotations.MORPHO_ANNOTATION);
            if(selectedPosTag == null){ //no POS information ... use the first morpho annotation
                morpho = morphoAnnotations.isEmpty() ? null : morphoAnnotations.get(0).value();
            } else { //select the correct morpho annotation based on the POS tag
                MorphoFeatures mf = null;
                selectMorphoFeature : 
                for(Value<MorphoFeatures> morphoAnnotation : morphoAnnotations){
                    for(PosTag posTag : morphoAnnotation.value().getPosList()){
                        if(!disjoint(selectedPosTag.getCategories(),posTag.getCategories())){
                            mf = morphoAnnotation.value();
                            break selectMorphoFeature; //stop after finding the first one
                        }
                    }
                }

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

        }


    }


    private PosTag getPosTag(TagSet<PosTag> model, Map<String,PosTag> adhocTags, String tag, String language) {
        PosTag posTag = model.getTag(tag);
        if(posTag != null){
            return posTag;
        }
        posTag = adhocTags.get(tag);
        if(posTag != null){
            return posTag;
        }
        posTag = new PosTag(tag);
        adhocTags.put(tag, posTag);
        log.info("Encountered umapped POS tag '{}' for langauge '{}'",tag,language);
        return posTag;
    }

View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

        TEST_ANALYSED_TEXT.addChunk(TEST_TEXT.indexOf("geologist"), TEST_TEXT.indexOf("geologist")+"geologist".length())
        .addAnnotation(PHRASE_ANNOTATION, nounPhrase);
        TEST_ANALYSED_TEXT.addChunk(TEST_TEXT.indexOf("the University of Otago"), 
            TEST_TEXT.length()-1).addAnnotation(PHRASE_ANNOTATION, nounPhrase);
        //add some tokens
        TEST_ANALYSED_TEXT.addToken(0, 2).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.Abbreviation),1d));
        TEST_ANALYSED_TEXT.addToken(2, 3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(".",Pos.Point),1d));
        TEST_ANALYSED_TEXT.addToken(4, 11).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        TEST_ANALYSED_TEXT.addToken(12, 20).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        int start = TEST_TEXT.indexOf("(1869 - November 1950)");
        TEST_ANALYSED_TEXT.addToken(start,start+1).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("(",Pos.OpenBracket),1d));
        TEST_ANALYSED_TEXT.addToken(start+1,start+5).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NUM",Pos.Numeral),1d));
        TEST_ANALYSED_TEXT.addToken(start+6,start+7).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("-",Pos.Hyphen),1d));
        TEST_ANALYSED_TEXT.addToken(start+8,start+16).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        TEST_ANALYSED_TEXT.addToken(start+17,start+21).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NUM",Pos.Numeral),1d));
        TEST_ANALYSED_TEXT.addToken(start+21,start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(")",Pos.CloseBracket),1d));
                
        start = TEST_TEXT.indexOf("geologist");
        TEST_ANALYSED_TEXT.addToken(start,start+9).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        
        start = TEST_TEXT.indexOf("New Zealand");
        TEST_ANALYSED_TEXT.addToken(start,start+3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        TEST_ANALYSED_TEXT.addToken(start+4,start+11).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        
        start = TEST_TEXT.indexOf("the University of Otago");
        TEST_ANALYSED_TEXT.addToken(start,start+3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("ART",Pos.Article),1d));
        TEST_ANALYSED_TEXT.addToken(start+4,start+14).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        TEST_ANALYSED_TEXT.addToken(start+15,start+17).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("OF",LexicalCategory.PronounOrDeterminer),1d));
        TEST_ANALYSED_TEXT.addToken(start+18,start+23).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        TEST_ANALYSED_TEXT.addToken(start+23,start+24).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(".",Pos.Point),1d));
        
    }

View Full Code Here

Examples of seekfeel.utilities.stanfordwrapper.PosTag

        }*/
        return totalScore;
    }


    public Double getWordScore(WordPos taggedWord, ArrayList<Word> originalWords) {
        PosTag pTag = taggedWord.getWordTag();
        String tempWord;
        pTag = taggedWord.getWordTag();
        tempWord = originalWords.get(taggedWord.getWordIndex()).word();
        // Check if the Word is in the subjectivity clues
     /*   Double clueWeight = SubjectiveCluesHolder.extractWeight(tempWord, TagParser.parseTagToGeneral(pTag));

View Full Code Here

0 1

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.