Examples of PosTag


Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

            "cities such as Paris and people such as Bob Marley.");
       
        Token the = sent1.addToken(0, 3);
        expectedTokens.put(the, "The");
        the.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PREP",Pos.Preposition), 0.85));
       
        Token stanbol = sent1.addToken(4,11);
        expectedTokens.put(stanbol, "Stanbol");
        stanbol.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PN", Pos.ProperNoun),0.95));
        stanbol.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, Value.value(
            0.5));
       
        //use index to create Tokens
        int enhancerStart = sent1.getSpan().toString().indexOf("enhancer");
        Token enhancer = sent1.addToken(enhancerStart,enhancerStart+"enhancer".length());
        expectedTokens.put(enhancer, "enhancer");
        enhancer.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PN", Pos.ProperNoun),0.95));
        enhancer.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("N", LexicalCategory.Noun),0.87));
        MorphoFeatures morpho = new MorphoFeatures("enhance");
        morpho.addCase(new CaseTag("test-case-1",Case.Comitative));
        morpho.addCase(new CaseTag("test-case-2",Case.Abessive));
        morpho.addDefinitness(Definitness.Definite);
        morpho.addPerson(Person.First);
        morpho.addPos(new PosTag("PN", Pos.ProperNoun));
        morpho.addGender(new GenderTag("test-gender", Gender.Masculine));
        morpho.addNumber(new NumberTag("test-number", NumberFeature.Plural));
        morpho.addTense(new TenseTag("test-tense", Tense.Present));
        morpho.addVerbForm(new VerbMoodTag("test-verb-mood", VerbMood.ConditionalVerb));
        enhancer.addAnnotation(NlpAnnotations.MORPHO_ANNOTATION, Value.value(morpho));
View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

        if(jValue.has("pos")){
            tagInfo.pos = JsonUtils.parseEnum(jValue, "pos", Pos.class);
        } else {
            tagInfo.pos = EnumSet.noneOf(Pos.class);
        }
        PosTag posTag = posTagCache.get(tagInfo);
        if(posTag == null){
            posTag = new PosTag(tagInfo.tag,tagInfo.categories,tagInfo.pos);
            posTagCache.put(tagInfo, posTag);
        }
        return posTag;
    }
View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

        this.value = value;
        this.sentence = sentence;
        this.start = token.getStart();
        this.end = token.getEnd();
        List<Value<PosTag>> tags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION);
        PosTag posTag = null;
        if(tags != null && !tags.isEmpty()){
            for(Value<PosTag> tag : tags){
                if(tag.probability() == Value.UNKNOWN_PROBABILITY ||
                        tag.probability() >= MIN_POS_CONF ||
                        !Collections.disjoint(tag.value().getCategories(),PREF_LEX_CAT)){
                    posTag = tag.value();
                    break;
                }
            }
            if(posTag == null){
                posTag = tags.get(0).value();
            }
            if(posTag.hasCategory(LexicalCategory.Noun)){
                addAbout(token); //add the token also as noun
            }
            if(posTag.hasCategory(LexicalCategory.Verb)){
                setVerb(token);
            }
        }
        this.posTag = posTag;
    }
View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

        }
        return context;
    }
    private Integer[] getSentimentContext(Integer index, Sentiment sentiment, NavigableMap<Integer,Token> verbs, NavigableMap<Integer,Token> conjunctions, NavigableMap<Integer,Token> nouns, Integer[] sectionSpan) {
        Integer[] context;
        PosTag pos = sentiment.getPosTag();
        boolean isPredicative;
        if(pos != null && pos.getPosHierarchy().contains(Pos.PredicativeAdjective)){
            isPredicative = true;
        } else if(pos != null && pos.hasCategory(LexicalCategory.Adjective) &&
                //Adjective that are not directly in front of a Noun
                nouns.get(Integer.valueOf(index+1)) == null){
          isPredicative = true;
        } else {
            isPredicative = false;
        }
        if(isPredicative){
//            Integer floorConjunction = conjunctions.floorKey(index);
//            if(floorConjunction != null && floorConjunction.compareTo(
//                Integer.valueOf(Math.max(index-conjuctionContext,sectionSpan[0]))) >= 0){
//                lowIndex = Integer.valueOf(floorConjunction-1);
//            }
//            Integer ceilingConjunction = conjunctions.ceilingKey(index);
//            if(ceilingConjunction != null && ceilingConjunction.compareTo(
//                Integer.valueOf(Math.min(index+conjuctionContext,sectionSpan[1]))) <= 0){
//                highIndex = Integer.valueOf(ceilingConjunction+1);
//            }
            //use the verb as context
            Integer floorNoun = nouns.floorKey(index);
            Entry<Integer,Token> floorVerb = verbs.floorEntry(index);
            Integer ceilingNoun = nouns.ceilingKey(index);
            Entry<Integer,Token> ceilingVerb = verbs.ceilingEntry(index);
            floorVerb = floorVerb == null || floorVerb.getKey().compareTo(sectionSpan[0]) < 0 ||
                    //do not use verbs with a noun in-between
                    (floorNoun != null && floorVerb.getKey().compareTo(floorNoun) < 0) ?
                            null : floorVerb;
            ceilingVerb = ceilingVerb == null || ceilingVerb.getKey().compareTo(sectionSpan[1]) > 0 ||
                    //do not use verbs with a noun in-between
                    (ceilingNoun != null && ceilingVerb.getKey().compareTo(ceilingNoun) > 0) ?
                            null : ceilingVerb;
            Entry<Integer,Token> verb;
            if(ceilingVerb != null && floorVerb != null){
                verb = (index - floorVerb.getKey()) < (ceilingVerb.getKey()-index) ? floorVerb : ceilingVerb;
            } else if(ceilingVerb != null){
                verb =  ceilingVerb;
            } else if(floorVerb != null){
                verb = floorVerb;
            } else { //no verb that can be used as context ... return an area around the current pos.
                verb = null;
            }
            if(verb != null){
                if(verb.getKey().compareTo(index) < 0){
                    Integer floorConjunction = conjunctions.floorKey(verb.getKey());
                    if(floorConjunction != null && floorConjunction.compareTo(
                        Integer.valueOf(Math.max(verb.getKey()-conjuctionContext,sectionSpan[0]))) >= 0){
                        //search an other verb in the same direction
                        floorVerb = verbs.floorEntry(Integer.valueOf(floorConjunction));
                        if(floorVerb != null && floorVerb.getKey().compareTo(sectionSpan[0]) >= 0 &&
                                //do not step over an noun
                                (floorNoun == null || floorVerb.getKey().compareTo(floorNoun) >= 0)){
                          verb = floorVerb;
                        }
                    }
                } else if(verb.getKey().compareTo(index) > 0){
                    Integer ceilingConjunction = conjunctions.ceilingKey(verb.getKey());
                    if(ceilingConjunction != null && ceilingConjunction.compareTo(
                        Integer.valueOf(Math.min(verb.getKey()+conjuctionContext,sectionSpan[1]))) >= 0){
                        //search an other verb in the same direction
                        ceilingVerb = verbs.floorEntry(Integer.valueOf(ceilingConjunction));
                        if(ceilingVerb != null && ceilingVerb.getKey().compareTo(sectionSpan[1]) <= 0 &&
                                //do not step over an noun
                                (ceilingNoun == null || ceilingVerb.getKey().compareTo(ceilingNoun) <= 0)){
                            verb = ceilingVerb;
                        }
                    }
                }
                context = new Integer[]{Integer.valueOf(verb.getKey()-nounContext),
                        Integer.valueOf(verb.getKey()+nounContext)};
                sentiment.setVerb(verb.getValue());
            } else {
                context = new Integer[]{Integer.valueOf(index-nounContext),
                        Integer.valueOf(index+nounContext)};
            }
        } else if(pos != null && pos.hasCategory(LexicalCategory.Adjective)){
            //for all other adjectives the affected noun is expected directly
            //after the adjective
            context = new Integer[]{index,Integer.valueOf(index+1)};
        } else if(pos != null && pos.hasCategory(LexicalCategory.Noun)){
            //a noun with an sentiment
            context = new Integer[]{index,index};
        } else { //else (includes pos == null) return default
            context = new Integer[]{Integer.valueOf(index-nounContext),
                    Integer.valueOf(index+nounContext)};
View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

            } else { //check PosTags if we need to lookup this word
                Iterator<Value<PosTag>> posTags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION).iterator();
                boolean ignore = false;
                while(!ignore && !process && posTags.hasNext()) {
                    Value<PosTag> value = posTags.next();
                    PosTag tag = value.value();
                    cats = classifier.getCategories(tag);
                    boolean state = cats.contains(LexicalCategory.Adjective)
                            || cats.contains(LexicalCategory.Noun);
                    ignore = !state && (value.probability() == Value.UNKNOWN_PROBABILITY ||
                            value.probability() >= minPOSConfidence);
View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

       
        //Add some Tokens with POS annotations to test the usage of
        //existing POS annotations by the lemmatizer
        Token verbrachten = at.addToken(de_verbStart,de_verbStart+de_verb.length());
        verbrachten.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("V",LexicalCategory.Verb), de_verbProb));
       
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length());
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("ADJ",LexicalCategory.Adjective), de_adjectiveProb));
       
        Token urlaub = at.addToken(de_nounStart,de_nounStart+de_noun.length());
        urlaub.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("NC",LexicalCategory.Noun), de_nounProb));
       
        Assert.assertEquals("Can not enhance Test ContentItem",
            EnhancementEngine.ENHANCE_ASYNC,engine.canEnhance(ci));
        //compute the enhancements
        try {
View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

            ciFactory.createBlob(new StringSource(TEST_TEXT)));
        TEST_ANALYSED_TEXT_WO = AnalysedTextFactory.getDefaultInstance().createAnalysedText(
                ciFactory.createBlob(new StringSource(TEST_TEXT_WO)));
        initAnalyzedText(TEST_ANALYSED_TEXT);
        TEST_ANALYSED_TEXT.addChunk(0, "Dr. Patrick Marshall".length()).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
        TEST_ANALYSED_TEXT.addToken(4, 11).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        TEST_ANALYSED_TEXT.addToken(12, 20).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        initAnalyzedText(TEST_ANALYSED_TEXT_WO);
        TEST_ANALYSED_TEXT_WO.addChunk(0, "Dr. Marshall Patrick".length()).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
        TEST_ANALYSED_TEXT_WO.addToken(4, 12).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        TEST_ANALYSED_TEXT_WO.addToken(13, 20).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
    }
View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

        at.addChunk(TEST_TEXT.indexOf("geologist"), TEST_TEXT.indexOf("geologist")+"geologist".length())
        .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
        at.addChunk(TEST_TEXT.indexOf("the University of Otago"),
            TEST_TEXT.length()-1).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
        //add some tokens
        at.addToken(0, 2).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.Abbreviation),1d));
        at.addToken(2, 3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(".",Pos.Point),1d));
        int start = TEST_TEXT.indexOf("(1869 - November 1950)");
        at.addToken(start,start+1).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("(",Pos.OpenBracket),1d));
        at.addToken(start+1,start+5).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NUM",Pos.Numeral),1d));
        at.addToken(start+6,start+7).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("-",Pos.Hyphen),1d));
        at.addToken(start+8,start+16).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        at.addToken(start+17,start+21).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NUM",Pos.Numeral),1d));
        at.addToken(start+21,start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(")",Pos.CloseBracket),1d));
       
        at.addToken(start+23, start+26).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("O",LexicalCategory.Adjective)));
        at.addToken(start+27, start+28).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("A", LexicalCategory.Adposition)));
       
        start = TEST_TEXT.indexOf("geologist");
        at.addToken(start,start+9).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));

        at.addToken(start+10, start+13).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("O", LexicalCategory.Adjective)));
        at.addToken(start+14, start+19).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb)));
        at.addToken(start+20, start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("PP", LexicalCategory.PronounOrDeterminer)));

        start = TEST_TEXT.indexOf("New Zealand");
        at.addToken(start,start+3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        at.addToken(start+4,start+11).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
       
        //add filler Tokens for "and worked at"
        at.addToken(start+12, start+15).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("O", LexicalCategory.Adjective)));
        at.addToken(start+16, start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb)));
        at.addToken(start+23, start+25).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("PP", LexicalCategory.PronounOrDeterminer)));
       
        start = TEST_TEXT.indexOf("the University of Otago");
        at.addToken(start,start+3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("ART",Pos.Article),1d));
        at.addToken(start+4,start+14).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
        at.addToken(start+15,start+17).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("OF",Pos.Preposition),1d));
        at.addToken(start+18,start+23).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
        at.addToken(start+23,start+24).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(".",Pos.Point),1d));
    }
View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

            while (tokenStream.incrementToken()){
                offset = tokenStream.addAttribute(OffsetAttribute.class);
                Token token = at.addToken(offset.startOffset(), offset.endOffset());
                //Get the POS attribute and init the PosTag
                PartOfSpeechAttribute posAttr = tokenStream.addAttribute(PartOfSpeechAttribute.class);
                PosTag posTag = POS_TAG_SET.getTag(posAttr.getPartOfSpeech());
                if(posTag == null){
                    posTag = adhocTags.get(posAttr.getPartOfSpeech());
                    if(posTag == null){
                        posTag = new PosTag(posAttr.getPartOfSpeech());
                        adhocTags.put(posAttr.getPartOfSpeech(), posTag);
                        log.warn(" ... missing PosTag mapping for {}",posAttr.getPartOfSpeech());
                    }
                }
                //Sentence detection by POS tag
                if(sentStartOffset < 0){ //the last token was a sentence ending
                  sentStartOffset = offset.startOffset();
                }
                if(posTag.hasPos(Pos.Point)) {
                    Sentence sent = at.addSentence(sentStartOffset, offset.startOffset());
                    //add the sentence as context to the NerData instances
                    while(nerSentIndex < nerList.size()){
                        nerList.get(nerSentIndex).context = sent.getSpan();
                        nerSentIndex++;
View Full Code Here

Examples of org.apache.stanbol.enhancer.nlp.pos.PosTag

        this.token = token;
        this.index = index;
        this.inChunk = chunk;
        this.hasAlphaNumeric = Utils.hasAlphaNumericChar(token.getSpan());
        this.hasSearchableLength = token.getSpan().length() >= tpc.getMinSearchTokenLength();
        PosTag selectedPosTag = null;
        boolean matchedPosTag = false; //matched any of the POS annotations
       
        //(1) check if this Token should be linked against the Vocabulary (isProcessable)
        upperCase = token.getEnd() > token.getStart() && //not an empty token
                Character.isUpperCase(token.getSpan().codePointAt(0)); //and upper case
        boolean isLinkablePos = false;
        boolean isMatchablePos = false;
        boolean isSubSentenceStart = false;
        List<Value<PosTag>> posAnnotations = token.getAnnotations(POS_ANNOTATION);
        for(Value<PosTag> posAnnotation : posAnnotations){
            // check three possible match
            //  1. the LexicalCategory matches
            //  2. the Pos matches
            //  3. the String tag matches
            PosTag posTag = posAnnotation.value();
            if((!disjoint(tpc.getLinkedLexicalCategories(), posTag.getCategories())) ||
                    (!disjoint(tpc.getLinkedPos(), posTag.getPosHierarchy())) ||
                    tpc.getLinkedPosTags().contains(posTag.getTag())){
                if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
                        posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
                    selectedPosTag = posTag;
                    isLinkablePos = true;
                    isMatchablePos = true;
                    matchedPosTag = true;
                    break;
                } // else probability to low for inclusion
            } else if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
                    posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){
                selectedPosTag = posTag; //also rejected PosTags are selected
                matchedPosTag = true;
                isLinkablePos = false;
                break;
            } // else probability to low for exclusion
        }
        if(!matchedPosTag) { //not matched against a POS Tag ...
            this.isLinkablePos = null;
        } else {
            this.isLinkablePos = isLinkablePos;
        }
       
        //(2) check if this token should be considered to match labels of suggestions
        if(this.isLinkablePos != null && this.isLinkablePos){ //processable tokens are also matchable
            this.isMatchablePos = true;
        } else { //check POS and length to see if token is matchable
            matchedPosTag = false; //reset to false!
            for(Value<PosTag> posAnnotation : posAnnotations){
                PosTag posTag = posAnnotation.value();
                if(posTag.isMapped()){
                    if(!Collections.disjoint(tpc.getMatchedLexicalCategories(),
                        posTag.getCategories())){
                        if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
                                posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
                            //override selectedPosTag if present
                            selectedPosTag = posTag; //mark the matchable as selected PosTag
                            isMatchablePos = true;
                            matchedPosTag = true;
                            break;
                        } // else probability to low for inclusion
                    } else if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
                            posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){
                        if(selectedPosTag == null){ //do not override existing values
                            selectedPosTag = posTag; //also rejected PosTags are selected
                        }
                        isMatchablePos = false;
                        matchedPosTag = true;
                        break;
                    } // else probability to low for exclusion
                } //else not matched ... search next one
            }
            if(!matchedPosTag){ //not matched against POS tag ...
                //fall back to the token length
                this.isMatchablePos = null;
                //this.isMatchablePos = token.getSpan().length() >= tpc.getMinSearchTokenLength();   
            } else {
                this.isMatchablePos = isMatchablePos;
            }
        }
        //(3) check if the POS tag indicates the start/end of an sub-sentence
        for(Value<PosTag> posAnnotation : posAnnotations){
            PosTag posTag = posAnnotation.value();
            if((!disjoint(ProcessingState.SUB_SENTENCE_START_POS,posTag.getPosHierarchy()))){
                if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
                        posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
                    isSubSentenceStart = true;
                } // else probability to low for inclusion
            } else if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
                    posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){
                isSubSentenceStart = false;
            }
        }
        this.isSubSentenceStart = isSubSentenceStart;
       
        //(4) check for morpho analyses
        if(selectedPosTag == null){ //token is not processable or matchable
            //we need to set the selectedPosTag to the first POS annotation
            Value<PosTag> posAnnotation = token.getAnnotation(POS_ANNOTATION);
            if(posAnnotation != null) {
                selectedPosTag = posAnnotation.value();
            }
        }
        List<Value<MorphoFeatures>> morphoAnnotations = token.getAnnotations(NlpAnnotations.MORPHO_ANNOTATION);
        if(selectedPosTag == null){ //no POS information ... use the first morpho annotation
            morpho = morphoAnnotations.isEmpty() ? null : morphoAnnotations.get(0).value();
        } else { //select the correct morpho annotation based on the POS tag
            MorphoFeatures mf = null;
            selectMorphoFeature :
            for(Value<MorphoFeatures> morphoAnnotation : morphoAnnotations){
                for(PosTag posTag : morphoAnnotation.value().getPosList()){
                    if(!disjoint(selectedPosTag.getCategories(),posTag.getCategories())){
                        mf = morphoAnnotation.value();
                        break selectMorphoFeature; //stop after finding the first one
                    }
                }
            }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.