Package org.apache.stanbol.enhancer.nlp.model

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText.addToken()


        TokenStream tokens = new WordTokenFilter(new AnalyzedTextSentenceTokenizer(at));
        try {
          tokens.reset();
            while(tokens.incrementToken()){
                OffsetAttribute offset = tokens.addAttribute(OffsetAttribute.class);
                Token t = at.addToken(offset.startOffset(), offset.endOffset());
                log.trace("detected {}",t);
            }
        } catch (IOException e) {
            String message = String.format("IOException while reading from "
                +"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());
View Full Code Here


                continue; //ignore terms without readings
            }
            //Add the LexicalEntry as Token to the Text. NOTE that if a
            //Token with the same start/end positions already exist this
            //Method returns the existing instance
            Token token = at.addToken(term.getFrom(), term.getTo());
            //Now try to get POS annotations for the Token
            for(Value<PosTag> posAnno : token.getAnnotations(NlpAnnotations.POS_ANNOTATION)){
                if(posAnno.value().isMapped()){
                    for(LexicalCategory cat :posAnno.value().getCategories()){
                        if(!tokenLexCats.containsKey(cat)){ //do not override with lover prob
View Full Code Here

        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de")));
        Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));
       
        //Add some Tokens with POS annotations to test the usage of
        //existing POS annotations by the lemmatizer
        Token verbrachten = at.addToken(de_verbStart,de_verbStart+de_verb.length());
        verbrachten.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("V",LexicalCategory.Verb), de_verbProb));
       
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length());
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
View Full Code Here

        //existing POS annotations by the lemmatizer
        Token verbrachten = at.addToken(de_verbStart,de_verbStart+de_verb.length());
        verbrachten.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("V",LexicalCategory.Verb), de_verbProb));
       
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length());
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("ADJ",LexicalCategory.Adjective), de_adjectiveProb));
       
        Token urlaub = at.addToken(de_nounStart,de_nounStart+de_noun.length());
        urlaub.addAnnotation(POS_ANNOTATION, Value.value(
View Full Code Here

       
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length());
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("ADJ",LexicalCategory.Adjective), de_adjectiveProb));
       
        Token urlaub = at.addToken(de_nounStart,de_nounStart+de_noun.length());
        urlaub.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("NC",LexicalCategory.Noun), de_nounProb));
       
        Assert.assertEquals("Can not enhance Test ContentItem",
            EnhancementEngine.ENHANCE_ASYNC,engine.canEnhance(ci));
View Full Code Here

        }
       
        for(SentimentExpression se : seList){
            //Add the Sentiment Expression as Token to the Text. NOTE that if a Token with the same start/end positions already exist this
            //Method returns the existing instance
            Token token = at.addToken(se.getStartSnippet(),se.getEndSnippet());
            token.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, new Value<Double>(se.getSentimentPolarityAsDoubleValue()) );
        }
    }
   
    @Override
View Full Code Here

        OffsetAttribute offset = null;
        try {
          tokenStream.reset(); //required with Solr 4
            while (tokenStream.incrementToken()){
                offset = tokenStream.addAttribute(OffsetAttribute.class);
                Token token = at.addToken(offset.startOffset(), offset.endOffset());
                //Get the POS attribute and init the PosTag
                PartOfSpeechAttribute posAttr = tokenStream.addAttribute(PartOfSpeechAttribute.class);
                PosTag posTag = POS_TAG_SET.getTag(posAttr.getPartOfSpeech());
                if(posTag == null){
                    posTag = adhocTags.get(posAttr.getPartOfSpeech());
View Full Code Here

        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de")));
        Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));
       
        //Add some Tokens with POS annotations to test the usage of
        //existing POS annotations by the lemmatizer
        Token verbrachten = at.addToken(de_verbStart,de_verbStart+de_verb.length());
        verbrachten.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("V",LexicalCategory.Verb), de_verbProb));
       
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length());
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
View Full Code Here

        //existing POS annotations by the lemmatizer
        Token verbrachten = at.addToken(de_verbStart,de_verbStart+de_verb.length());
        verbrachten.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("V",LexicalCategory.Verb), de_verbProb));
       
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length());
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("ADJ",LexicalCategory.Adjective), de_adjectiveProb));
       
        Token urlaub = at.addToken(de_nounStart,de_nounStart+de_noun.length());
        urlaub.addAnnotation(POS_ANNOTATION, Value.value(
View Full Code Here

       
        Token schonen = at.addToken(de_adjectiveStart,de_adjectiveStart+de_adjective.length());
        schonen.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("ADJ",LexicalCategory.Adjective), de_adjectiveProb));
       
        Token urlaub = at.addToken(de_nounStart,de_nounStart+de_noun.length());
        urlaub.addAnnotation(POS_ANNOTATION, Value.value(
            new PosTag("NC",LexicalCategory.Noun), de_nounProb));
       
        Assert.assertEquals("Can not enhance Test ContentItem",
            EnhancementEngine.ENHANCE_ASYNC,engine.canEnhance(ci));
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.