Package org.apache.stanbol.enhancer.nlp.model

Examples of org.apache.stanbol.enhancer.nlp.model.AnalysedText.addToken()
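AnalysedText.addToken(start, end) registers the character span from start to end of the analysed text as a Token and returns it; as the comments in the snippets below note, calling it again with the same start/end offsets returns the already existing Token instance. The returned Token can then be decorated with NLP annotations. A minimal sketch of that pattern, written as a method-body fragment in the same style as the snippets below, assuming an AnalysedText instance at is already available and that the example offsets 12/17 delimit a word in its text:

    //assumption: 'at' is an existing AnalysedText and offsets 12/17
    //delimit a word within its text
    Token token = at.addToken(12, 17);
    //adding the same span again returns the same Token instance
    assert token == at.addToken(12, 17);
    //the Token can now carry NLP annotations, e.g. a sentiment value
    token.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION,
        new Value<Double>(0.75));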


                continue; //ignore terms without readings
            }
            //Add the LexicalEntry as Token to the Text. NOTE that if a
            //Token with the same start/end positions already exists, this
            //method returns the existing instance
            Token token = at.addToken(term.getFrom(), term.getTo());
            //Now try to get POS annotations for the Token
            for(Value<PosTag> posAnno : token.getAnnotations(NlpAnnotations.POS_ANNOTATION)){
                if(posAnno.value().isMapped()){
                    for(LexicalCategory cat : posAnno.value().getCategories()){
                        if(!tokenLexCats.containsKey(cat)){ //do not override with lower prob
View Full Code Here


        }
       
        for(SentimentExpression se : seList){
            //Add the Sentiment Expression as Token to the Text. NOTE that if a Token
            //with the same start/end positions already exists, this method returns
            //the existing instance
            Token token = at.addToken(se.getStartSnippet(), se.getEndSnippet());
            token.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, new Value<Double>(se.getSentimentPolarityAsDoubleValue()));
        }
    }
   
    @Override
View Full Code Here

        //now the tokens
        TokenStream tokens = new WordTokenFilter(new AnalyzedTextSentenceTokenizer(at));
        try {
            while(tokens.incrementToken()){
                OffsetAttribute offset = tokens.addAttribute(OffsetAttribute.class);
                Token t = at.addToken(offset.startOffset(), offset.endOffset());
                log.trace("detected {}",t);
            }
        } catch (IOException e) {
            String message = String.format("IOException while reading from "
                + "CharSequenceReader of AnalyzedText for ContentItem %s", ci.getUri());
View Full Code Here

        try {
            while(ts.incrementToken()){
                OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
                //when tokenizing labels we need to preserve all chars
                if(offset.startOffset() > lastEnd){ //add token for stopword
                    at.addToken(lastEnd, offset.startOffset());
                }
                at.addToken(offset.startOffset(), offset.endOffset());
                lastEnd = offset.endOffset();
            }
        } catch (IOException e) {
View Full Code Here

                OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
                //when tokenizing labels we need to preserve all chars
                if(offset.startOffset() > lastEnd){ //add token for stopword
                    at.addToken(lastEnd, offset.startOffset());
                }
                at.addToken(offset.startOffset(), offset.endOffset());
                lastEnd = offset.endOffset();
            }
        } catch (IOException e) {
            log.warn("IOException while reading the parsed Text",e);
            throw new EngineException("IOException while reading the parsed Text",e);
View Full Code Here
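The last two snippets share a pattern worth spelling out: the Lucene TokenStream skips stopwords, so whenever the next token's startOffset lies beyond the end of the previously added token, an extra Token is added for the gap, ensuring every character of the analysed label is covered. A condensed sketch of the full loop, assuming ts is a TokenStream that has already been reset() and that lastEnd starts at 0 (its initialization is cut off in the excerpts above):

    int lastEnd = 0; //assumed start value; not visible in the excerpts
    while(ts.incrementToken()){
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        if(offset.startOffset() > lastEnd){
            //cover the skipped range (e.g. a stopword) with its own Token
            at.addToken(lastEnd, offset.startOffset());
        }
        at.addToken(offset.startOffset(), offset.endOffset());
        lastEnd = offset.endOffset();
    }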
