Package org.apache.stanbol.enhancer.nlp.model

Examples of org.apache.stanbol.enhancer.nlp.model.Sentence
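
The Sentence type represents a sentence span of an AnalysedText: it is created from absolute character offsets, exposes its text via getSpan(), can contain Tokens and can carry probabilistic annotations. As an orientation, the following minimal sketch shows the typical pattern used by the examples below. It is not taken from the Stanbol code base: it assumes an already created AnalysedText (the snippets obtain it from the ContentItem or a test factory), the usual Stanbol NLP imports (AnalysedText, Sentence, Token, Value, NlpAnnotations, PosTag, Pos), and a purely illustrative method name and tag values.

    //minimal usage sketch (illustrative only, see note above)
    public static void annotateFirstSentence(AnalysedText at) {
        String text = at.getSpan(); //the complete analysed text
        //register the first sentence; offsets are absolute and the end offset is exclusive
        //(assumes the text contains at least one sentence ending with '.')
        Sentence sentence = at.addSentence(0, text.indexOf('.') + 1);
        //register a Token for the first word of that sentence
        Token first = sentence.addToken(0, text.indexOf(' '));
        //attach a POS tag as a probabilistic Value
        first.addAnnotation(NlpAnnotations.POS_ANNOTATION,
            Value.value(new PosTag("PN", Pos.ProperNoun), 0.95));
        //annotations can be read back from any Span
        Value<PosTag> pos = first.getAnnotation(NlpAnnotations.POS_ANNOTATION);
        if(pos != null){
            System.out.println(first.getSpan() + " -> " + pos.value()
                + " (probability " + pos.probability() + ")");
        }
    }

The first two snippets below (near-identical variants) detect sentences with a Lucene SentenceTokenizer and register them on the AnalysedText via addSentence(start, end), using the OffsetAttribute of the token stream: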


            //first the sentences
            TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText()));
            try {
                sentences.reset(); //TokenStreams need to be reset before consuming (see the next example)
                while(sentences.incrementToken()){
                    OffsetAttribute offset = sentences.addAttribute(OffsetAttribute.class);
                    Sentence s = at.addSentence(offset.startOffset(), offset.endOffset());
                    if(log.isTraceEnabled()) {
                        log.trace("detected {}:{}",s,s.getSpan());
                    }
                }
            } catch (IOException e) {
                String message = String.format("IOException while reading from "
                    +"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());


        TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText()));
        try {
            sentences.reset();
            while(sentences.incrementToken()){
                OffsetAttribute offset = sentences.addAttribute(OffsetAttribute.class);
                Sentence s = at.addSentence(offset.startOffset(), offset.endOffset());
                if(log.isTraceEnabled()) {
                    log.trace("detected {}:{}",s,s.getSpan());
                }
            }
        } catch (IOException e) {
            String message = String.format("IOException while reading from "
                +"CharSequenceReader of AnalyzedText for ContentItem %s",ci.getUri());
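
A test setup that builds a Sentence from character offsets and populates it with Tokens carrying POS and sentiment annotations; note that a Token may hold several POS_ANNOTATION values, as shown for the "enhancer" token: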

    public static final void setup() throws IOException {
        ci = ciFactory.createContentItem(new StringSource(text));
        textBlob = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
        analysedTextWithData = createAnalysedText();
        int sentence = text.indexOf('.')+1;
        Sentence sent1 = analysedTextWithData.addSentence(0, sentence);
        expectedSentences.put(sent1, "The Stanbol enhancer can detect famous " +
            "cities such as Paris and people such as Bob Marley.");
       
        Token the = sent1.addToken(0, 3);
        expectedTokens.put(the, "The");
        the.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PREP",Pos.Preposition), 0.85));
       
        Token stanbol = sent1.addToken(4,11);
        expectedTokens.put(stanbol, "Stanbol");
        stanbol.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PN", Pos.ProperNoun),0.95));
        stanbol.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, Value.value(
            0.5));
       
        //use index to create Tokens
        int enhancerStart = sent1.getSpan().toString().indexOf("enhancer");
        Token enhancer = sent1.addToken(enhancerStart,enhancerStart+"enhancer".length());
        expectedTokens.put(enhancer, "enhancer");
        enhancer.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("PN", Pos.ProperNoun),0.95));
        enhancer.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(
            new PosTag("N", LexicalCategory.Noun),0.87));
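
Test data for dependency-tree annotations: Tokens for "Obama", "visited" and "China" are created inside a Sentence, and a DEPENDENCY_ANNOTATION holding a DependencyRelation with a GrammaticalRelationTag (nsubj) links the subject to its governing verb: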

    AnalysedText parsedAt = getParsedAnalysedText(serialized);
    assertAnalysedTextEquality(parsedAt);
  }
 
  private static void initDepTreeAnnotations() {
        Sentence sentence = at.addSentence(0, text.indexOf(".") + 1);
        Token obama = sentence.addToken(0, "Obama".length());
       
        int visitedStartIdx = sentence.getSpan().toString().indexOf("visited");
        Token visited = sentence.addToken(visitedStartIdx, visitedStartIdx + "visited".length());
       
        int chinaStartIdx = sentence.getSpan().toString().indexOf("China");
        Token china = sentence.addToken(chinaStartIdx, chinaStartIdx + "China".length());
       
        GrammaticalRelationTag nSubjGrammRelTag = new GrammaticalRelationTag(
                "nsubj", GrammaticalRelation.NominalSubject);
        obama.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
          Value.value(new DependencyRelation(nSubjGrammRelTag, true, visited)));
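
Test data for co-reference annotations: two Sentences are added, and a COREF_ANNOTATION with a CorefFeature links the Token "Obama" in the first sentence to its mention "He" in the second: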

    AnalysedText parsedAt = getParsedAnalysedText(serialized);
    assertAnalysedTextEquality(parsedAt);
  }
 
  private static void initCorefAnnotations() {
        Sentence sentence1 = at.addSentence(0, sentenceText1.indexOf(".") + 1);
        Token obama = sentence1.addToken(0, "Obama".length());
       
        Sentence sentence2 = at.addSentence(sentenceText1.indexOf(".") + 2, sentenceText2.indexOf(".") + 1);
        int heStartIdx = sentence2.getSpan().toString().indexOf("He");
        Token he = sentence2.addToken(heStartIdx, heStartIdx + "He".length());
       
        Set<Span> obamaMentions = new HashSet<Span>();
        obamaMentions.add(he);
        obama.addAnnotation(NlpAnnotations.COREF_ANNOTATION,
          Value.value(new CorefFeature(true, obamaMentions)));
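
Collecting sentiment information while iterating over the Spans of an AnalysedText: for each Token the SENTIMENT_ANNOTATION is read and, if present and non-zero, wrapped in a Sentiment object together with the enclosing Sentence (or null when the token lies outside the current sentence):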

        NavigableMap<Integer,Token> nounsAndPronouns = new TreeMap<Integer,Token>();
        NavigableMap<Integer,Token> verbs = new TreeMap<Integer,Token>();
        NavigableMap<Integer,Token> conjuctions = new TreeMap<Integer,Token>();
        NavigableMap<Integer,Token> sectionBorders = new TreeMap<Integer,Token>();
        boolean firstTokenInSentence = true;
        Sentence sentence = null;
        final List<SentimentPhrase> sentimentPhrases = new ArrayList<SentimentPhrase>();
        while(tokenIt.hasNext()){
            Span span = tokenIt.next();
            switch (span.getType()) {
                case Token:
                    Token word = (Token)span;
                    Integer wordIndex = sentimentTokens.size();
                    Value<Double> sentimentAnnotation = span.getAnnotation(SENTIMENT_ANNOTATION);
                    boolean addToList = false;
                    Sentiment sentiment = null;
                    if(sentimentAnnotation != null && sentimentAnnotation.value() != null &&
                            !sentimentAnnotation.value().equals(ZERO)){
                        sentiment = new Sentiment(word, sentimentAnnotation.value(),
                            sentence == null || word.getEnd() > sentence.getEnd() ?
                                    null : sentence);
                        addToList = true;
                    }
                    if(isNegation((Token)span, language)){
                        addToList = true;
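
Writing sentiment results to the enhancement metadata: when a SentimentPhrase belongs to a Sentence, the sentence text is used as fise:selection-context; otherwise a context is computed from the surrounding text: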

   
   
    private void writeSentimentEnhancements(ContentItem ci, List<SentimentPhrase> sentimentPhrases, AnalysedText at, Language lang) {
        // TODO Auto-generated method stub
        MGraph metadata = ci.getMetadata();
        Sentence currentSentence = null;
        final List<SentimentPhrase> sentencePhrases = new ArrayList<SentimentPhrase>();
        for(SentimentPhrase sentPhrase : sentimentPhrases){
            Sentence sentence = sentPhrase.getSentence();
            if(log.isDebugEnabled()){ //debug sentiment info
                CharSequence phraseText = at.getText().subSequence(sentPhrase.getStartIndex(), sentPhrase.getEndIndex());
                log.debug("Write SentimentPhrase for {} (sentence: {})", phraseText,
                    sentence == null ? "none" : sentence.getSpan().length() > 17 ? (sentence.getSpan().subSequence(0,17) + "...") : sentence.getSpan());
                List<Sentiment> sentiments = sentPhrase.getSentiments();
                log.debug(" > {} Sentiments:",sentiments.size());
                for(int i = 0; i < sentiments.size(); i++){
                    log.debug("    {}. {}",i+1,sentiments.get(i));
                }
            }
            if(writeSentimentPhrases){
                UriRef enh = createTextEnhancement(ci, this);
                String phraseText = at.getSpan().substring(sentPhrase.getStartIndex(), sentPhrase.getEndIndex());
                metadata.add(new TripleImpl(enh, ENHANCER_SELECTED_TEXT,
                    new PlainLiteralImpl(phraseText, lang)));
                if(sentPhrase.getSentence() == null){
                    metadata.add(new TripleImpl(enh, ENHANCER_SELECTION_CONTEXT,
                        new PlainLiteralImpl(getSelectionContext(
                            at.getSpan(), phraseText, sentPhrase.getStartIndex()),lang)));
                } else {
                    metadata.add(new TripleImpl(enh, ENHANCER_SELECTION_CONTEXT,
                        new PlainLiteralImpl(sentPhrase.getSentence().getSpan(),lang)));
                }
                metadata.add(new TripleImpl(enh, ENHANCER_START,
                    lf.createTypedLiteral(sentPhrase.getStartIndex())));
                metadata.add(new TripleImpl(enh, ENHANCER_END,
                    lf.createTypedLiteral(sentPhrase.getEndIndex())));
                if(sentPhrase.getPositiveSentiment() != null){
                    metadata.add(new TripleImpl(enh, POSITIVE_SENTIMENT_PROPERTY,
                        lf.createTypedLiteral(sentPhrase.getPositiveSentiment())));
                }
                if(sentPhrase.getNegativeSentiment() != null){
                    metadata.add(new TripleImpl(enh, NEGATIVE_SENTIMENT_PROPERTY,
                        lf.createTypedLiteral(sentPhrase.getNegativeSentiment())));
                }
                metadata.add(new TripleImpl(enh, SENTIMENT_PROPERTY,
                    lf.createTypedLiteral(sentPhrase.getSentiment())));              
                //add the Sentiment type as well as the type of the SSO Ontology
                metadata.add(new TripleImpl(enh, DC_TYPE, SENTIMENT_TYPE));
                UriRef ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(SpanTypeEnum.Chunk);
                if(ssoType != null){
                    metadata.add(new TripleImpl(enh, DC_TYPE, ssoType));
                }
            }
            if(writeSentencesSentimet && sentence != null){
                if(sentence.equals(currentSentence)){
                    sentencePhrases.add(sentPhrase);
                } else {
                    writeSentiment(ci, currentSentence,sentencePhrases);
                    //reset
                    currentSentence = sentence;
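
Sentence detection with an OpenNLP SentenceDetector: each detected opennlp.tools.util.Span is registered on the AnalysedText via addSentence(start, end):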

        String language = getLanguage(this, ci, true);
        SentenceDetector sentenceDetector = getSentenceDetector(language);
        if(sentenceDetector != null){
            for(opennlp.tools.util.Span sentSpan : sentenceDetector.sentPosDetect(at.getSpan())) {
                //detect sentences and add them to the AnalysedText.
                Sentence sentence = at.addSentence(sentSpan.getStart(), sentSpan.getEnd());
                log.trace(" > add {}",sentence);
            }
        } else {
            log.warn("SentenceDetector model for language {} is no longer available. "
                + "This might happen if the model becomes unavailable during enhancement. "
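
Writing fise:TextAnnotations for named entities: while iterating over Sentence and Chunk spans, the current Sentence is remembered as context so that its text can serve as fise:selection-context for spans with a NER_ANNOTATION: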

                throw RuntimeException.class.cast(e);
            }
        }
        if(writeTextAnnotations){
            Iterator<Span> spans = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence,SpanTypeEnum.Chunk));
            Sentence context = null;
            MGraph metadata = ci.getMetadata();
            Language lang = new Language(language);
            LiteralFactory lf = LiteralFactory.getInstance();
            ci.getLock().writeLock().lock();
            try { //write TextAnnotations for Named Entities
                while(spans.hasNext()){
                    Span span = spans.next();
                    switch (span.getType()) {
                        case Sentence:
                            context = (Sentence)span; //remember the Sentence as context for following spans
                            break;
                        default:
                            Value<NerTag> nerAnno = span.getAnnotation(NER_ANNOTATION);
                            if(nerAnno != null){
                                UriRef ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                                //add span related data
                                metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT,
                                    new PlainLiteralImpl(span.getSpan(), lang)));
                                metadata.add(new TripleImpl(ta, ENHANCER_START,
                                    lf.createTypedLiteral(span.getStart())));
                                metadata.add(new TripleImpl(ta, ENHANCER_END,
                                    lf.createTypedLiteral(span.getEnd())));
                                metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT,
                                    new PlainLiteralImpl(context == null ?
                                            getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart()) :
                                                context.getSpan(), lang)));
                                //add the NER type
                                if(nerAnno.value().getType() != null){
                                    metadata.add(new TripleImpl(ta,DC_TYPE,nerAnno.value().getType()));
                                }
                                if(nerAnno.probability() >= 0) {
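
Sentence detection driven by POS tags: whenever a token tagged with Pos.Point is reached, a Sentence is created from the collected offsets and set as context on the pending NerData entries; NER Chunks and MorphoFeatures are added along the way, and a trailing Sentence is created after the loop for text that does not end with a sentence mark: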

                //Sentence detection by POS tag
                if(sentStartOffset < 0){ //the last token was a sentence ending
                  sentStartOffset = offset.startOffset();
                }
                if(posTag.hasPos(Pos.Point)) {
                    Sentence sent = at.addSentence(sentStartOffset, offset.startOffset());
                    //add the sentence as context to the NerData instances
                    while(nerSentIndex < nerList.size()){
                        nerList.get(nerSentIndex).context = sent.getSpan();
                        nerSentIndex++;
                    }
                    sentStartOffset = -1;
                }
                //POS
                token.addAnnotation(POS_ANNOTATION, Value.value(posTag));
                //NER
                NerTag nerTag = NER_TAG_SET.getTag(posAttr.getPartOfSpeech());
                if(ner != null && (nerTag == null || !ner.tag.getType().equals(nerTag.getType()))){
                    //write NER annotation
                    Chunk chunk = at.addChunk(ner.start, ner.end);
                    chunk.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(ner.tag));
                    //NOTE that the fise:TextAnnotation are written later based on the nerList
                    //clean up
                    ner = null;
                }
                if(nerTag != null){
                    if(ner == null){
                        ner = new NerData(nerTag, offset.startOffset());
                        nerList.add(ner);
                    }
                    ner.end = offset.endOffset();
                }
                BaseFormAttribute baseFormAttr = tokenStream.addAttribute(BaseFormAttribute.class);
                MorphoFeatures morpho = null;
                if(baseFormAttr != null && baseFormAttr.getBaseForm() != null){
                  morpho = new MorphoFeatures(baseFormAttr.getBaseForm());
                  morpho.addPos(posTag); //and add the posTag
                }
                InflectionAttribute inflectionAttr = tokenStream.addAttribute(InflectionAttribute.class);
                inflectionAttr.getInflectionForm();
                inflectionAttr.getInflectionType();
                if(morpho != null){ //if present add the morpho
                  token.addAnnotation(MORPHO_ANNOTATION, Value.value(morpho));
                }
            }
            //we still need to write the last sentence
            Sentence lastSent = null;
            if(offset != null && sentStartOffset >= 0 && offset.endOffset() > sentStartOffset){
                lastSent = at.addSentence(sentStartOffset, offset.endOffset());
            }
            //and set the context of the remaining named entities
            while(nerSentIndex < nerList.size()){
                if(lastSent != null){
                    nerList.get(nerSentIndex).context = lastSent.getSpan();
                } else { //no sentence detected
                    nerList.get(nerSentIndex).context = at.getSpan();
                }
                nerSentIndex++;
            }
