Package org.apache.stanbol.enhancer.engines.entitylinking.impl

Examples of org.apache.stanbol.enhancer.engines.entitylinking.impl.TokenData


    @Override
    public boolean incrementToken() throws IOException {
        if(input.incrementToken()){
            incrementCount++;
            boolean first = true;
            TokenData token;
            boolean lookup = false;
            int lastMatchable = -1;
            int lastIndex = -1;
            if(log.isTraceEnabled()){
              log.trace("> solr:[{},{}] {}",new Object[]{
                              offset.startOffset(), offset.endOffset(), termAtt});
            }
            while((token = nextToken(first)) != null){
              if(log.isTraceEnabled()) {
                  log.trace("  < [{},{}]:{} (link {}, match; {})",new Object[]{
                          token.token.getStart(), token.token.getEnd(),token.getTokenText(),
                          token.isLinkable, token.isMatchable});
              }
                first = false;
                if(token.isLinkable){
                    lookup = true;
                } else if (token.isMatchable){
                    lastMatchable = token.index;
                    lastIndex = lastMatchable;
                } //else if(token.hasAlphaNumeric){
                //    lastIndex = token.index;
                //}
            }
            //lookahead
            if(!lookup && lastIndex >= 0 && sectionData != null){
                List<TokenData> tokens = sectionData.getTokens();
                int maxLookahead = Math.max(lastIndex, lastMatchable+3);
                for(int i = lastIndex+1;!lookup && i < maxLookahead && i < tokens.size(); i++){
                    token = tokens.get(i);
                    if(token.isLinkable){
                        lookup = true;
                    } else if(token.isMatchable && (i+1) == maxLookahead){
                        maxLookahead++; //increase lookahead for matchable tokens
                    }
                }
            }
            this.taggable.setTaggable(lookup);
            if(lookup){
                if(log.isTraceEnabled()){
                    TokenData t = getToken();
                    log.trace("lookup: token [{},{}]: {} | word [{},{}]:{}", new Object[]{
                            offset.startOffset(), offset.endOffset(), termAtt,
                            t.token.getStart(), t.token.getEnd(),
                            t.getTokenText()});
                }
                lookupCount++;
            }
            return true;
        } else {
View Full Code Here


        if(tokensCursor >= tokens.size()-1){
            if(!incrementTokenData()){ //adds a new token to the list
                return null; //EoF
            }
        }
        TokenData cursorToken = tokens.get(tokensCursor+1);
        if(cursorToken.token.getStart() < endOffset){
            tokensCursor++; //set the next token as current
            return cursorToken; //and return it
        } else {
            return null;
View Full Code Here

                if(log.isTraceEnabled()){
                    CharSequence tagSequence = at.getText().subSequence(start, end);
                    log.trace(" > reduce tag {} - no overlapp with linkable token", tagSequence);
                }
            } else { //if the tag overlaps a linkable token
                TokenData linkableToken = linkableTokenContext.linkableToken;
                List<TokenData> tokens = linkableTokenContext.context;
                ChunkData cd = linkableToken.inChunk; //check if it maches > 50% of the chunk
                 if(!lpc.isIgnoreChunks() && cd != null &&
                        cd.isProcessable){
                    int cstart = cd.getMatchableStartChar() >= 0 ? cd.getMatchableStartChar() :
                        start;
                    int cend = cd.getMatchableEndChar();
                    if(cstart < start || cend > end){ //if the tag does not cover the whole chunk
                        int num = 0;
                        int match = 0;
                        for(int i = cd.getMatchableStart(); i <= cd.getMatchableEnd(); i++){
                            TokenData td = tokens.get(i);
                            if(td.isMatchable){
                                num++;
                                if(match < 1 && td.token.getStart() >= start ||
                                        match > 0 && td.token.getEnd() <= end){
                                    match++;
View Full Code Here

TOP

Related Classes of org.apache.stanbol.enhancer.engines.entitylinking.impl.TokenData

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.