Package opennlp.tools.parser

Examples of opennlp.tools.parser.Parse


  public String[] getContext(Parse parent, Parse[] constituents, int index, boolean trimFrontier) {
    List<String> features = new ArrayList<String>(100);
    //default
    features.add("default");
    Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet);
    Parse pstart = children[0];
    Parse pend = children[children.length-1];
    String type = parent.getType();
    checkcons(pstart, "begin", type, features);
    checkcons(pend, "last", type, features);
    String production = "p="+production(parent,false);
    String punctProduction = "pp="+production(parent,true);
    features.add(production);
    features.add(punctProduction);


    Parse p1 = null;
    Parse p2 = null;
    Parse p_1 = null;
    Parse p_2 = null;
    Collection<Parse> p1s = constituents[index].getNextPunctuationSet();
    Collection<Parse> p2s = null;
    Collection<Parse> p_1s = constituents[index].getPreviousPunctuationSet();
    Collection<Parse> p_2s = null;
    List<Parse> rf;
View Full Code Here


   * @param constituents The constituents of the parse so far.
   * @param index The index of the constituent where a build decision is being made.
   * @return the contexts/features for the decision to build a new constituent.
   */
  public String[] getContext(Parse[] constituents, int index) {
    Parse p_2 = null;
    Parse p_1 = null;
    Parse p0 = null;
    Parse p1 = null;
    Parse p2 = null;
    int ps = constituents.length;

    p0 = constituents[index];
    if (index + 1 < ps) {
      p1 = constituents[index + 1];
View Full Code Here

    if (min != null) {
     
      int startOffset = p.toString().indexOf(min);
      int endOffset = startOffset + min.length();
     
      Parse tokens[] = p.getTagNodes();
     
      int beginToken = -1;
      int endToken = -1;
     
      for (int i = 0; i < tokens.length; i++) {
View Full Code Here

 
  public static boolean addMention(int id, Span mention, Parse[] tokens) {

  boolean failed = false;
   
    Parse startToken = tokens[mention.getStart()];
    Parse endToken = tokens[mention.getEnd() - 1];
    Parse commonParent = startToken.getCommonParent(endToken);
   
    if (commonParent != null) {
//      Span mentionSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd());
     
      if (entitySet.contains(commonParent.getType())) {
        commonParent.getParent().setType("NP#" + id);           
      }
      else if (commonParent.getType().equals("NML")) {
        commonParent.setType("NML#" + id);
      }
      else if (commonParent.getType().equals("NP")) {
        commonParent.setType("NP#" + id);
      }
      else {
        System.out.println("Inserting mention failed: " + commonParent.getType() + " Failed id: " + id);
        failed = true;
      }
    }
    else {
      throw new IllegalArgumentException("Tokens must always have a common parent!");
View Full Code Here

   * @param node The node whose parents are to be returned.
   * @return a set of parent nodes.
   */
  private Map<Parse, Integer> getNonAdjoinedParent(Parse node) {
    Map<Parse, Integer> parents = new HashMap<Parse, Integer>();
    Parse parent = node.getParent();
    int index = indexOf(node,parent);
    parents.put(parent, index);
    while(parent.getType().equals(node.getType())) {
      node = parent;
      parent = parent.getParent();
      index = indexOf(node,parent);
      parents.put(parent, index);
    }
    return parents;
  }
View Full Code Here

      List<CorefMention[]> allMentions = sample.getMentions();
      List<Parse> allParses = sample.getParses();
     
      for (int si = 0; si < allMentions.size(); si++) {
        CorefMention mentions[] = allMentions.get(si);
        Parse p = allParses.get(si);
       
        for (Mention extent : mentionFinder.getMentions(new DefaultParse(p, si))) {
          if (extent.getParse() == null) {
            // not sure how to get head index
            Parse snp = new Parse(p.getText(),extent.getSpan(),"NML",1.0,0);
            p.insert(snp);
          }
        }
       
        Parse tokens[] = p.getTagNodes();
       
        for (CorefMention mention : mentions) {
          Span min = getMinSpan(p, mention);
         
          if (min == null) {
View Full Code Here

  @Override
  protected boolean lastChild(Parse child, Parse parent) {
    boolean lc = super.lastChild(child, parent);
    while(!lc) {
      Parse cp = child.getParent();
      if (cp != parent && cp.getType().equals(child.getType())) {
        lc = super.lastChild(cp,parent);
        child = cp;
      }
      else {
        break;
View Full Code Here

      currentChunks[ci].setLabel(Parser.COMPLETE);
      chunks[ci].setLabel(Parser.COMPLETE);
    }
    for (int ci=0;ci<chunks.length;ci++) {
      //System.err.println("parserEventStream.addParseEvents: chunks="+Arrays.asList(chunks));
      Parse parent = chunks[ci].getParent();
      Parse prevParent = chunks[ci];
      int off = 0;
      //build un-built parents
      if (!chunks[ci].isPosTag()) {
        builtNodes.add(off++,chunks[ci]);
      }
      //perform build stages
      while (!parent.getType().equals(AbstractBottomUpParser.TOP_NODE) && parent.getLabel() == null) {
        if (parent.getLabel() == null && !prevParent.getType().equals(parent.getType())) {
          //build level
          if (debug) System.err.println("Build: "+parent.getType()+" for: "+currentChunks[ci]);
          if (etype == ParserEventTypeEnum.BUILD) {
            parseEvents.add(new Event(parent.getType(), buildContextGenerator.getContext(currentChunks, ci)));
          }
          builtNodes.add(off++,parent);
          Parse newParent = new Parse(currentChunks[ci].getText(),currentChunks[ci].getSpan(),parent.getType(),1,0);
          newParent.add(currentChunks[ci],rules);
          newParent.setPrevPunctuation(currentChunks[ci].getPreviousPunctuationSet());
          newParent.setNextPunctuation(currentChunks[ci].getNextPunctuationSet());
          currentChunks[ci].setParent(newParent);
          currentChunks[ci] = newParent;
          newParent.setLabel(Parser.BUILT);
          //see if chunk is complete
          if (lastChild(chunks[ci], parent)) {
            if (etype == ParserEventTypeEnum.CHECK) {
              parseEvents.add(new Event(Parser.COMPLETE, checkContextGenerator.getContext(currentChunks[ci],currentChunks, ci,false)));
            }
            currentChunks[ci].setLabel(Parser.COMPLETE);
            parent.setLabel(Parser.COMPLETE);
          }
          else {
            if (etype == ParserEventTypeEnum.CHECK) {
              parseEvents.add(new Event(Parser.INCOMPLETE, checkContextGenerator.getContext(currentChunks[ci],currentChunks,ci,false)));
            }
            currentChunks[ci].setLabel(Parser.INCOMPLETE);
            parent.setLabel(Parser.COMPLETE);
          }

          chunks[ci] = parent;
          //System.err.println("build: "+newParent+" for "+parent);
        }
        //TODO: Consider whether we need to set this label or train parses at all.
        parent.setLabel(Parser.BUILT);
        prevParent = parent;
        parent = parent.getParent();
      }
      //decide to attach
      if (etype == ParserEventTypeEnum.BUILD) {
        parseEvents.add(new Event(Parser.DONE, buildContextGenerator.getContext(currentChunks, ci)));
      }
      //attach node
      String attachType = null;
      /** Node selected for attachment. */
      Parse attachNode = null;
      int attachNodeIndex = -1;
      if (ci == 0){
        Parse top = new Parse(currentChunks[ci].getText(),new Span(0,currentChunks[ci].getText().length()),AbstractBottomUpParser.TOP_NODE,1,0);
        top.insert(currentChunks[ci]);
      }
      else {
        /** Right frontier consisting of partially-built nodes based on current state of the parse.*/
        List<Parse> currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet);
        if (currentRightFrontier.size() != rightFrontier.size()) {
          System.err.println("fontiers mis-aligned: "+currentRightFrontier.size()+" != "+rightFrontier.size()+" "+currentRightFrontier+" "+rightFrontier);
          System.exit(1);
        }
        Map<Parse, Integer> parents = getNonAdjoinedParent(chunks[ci]);
        //try daughters first.
        for (int cfi=0;cfi<currentRightFrontier.size();cfi++) {
          Parse frontierNode = rightFrontier.get(cfi);
          Parse cfn = currentRightFrontier.get(cfi);
          if (!Parser.checkComplete || !Parser.COMPLETE.equals(cfn.getLabel())) {
            Integer i = parents.get(frontierNode);
            if (debug) System.err.println("Looking at attachment site ("+cfi+"): "+cfn.getType()+" ci="+i+" cs="+nonPunctChildCount(cfn)+", "+cfn+" :for "+currentChunks[ci].getType()+" "+currentChunks[ci]+" -> "+parents);
            if (attachNode == null &&  i != null && i == nonPunctChildCount(cfn)) {
              attachType = Parser.ATTACH_DAUGHTER;
              attachNodeIndex = cfi;
              attachNode = cfn;
              if (etype == ParserEventTypeEnum.ATTACH) {
                parseEvents.add(new Event(attachType, attachContextGenerator.getContext(currentChunks, ci, currentRightFrontier, attachNodeIndex)));
              }
              //System.err.println("daughter attach "+attachNode+" at "+fi);
            }
          }
          else {
            if (debug) System.err.println("Skipping ("+cfi+"): "+cfn.getType()+","+cfn.getPreviousPunctuationSet()+" "+cfn+" :for "+currentChunks[ci].getType()+" "+currentChunks[ci]+" -> "+parents);
          }
          // Can't attach past first incomplete node.
          if (Parser.checkComplete && cfn.getLabel().equals(Parser.INCOMPLETE)) {
            if (debug) System.err.println("breaking on incomplete:"+cfn.getType()+" "+cfn);
            break;
          }
        }
        //try sisters, and generate non-attach events.
        for (int cfi=0;cfi<currentRightFrontier.size();cfi++) {
          Parse frontierNode = rightFrontier.get(cfi);
          Parse cfn = currentRightFrontier.get(cfi);
          if (attachNode == null && parents.containsKey(frontierNode.getParent())
              && frontierNode.getType().equals(frontierNode.getParent().getType())
              ){ //&& frontierNode.getParent().getLabel() == null) {
            attachType = Parser.ATTACH_SISTER;
            attachNode = cfn;
            attachNodeIndex = cfi;
            if (etype == ParserEventTypeEnum.ATTACH) {
              parseEvents.add(new Event(Parser.ATTACH_SISTER, attachContextGenerator.getContext(currentChunks, ci, currentRightFrontier, cfi)));
            }
            chunks[ci].getParent().setLabel(Parser.BUILT);
            //System.err.println("in search sister attach "+attachNode+" at "+cfi);
          }
          else if (cfi == attachNodeIndex) {
            //skip over previously attached daughter.
          }
          else {
            if (etype == ParserEventTypeEnum.ATTACH) {
              parseEvents.add(new Event(Parser.NON_ATTACH, attachContextGenerator.getContext(currentChunks, ci, currentRightFrontier, cfi)));
            }
          }
          //Can't attach past first incomplete node.
          if (Parser.checkComplete && cfn.getLabel().equals(Parser.INCOMPLETE)) {
            if (debug) System.err.println("breaking on incomplete:"+cfn.getType()+" "+cfn);
            break;
          }
        }
        //attach Node
        if (attachNode != null) {
          if (attachType == Parser.ATTACH_DAUGHTER) {
            Parse daughter = currentChunks[ci];
            if (debug) System.err.println("daughter attach a="+attachNode.getType()+":"+attachNode+" d="+daughter+" com="+lastChild(chunks[ci], rightFrontier.get(attachNodeIndex)));
            attachNode.add(daughter,rules);
            daughter.setParent(attachNode);
            if (lastChild(chunks[ci], rightFrontier.get(attachNodeIndex))) {
              if (etype == ParserEventTypeEnum.CHECK) {
                parseEvents.add(new Event(Parser.COMPLETE, checkContextGenerator.getContext(attachNode,currentChunks,ci,true)));
              }
              attachNode.setLabel(Parser.COMPLETE);
            }
            else {
              if (etype == ParserEventTypeEnum.CHECK) {
                parseEvents.add(new Event(Parser.INCOMPLETE, checkContextGenerator.getContext(attachNode,currentChunks,ci,true)));
              }
            }
          }
          else if (attachType == Parser.ATTACH_SISTER) {
            Parse frontierNode = rightFrontier.get(attachNodeIndex);
            rightFrontier.set(attachNodeIndex,frontierNode.getParent());
            Parse sister = currentChunks[ci];
            if (debug) System.err.println("sister attach a="+attachNode.getType()+":"+attachNode+" s="+sister+" ap="+attachNode.getParent()+" com="+lastChild(chunks[ci], rightFrontier.get(attachNodeIndex)));
            Parse newParent = attachNode.getParent().adjoin(sister,rules);

            newParent.setParent(attachNode.getParent());
            attachNode.setParent(newParent);
            sister.setParent(newParent);
            if (attachNode == currentChunks[0]) {
              currentChunks[0]= newParent;
            }
            if (lastChild(chunks[ci], rightFrontier.get(attachNodeIndex))) {
              if (etype == ParserEventTypeEnum.CHECK) {
                parseEvents.add(new Event(Parser.COMPLETE, checkContextGenerator.getContext(newParent,currentChunks,ci,true)));
              }
              newParent.setLabel(Parser.COMPLETE);
            }
            else {
              if (etype == ParserEventTypeEnum.CHECK) {
                parseEvents.add(new Event(Parser.INCOMPLETE, checkContextGenerator.getContext(newParent,currentChunks,ci,true)));
              }
              newParent.setLabel(Parser.INCOMPLETE);
            }

          }
          //update right frontier
          for (int ni=0;ni<attachNodeIndex;ni++) {
View Full Code Here

        sentenceNumber=0;
        document.clear();
        parses.clear();
      }
      else {
        Parse p = Parse.parseParse(line);
        parses.add(p);
        Mention[] extents = treebankLinker.getMentionFinder().getMentions(new DefaultParse(p,sentenceNumber));
        //construct new parses for mentions which don't have constituents.
        for (int ei=0,en=extents.length;ei<en;ei++) {
          //System.err.println("PennTreebankLiner.main: "+ei+" "+extents[ei]);

          if (extents[ei].getParse() == null) {
            //not sure how to get head index, but its not used at this point.
            Parse snp = new Parse(p.getText(),extents[ei].getSpan(),"NML",1.0,0);
            p.insert(snp);
            extents[ei].setParse(new DefaultParse(snp,sentenceNumber));
          }

        }
View Full Code Here

    }
   
    ParseConverter converter = new ParseConverter(sentence,(Span[])
        tokenSpans.toArray(new Span[tokenSpans.size()]));
  
    Parse unparsedTree = converter.getParseForTagger();
  
    if (unparsedTree.getChildCount() > 0) {
     
      Parse parse = mParser.parse(unparsedTree);
 
      // TODO: We need a strategy to handle the case that a full
      //       parse could not be found. What to do in this case?
     
      parse = converter.transformParseFromTagger(parse);
  
      if (mLogger.isLoggable(Level.INFO)) {
        StringBuffer parseString = new StringBuffer();
        parse.show(parseString);
    
        mLogger.log(Level.INFO, parseString.toString());
      }
  
      createAnnotation(cas, sentenceAnnotation.getBegin(), parse);
View Full Code Here

TOP

Related Classes of opennlp.tools.parser.Parse

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.