Package opennlp.tools.parser

Examples of opennlp.tools.parser.Parse


   * @return A set of contextual features about this attachment.
   */
  public String[] getContext(Parse[] constituents, int index, List<Parse> rightFrontier, int rfi) {
    List<String> features = new ArrayList<String>(100);
    int nodeDistance = rfi;
    Parse fn = rightFrontier.get(rfi);
    Parse fp = null;
    if (rfi+1 < rightFrontier.size()) {
      fp = rightFrontier.get(rfi+1);
    }
    Parse p_1 = null;
    if (rightFrontier.size() > 0) {
      p_1 = rightFrontier.get(0);
    }
    Parse p0 = constituents[index];
    Parse p1 = null;
    if (index+1 < constituents.length) {
      p1 = constituents[index+1];
    }

    Collection<Parse> punct1s = null;
    Collection<Parse> punct_1s = null;
    Collection<Parse> punct_1fs = null;
    punct_1fs = fn.getPreviousPunctuationSet();
    punct_1s=p0.getPreviousPunctuationSet();
    punct1s=p0.getNextPunctuationSet();

    String consfp = cons(fp,-3);
    String consf = cons(fn,-2);
    String consp_1 = cons(p_1,-1);
    String consp0 = cons(p0,0);
    String consp1 = cons(p1,1);

    String consbofp = consbo(fp,-3);
    String consbof = consbo(fn,-2);
    String consbop_1 = consbo(p_1,-1);
    String consbop0 = consbo(p0,0);
    String consbop1 = consbo(p1,1);

    Cons cfp = new Cons(consfp,consbofp,-3,true);
    Cons cf = new Cons(consf,consbof,-2,true);
    Cons c_1 = new Cons(consp_1,consbop_1,-1,true);
    Cons c0 = new Cons(consp0,consbop0,0,true);
    Cons c1 = new Cons(consp1,consbop1,1,true);

    //default
    features.add("default");

    //unigrams
    features.add(consfp);
    features.add(consbofp);
    features.add(consf);
    features.add(consbof);
    features.add(consp_1);
    features.add(consbop_1);
    features.add(consp0);
    features.add(consbop0);
    features.add(consp1);
    features.add(consbop1);

    //productions
    String prod = production(fn,false);
    //String punctProd = production(fn,true,punctSet);
    features.add("pn="+prod);
    features.add("pd="+prod+","+p0.getType());
    features.add("ps="+fn.getType()+"->"+fn.getType()+","+p0.getType());
    if (punct_1s != null) {
      StringBuffer punctBuf = new StringBuffer(5);
      for (Iterator<Parse> pi=punct_1s.iterator();pi.hasNext();) {
        Parse punct = pi.next();
        punctBuf.append(punct.getType()).append(",");
      }
      //features.add("ppd="+punctProd+","+punctBuf.toString()+p0.getType());
      //features.add("pps="+fn.getType()+"->"+fn.getType()+","+punctBuf.toString()+p0.getType());
    }

View Full Code Here


  public String[] getContext(Parse parent, Parse[] constituents, int index, boolean trimFrontier) {
    List<String> features = new ArrayList<String>(100);
    //default
    features.add("default");
    Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet);
    Parse pstart = children[0];
    Parse pend = children[children.length-1];
    String type = parent.getType();
    checkcons(pstart, "begin", type, features);
    checkcons(pend, "last", type, features);
    String production = "p="+production(parent,false);
    String punctProduction = "pp="+production(parent,true);
    features.add(production);
    features.add(punctProduction);


    Parse p1 = null;
    Parse p2 = null;
    Parse p_1 = null;
    Parse p_2 = null;
    Collection<Parse> p1s = constituents[index].getNextPunctuationSet();
    Collection<Parse> p2s = null;
    Collection<Parse> p_1s = constituents[index].getPreviousPunctuationSet();
    Collection<Parse> p_2s = null;
    List<Parse> rf;
View Full Code Here

    List<String> features = new ArrayList<String>(100);
    int ps = constituents.length;

    // cons(-2), cons(-1), cons(0), cons(1), cons(2)
    // cons(-2)
    Parse p_2 = null;
    Parse p_1 = null;
    Parse p0 = null;
    Parse p1 = null;
    Parse p2 = null;

    Collection<Parse> punct1s = null;
    Collection<Parse> punct2s = null;
    Collection<Parse> punct_1s = null;
    Collection<Parse> punct_2s = null;

    if (index - 2 >= 0) {
      p_2 = constituents[index - 2];
    }
    if (index - 1 >= 0) {
      p_1 = constituents[index - 1];
      punct_2s = p_1.getPreviousPunctuationSet();
    }
    p0 = constituents[index];
    punct_1s=p0.getPreviousPunctuationSet();
    punct1s=p0.getNextPunctuationSet();

    if (index + 1 < ps) {
      p1 = constituents[index + 1];
      punct2s = p1.getNextPunctuationSet();
    }
    if (index + 2 < ps) {
      p2 = constituents[index + 2];
    }

    boolean u_2 = true;
    boolean u_1 = true;
    boolean u0 = true;
    boolean u1 = true;
    boolean u2 = true;
    boolean b_2_1 = true;
    boolean b_10 = true;
    boolean b01 = true;
    boolean b12 = true;
    boolean t_2_10 = true;
    boolean t_101 = true;
    boolean t012 = true;

    if (dict != null) {

      if (p_2 != null) {
        unigram[0] = p_2.getHead().toString();
        u_2 = dict.contains(new StringList(unigram));
      }

      if (p2 != null) {
        unigram[0] = p2.getHead().toString();
        u2 = dict.contains(new StringList(unigram));
      }

      unigram[0] = p0.getHead().toString();
      u0 = dict.contains(new StringList(unigram));

      if (p_2 != null && p_1 != null) {
        bigram[0] = p_2.getHead().toString();
        bigram[1] = p_1.getHead().toString();
        b_2_1 = dict.contains(new StringList(bigram));

        trigram[0] = p_2.getHead().toString();
        trigram[1] = p_1.getHead().toString();
        trigram[2] = p0.getHead().toString();
        t_2_10 = dict.contains(new StringList(trigram));
      }
      if (p_1 != null && p1 != null) {
        trigram[0] = p_1.getHead().toString();
        trigram[1] = p0.getHead().toString();
        trigram[2] = p1.getHead().toString();
        t_101 = dict.contains(new StringList(trigram));
      }
      if (p_1 != null) {
        unigram[0] = p_1.getHead().toString();
        u_1 = dict.contains(new StringList(unigram));

        //extra check for 2==null case
        b_2_1 = b_2_1 && u_1 & u_2;
        t_2_10 = t_2_10 && u_1 & u_2 & u0;
        t_101 = t_101 && u_1 & u0 && u1;

        bigram[0] = p_1.getHead().toString();
        bigram[1] = p0.getHead().toString();
        b_10 = dict.contains(new StringList(bigram)) && u_1 && u0;
      }
      if (p1 != null && p2 != null) {
        bigram[0] = p1.getHead().toString();
        bigram[1] = p2.getHead().toString();
        b12 = dict.contains(new StringList(bigram));

        trigram[0] = p0.getHead().toString();
        trigram[1] = p1.getHead().toString();
        trigram[2] = p2.getHead().toString();
        t012 = dict.contains(new StringList(trigram));
      }
      if (p1 != null) {
        unigram[0] = p1.getHead().toString();
        u1 = dict.contains(new StringList(unigram));

        //extra check for 2==null case
        b12 = b12 && u1 && u2;
        t012 = t012 && u1 && u2 && u0;
        t_101 = t_101 && u0 && u_1 && u1;

        bigram[0] = p0.getHead().toString();
        bigram[1] = p1.getHead().toString();
        b01 = dict.contains(new StringList(bigram));
        b01 = b01 && u0 && u1;
      }
    }

    String consp_2 = cons(p_2, -2);
    String consp_1 = cons(p_1, -1);
    String consp0 = cons(p0, 0);
    String consp1 = cons(p1, 1);
    String consp2 = cons(p2, 2);

    String consbop_2 = consbo(p_2, -2);
    String consbop_1 = consbo(p_1, -1);
    String consbop0 = consbo(p0, 0);
    String consbop1 = consbo(p1, 1);
    String consbop2 = consbo(p2, 2);

    Cons c_2 = new Cons(consp_2,consbop_2,-2,u_2);
    Cons c_1 = new Cons(consp_1,consbop_1,-1,u_1);
    Cons c0 = new Cons(consp0,consbop0,0,u0);
    Cons c1 = new Cons(consp1,consbop1,1,u1);
    Cons c2 = new Cons(consp2,consbop2,2,u2);

    //default
    features.add("default");
    //first constituent label
    //features.add("fl="+constituents[0].getLabel());

    // features.add("stage=cons(i)");
    // cons(-2), cons(-1), cons(0), cons(1), cons(2)
    if (u0) features.add(consp0);
    features.add(consbop0);

    if (u_2) features.add(consp_2);
    features.add(consbop_2);
    if (u_1) features.add(consp_1);
    features.add(consbop_1);
    if (u1) features.add(consp1);
    features.add(consbop1);
    if (u2) features.add(consp2);
    features.add(consbop2);

    //cons(0),cons(1)
    cons2(features,c0,c1,punct1s,b01);
    //cons(-1),cons(0)
    cons2(features,c_1,c0,punct_1s,b_10);
    //features.add("stage=cons(0),cons(1),cons(2)");
    cons3(features,c0,c1,c2,punct1s,punct2s,t012,b01,b12);
    cons3(features,c_2,c_1,c0,punct_2s,punct_1s,t_2_10,b_2_1,b_10);
    cons3(features,c_1,c0,c1,punct_1s,punct1s,t_101,b_10,b01);
    //features.add("stage=other");
    String p0Tag = p0.getType();
    if (p0Tag.equals("-RRB-")) {
      for (int pi = index - 1; pi >= 0; pi--) {
        Parse p = constituents[pi];
        if (p.getType().equals("-LRB-")) {
          features.add("bracketsmatch");
          break;
        }
        if (p.getLabel().startsWith(Parser.START)) {
          break;
        }
      }
    }
    if (p0Tag.equals("-RCB-")) {
      for (int pi = index - 1; pi >= 0; pi--) {
        Parse p = constituents[pi];
        if (p.getType().equals("-LCB-")) {
          features.add("bracketsmatch");
          break;
        }
        if (p.getLabel().startsWith(Parser.START)) {
          break;
        }
      }
    }
    if (p0Tag.equals("''")) {
      for (int pi = index - 1; pi >= 0; pi--) {
        Parse p = constituents[pi];
        if (p.getType().equals("``")) {
          features.add("quotesmatch");
          break;
        }
        if (p.getLabel().startsWith(Parser.START)) {
          break;
        }
      }
    }
    if (p0Tag.equals("'")) {
      for (int pi = index - 1; pi >= 0; pi--) {
        Parse p = constituents[pi];
        if (p.getType().equals("`")) {
          features.add("quotesmatch");
          break;
        }
        if (p.getLabel().startsWith(Parser.START)) {
          break;
        }
      }
    }
    if (p0Tag.equals(",")) {
      for (int pi = index - 1; pi >= 0; pi--) {
        Parse p = constituents[pi];
        if (p.getType().equals(",")) {
          features.add("iscomma");
          break;
        }
        if (p.getLabel().startsWith(Parser.START)) {
          break;
        }
      }
    }
    if (p0Tag.equals(".") && index == ps - 1) {
      for (int pi = index - 1; pi >= 0; pi--) {
        Parse p = constituents[pi];
        if (p.getLabel().startsWith(Parser.START)) {
          if (pi == 0) {
            features.add("endofsentence");
          }
          break;
        }
View Full Code Here

    }
   
    ParseConverter converter = new ParseConverter(sentence,(Span[])
        tokenSpans.toArray(new Span[tokenSpans.size()]));
   
   Parse parse = mParser.parse(converter.getParseForTagger());
 
   parse = converter.transformParseFromTagger(parse);
  
   if (mLogger.isLoggable(Level.INFO)) {
     StringBuffer parseString = new StringBuffer();
     parse.show(parseString);
    
     mLogger.log(Level.INFO, parseString.toString());
   }
  
   createAnnotation(cas, sentenceAnnotation.getBegin(), parse);
View Full Code Here

   createAnnotation(cas, sentenceAnnotation.getBegin(), parse);
  }
 
  protected void createAnnotation(CAS cas, int offset, Parse parse) {
   
    Parse parseChildrens[] = parse.getChildren();
   
    // do this for all children
    for (int i = 0; i < parseChildrens.length; i++) {
      Parse child = parseChildrens[i];
      createAnnotation(cas, offset, child);
    }
   
    AnnotationFS parseAnnotation = cas.createAnnotation(mParseType, offset +
        parse.getSpan().getStart(), offset + parse.getSpan().getEnd());
View Full Code Here

      // remove last space
      sentenceStringBuilder.setLength(sentenceStringBuilder.length() - 1);
     
      String tokenizedSentence = sentenceStringBuilder.toString();
     
      mParseForTagger = new Parse(tokenizedSentence,
          new Span(0, tokenizedSentence.length()), "INC", 1, null);
     
      int start = 0;
     
      for (int i = 0; i < tokenList.length; i++) {

        mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start,
            start + tokenList[i].length()),
            opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));

        start += tokenList[i].length() + 1;
      }
View Full Code Here

    Parse transformParseFromTagger(Parse parseFromTagger) {
      int start = parseFromTagger.getSpan().getStart();
      int end = parseFromTagger.getSpan().getEnd();
     
     
      Parse transformedParse = new Parse(mSentence,
          new Span(((Integer) mIndexMap.get(new Integer(start))).intValue(),
          ((Integer) mIndexMap.get(new Integer(end))).intValue()),
          parseFromTagger.getType(),
          parseFromTagger.getProb(), parseFromTagger.getHeadIndex());
     
     
      Parse[] parseFromTaggerChildrens = parseFromTagger.getChildren();
     
      // call this method for all childs ...
      for (int i = 0; i < parseFromTaggerChildrens.length; i++) {
       
        Parse child = parseFromTaggerChildrens[i];
       
        if (!child.getType().equals(
            opennlp.tools.parser.chunking.Parser.TOK_NODE)) {
       
          // only insert if it has childs
          if (child.getChildCount() > 0 &&
              !child.getChildren()[0].getType().equals(opennlp.tools.parser.chunking.Parser.TOK_NODE)) {
            transformedParse.insert(transformParseFromTagger(child));
          }
        }
      }
     
View Full Code Here

  public List<opennlp.tools.coref.mention.Parse> getNamedEntities() {
    List<Parse> names = new ArrayList<Parse>();
    List<Parse> kids = new LinkedList<Parse>(Arrays.asList(parse.getChildren()));
    while (kids.size() > 0) {
      Parse p = kids.remove(0);
      if (entitySet.contains(p.getType())) {
        names.add(p);
      }
      else {
        kids.addAll(Arrays.asList(p.getChildren()));
      }
    }
    return createParses(names.toArray(new Parse[names.size()]));
  }
View Full Code Here

  }

  public List<opennlp.tools.coref.mention.Parse> getSyntacticChildren() {
    List<Parse> kids = new ArrayList<Parse>(Arrays.asList(parse.getChildren()));
    for (int ci = 0; ci < kids.size(); ci++) {
      Parse kid = kids.get(ci);
      if (entitySet.contains(kid.getType())) {
        kids.remove(ci);
        kids.addAll(ci, Arrays.asList(kid.getChildren()));
        ci--;
      }
    }
    return createParses(kids.toArray(new Parse[kids.size()]));
  }
View Full Code Here

  public List<opennlp.tools.coref.mention.Parse> getTokens() {
    List<Parse> tokens = new ArrayList<Parse>();
    List<Parse> kids = new LinkedList<Parse>(Arrays.asList(parse.getChildren()));
    while (kids.size() > 0) {
      Parse p = kids.remove(0);
      if (p.isPosTag()) {
        tokens.add(p);
      }
      else {
        kids.addAll(0,Arrays.asList(p.getChildren()));
      }
    }
    return createParses(tokens.toArray(new Parse[tokens.size()]));
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.parser.Parse

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact software#gmail.com.