Package opennlp.tools.formats.ad.ADSentenceStream.SentenceParser

Examples of opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node


     */
    public Sentence parse(String sentenceString, int para, boolean isTitle, boolean isBox) {
      BufferedReader reader = new BufferedReader(new StringReader(
          sentenceString));
      Sentence sentence = new Sentence();
      Node root = new Node();
      try {
        // first line is <s ...>
        String line = reader.readLine();

        boolean useSameTextAndMeta = false; // handles cases where there are different suggested parses of the same sentence (&&)

          // should find the line that starts with "SOURCE"
          while (!line.startsWith("SOURCE")) {
            if(line.equals("&&")) {
              // same sentence again!
              useSameTextAndMeta = true;
              break;
            }
            line = reader.readLine();
            if (line == null) {
              return null;
            }
          }
        if(!useSameTextAndMeta) {
            // got source, get the metadata
          String metaFromSource = line.substring(7);
          line = reader.readLine();
          // we should have the plain sentence
          // we remove the first token
          int start = line.indexOf(" ");
          text = line.substring(start + 1).trim();
          text = fixPunctuation(text);
          String titleTag = "";
          if(isTitle) titleTag = " title";
          String boxTag = "";
          if(isBox) boxTag = " box";
          if(start > 0) {
            meta = line.substring(0, start) + " p=" + para + titleTag + boxTag + metaFromSource;
          } else {
            // rare case where there is no space between the id and the sentence.
              // will use the previous meta for now
          }
        }
        sentence.setText(text);
        sentence.setMetadata(meta);
        // now we look for the root node

        // skip lines starting with ###
        line = reader.readLine();
        while(line != null && line.startsWith("###")) {
          line = reader.readLine();
        }

        // got the root. Add it to the stack
        Stack<Node> nodeStack = new Stack<Node>();

        root.setSyntacticTag("ROOT");
        root.setLevel(0);
        nodeStack.add(root);


        /* Now we have to take care of the lastLevel. Every time it rises, we add the
        leaf to the node at the top. If it decreases, we remove the top. */

        while (line != null && line.length() != 0 && line.startsWith("</s>") == false && !line.equals("&&")) {
          TreeElement element = this.getElement(line);

          if(element != null) {
            // The idea here is to keep a stack of nodes that are candidates for
            // parenting the following elements (nodes and leafs).

            // 1) When we get a new element, we check its level and remove from
            // the top of the stack nodes that are brothers or nephews.
            while (!nodeStack.isEmpty() && element.getLevel() > 0
                && element.getLevel() <= nodeStack.peek().getLevel()) {
              Node nephew = nodeStack.pop();
            }

            if( element.isLeaf() ) {
              // 2a) If the element is a leaf and there is no parent candidate,
              // add it as a daughter of the root.
              if (nodeStack.isEmpty()) {
                root.addElement(element);
              } else {
                // 2b) There are parent candidates.
                // look for the node with the correct level
                Node peek = nodeStack.peek();
                if (element.level == 0) { // add to the root
                  nodeStack.firstElement().addElement(element);
                } else {
                  Node parent = null;
                  int index = nodeStack.size() - 1;
                  while (parent == null) {
                    if (peek.getLevel() < element.getLevel()) {
                      parent = peek;
                    } else {
                      index--;
                      if (index > -1) {
                        peek = nodeStack.get(index);
                      } else {
                        parent = nodeStack.firstElement();
                      }
                    }
                  }
                  parent.addElement(element);
                }
              }
            } else {
              // 3) Check if the element that is at the top of the stack is this
              // node parent, if yes add it as a son
View Full Code Here


  }

  public POSSample read() throws IOException {
    Sentence paragraph;
    while ((paragraph = this.adSentenceStream.read()) != null) {
      Node root = paragraph.getRoot();
      List<String> sentence = new ArrayList<String>();
      List<String> tags = new ArrayList<String>();
      process(root, sentence, tags);

      return new POSSample(sentence, tags);
View Full Code Here

      // try node
      Matcher nodeMatcher = nodePattern.matcher(line);
      if (nodeMatcher.matches()) {
        int level = nodeMatcher.group(1).length() + 1;
        String syntacticTag = nodeMatcher.group(2);
        Node node = new Node();
        node.setLevel(level);
        node.setSyntacticTag(syntacticTag);
        return node;
      }

      Matcher leafMatcher = leafPattern.matcher(line);
      if (leafMatcher.matches()) {
View Full Code Here

      if(currentTextID != textID) {
        clearData = true;
        textID = currentTextID;
      }

      Node root = paragraph.getRoot();
      List<String> sentence = new ArrayList<String>();
      List<Span> names = new ArrayList<Span>();
      process(root, sentence, names);

      return new NameSample(sentence.toArray(new String[sentence.size()]),
View Full Code Here

TOP

Related Classes of opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.