Package edu.stanford.nlp.trees

Source Code of edu.stanford.nlp.trees.SemanticHeadFinder

package edu.stanford.nlp.trees;

import edu.stanford.nlp.ling.HasCategory;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.util.ArrayUtils;
import edu.stanford.nlp.util.Generics;

import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.function.Predicate;


/**
* Implements a 'semantic head' variant of the the HeadFinder found
* in Michael Collins' 1999 thesis.
* This version chooses the semantic head verb rather than the verb form
* for cases with verbs.  And it makes similar themed changes to other
* categories: e.g., in question phrases, like "Which Brazilian game", the
* head is made "game" not "Which" as in common PTB head rules.<p/>
* <p/>
* By default the SemanticHeadFinder uses a treatment of copula where the
* complement of the copula is taken as the head.  That is, a sentence like
* "Bill is big" will be analyzed as <p/>
* <p/>
* <code>nsubj</code>(big, Bill) <br/>
* <code>cop</code>(big, is) <p/>
* <p/>
* This analysis is used for questions and declaratives for adjective
* complements and declarative nominal complements.  However Wh-sentences
* with nominal complements do not receive this treatment.
* "Who is the president?" is analyzed with "the president" as nsubj and "who"
* as "attr" of the copula:<p/><p>
* <code>nsubj</code>(is, president)<br/>
* <code>attr</code>(is, Who) <p/>
* <p/>
* (Such nominal copula sentences are complex: arguably, depending on the
* circumstances, several analyses are possible, with either the overt NP able
* to be any of the subject, the predicate, or one of two referential entities
* connected by an equational copula.  These uses aren't differentiated.)
* <p/>
* Existential sentences are treated as follows:  <br/>
* "There is a man" <br/>
* <code>expl</code>(is, There) <br/>
* <code>det</code>(man-4, a-3) <br/>
* <code>nsubj</code>(is-2, man-4)<br/>
*
* @author John Rappaport
* @author Marie-Catherine de Marneffe
* @author Anna Rafferty
*/
public class SemanticHeadFinder extends ModCollinsHeadFinder {

  private static final boolean DEBUG = System.getProperty("SemanticHeadFinder", null) != null;

  /* A few times the apostrophe is missing on "'s", so we have "s" */
  /* Tricky auxiliaries: "na" is from "gonna", "ve" from "Weve", etc.  "of" as non-standard for "have" */
  private static final String[] auxiliaries = {"will", "wo", "shall", "sha", "may", "might", "should", "would", "can", "could", "ca", "must", "has", "have", "had", "having", "get", "gets", "getting", "got", "gotten", "do", "does", "did", "to", "'ve", "ve", "v", "'d", "d", "'ll", "ll", "na", "of", "hav", "hvae", "as" };
  private static final String[] beGetVerbs = {"be", "being", "been", "am", "are", "r", "is", "ai", "was", "were", "'m", "m", "'re", "'s", "s", "art", "ar", "get", "getting", "gets", "got"};
  static final String[] copulaVerbs = {"be", "being", "been", "am", "are", "r", "is", "ai", "was", "were", "'m", "m", "ar", "art", "'re", "'s", "s", "wase"};

  // include Charniak tags so can do BLLIP right
  private static final String[] verbTags = {"TO", "MD", "VB", "VBD", "VBP", "VBZ", "VBG", "VBN", "AUX", "AUXG"};
  // These ones are always auxiliaries, even if the word is "too", "my", or whatever else appears in web text.
  private static final String[] unambiguousAuxTags = {"TO", "MD", "AUX", "AUXG"};


  private final Set<String> verbalAuxiliaries;
  private final Set<String> copulars;
  private final Set<String> passiveAuxiliaries;
  private final Set<String> verbalTags;
  private final Set<String> unambiguousAuxiliaryTags;

  private final boolean makeCopulaHead;


  public SemanticHeadFinder() {
    this(new PennTreebankLanguagePack(), true);
  }

  public SemanticHeadFinder(boolean noCopulaHead) {
    this(new PennTreebankLanguagePack(), noCopulaHead);
  }


  /** Create a SemanticHeadFinder.
   *
   * @param tlp The TreebankLanguagePack, used by the superclass to get basic
   *     category of constituents.
   * @param noCopulaHead If true, a copular verb
   *     (be, seem, appear, stay, remain, resemble, become)
   *     is not treated as head when it has an AdjP or NP complement.  If false,
   *     a copula verb is still always treated as a head.  But it will still
   *     be treated as an auxiliary in periphrastic tenses with a VP complement.
   */
  public SemanticHeadFinder(TreebankLanguagePack tlp, boolean noCopulaHead) {
    super(tlp);
    ruleChanges();

    // make a distinction between auxiliaries and copula verbs to
    // get the NP has semantic head in sentences like "Bill is an honest man".  (Added "sha" for "shan't" May 2009
    verbalAuxiliaries = Generics.newHashSet(Arrays.asList(auxiliaries));

    passiveAuxiliaries = Generics.newHashSet(Arrays.asList(beGetVerbs));

    //copula verbs having an NP complement
    copulars = Generics.newHashSet();
    if (noCopulaHead) {
      copulars.addAll(Arrays.asList(copulaVerbs));
    }

    // TODO: reverse the polarity of noCopulaHead
    this.makeCopulaHead = !noCopulaHead;

    verbalTags = Generics.newHashSet(Arrays.asList(verbTags));
    unambiguousAuxiliaryTags = Generics.newHashSet(Arrays.asList(unambiguousAuxTags));
  }

  @Override
  public boolean makesCopulaHead() {
    return makeCopulaHead;
  }

  //makes modifications of Collins' rules to better fit with semantic notions of heads
  private void ruleChanges() {
    //  NP: don't want a POS to be the head
    // verbs are here so that POS isn't favored in the case of bad parses
    nonTerminalInfo.put("NP", new String[][]{{"rightdis", "NN", "NNP", "NNPS", "NNS", "NX", "NML", "JJR", "WP" }, {"left", "NP", "PRP"}, {"rightdis", "$", "ADJP", "FW"}, {"right", "CD"}, {"rightdis", "JJ", "JJS", "QP", "DT", "WDT", "NML", "PRN", "RB", "RBR", "ADVP"}, {"rightdis", "VP", "VB", "VBZ", "VBD", "VBP"}, {"left", "POS"}});
    nonTerminalInfo.put("NX", nonTerminalInfo.get("NP"));
    nonTerminalInfo.put("NML", nonTerminalInfo.get("NP"));
    // WHNP clauses should have the same sort of head as an NP
    // but it a WHNP has a NP and a WHNP under it, the WHNP should be the head.  E.g.,  (WHNP (WHNP (WP$ whose) (JJ chief) (JJ executive) (NN officer))(, ,) (NP (NNP James) (NNP Gatward))(, ,))
    nonTerminalInfo.put("WHNP", new String[][]{{"rightdis", "NN", "NNP", "NNPS", "NNS", "NX", "NML", "JJR", "WP"}, {"left", "WHNP", "NP"}, {"rightdis", "$", "ADJP", "PRN", "FW"}, {"right", "CD"}, {"rightdis", "JJ", "JJS", "RB", "QP"}, {"left", "WHPP", "WHADJP", "WP$", "WDT"}});
    //WHADJP
    nonTerminalInfo.put("WHADJP", new String[][]{{"left", "ADJP", "JJ", "JJR", "WP"}, {"right", "RB"}, {"right"}});
    //WHADJP
    nonTerminalInfo.put("WHADVP", new String[][]{{"rightdis", "WRB", "WHADVP", "RB", "JJ"}}); // if not WRB or WHADVP, probably has flat NP structure, allow JJ for "how long" constructions
    // QP: we don't want the first CD to be the semantic head (e.g., "three billion": head should be "billion"), so we go from right to left
    nonTerminalInfo.put("QP", new String[][]{{"right", "$", "NNS", "NN", "CD", "JJ", "PDT", "DT", "IN", "RB", "NCD", "QP", "JJR", "JJS"}});

    // S, SBAR and SQ clauses should prefer the main verb as the head
    // S: "He considered him a friend" -> we want a friend to be the head
    nonTerminalInfo.put("S", new String[][]{{"left", "VP", "S", "FRAG", "SBAR", "ADJP", "UCP", "TO"}, {"right", "NP"}});

    nonTerminalInfo.put("SBAR", new String[][]{{"left", "S", "SQ", "SINV", "SBAR", "FRAG", "VP", "WHNP", "WHPP", "WHADVP", "WHADJP", "IN", "DT"}});
    // VP shouldn't be needed in SBAR, but occurs in one buggy tree in PTB3 wsj_1457 and otherwise does no harm

    nonTerminalInfo.put("SQ", new String[][]{{"left", "VP", "SQ", "ADJP", "VB", "VBZ", "VBD", "VBP", "MD", "AUX", "AUXG"}});


    // UCP take the first element as head
    nonTerminalInfo.put("UCP", new String[][]{{"left"}});

    // CONJP: we want different heads for "but also" and "but not" and we don't want "not" to be the head in "not to mention"; now make "mention" head of "not to mention"
    nonTerminalInfo.put("CONJP", new String[][]{{"right", "CC", "VB", "JJ", "RB", "IN" }});

    // FRAG: crap rule needs to be change if you want to parse
    // glosses; but it is correct to have ADJP and ADVP before S
    // because of weird parses of reduced sentences.
    nonTerminalInfo.put("FRAG", new String[][]{{"left", "IN"}, {"right", "RB"}, {"left", "NP"}, {"left", "ADJP", "ADVP", "FRAG", "S", "SBAR", "VP"}});

    // PRN: sentence first
    nonTerminalInfo.put("PRN", new String[][]{{"left", "VP", "SQ", "S", "SINV", "SBAR", "NP", "ADJP", "PP", "ADVP", "INTJ", "WHNP", "NAC", "VBP", "JJ", "NN", "NNP"}});

    // add the constituent XS (special node to add a layer in a QP tree introduced in our QPTreeTransformer)
    nonTerminalInfo.put("XS", new String[][]{{"right", "IN"}});

    // add a rule to deal with the CoNLL data
    nonTerminalInfo.put("EMBED", new String[][]{{"right", "INTJ"}});

  }


  private boolean shouldSkip(Tree t, boolean origWasInterjection) {
    return t.isPreTerminal() && (tlp.isPunctuationTag(t.value()) || ! origWasInterjection && "UH".equals(t.value())) ||
           "INTJ".equals(t.value()) && ! origWasInterjection;
  }

  private int findPreviousHead(int headIdx, Tree[] daughterTrees, boolean origWasInterjection) {
    boolean seenSeparator = false;
    int newHeadIdx = headIdx;
    while (newHeadIdx >= 0) {
      newHeadIdx = newHeadIdx - 1;
      if (newHeadIdx < 0) {
        return newHeadIdx;
      }
      String label = tlp.basicCategory(daughterTrees[newHeadIdx].value());
      if (",".equals(label) || ":".equals(label)) {
        seenSeparator = true;
      } else if (daughterTrees[newHeadIdx].isPreTerminal() && (tlp.isPunctuationTag(label) || ! origWasInterjection && "UH".equals(label)) ||
               "INTJ".equals(label) && ! origWasInterjection) {
        // keep looping
      } else {
        if ( ! seenSeparator) {
          newHeadIdx = -1;
        }
        break;
      }
    }
    return newHeadIdx;
  }

  /**
   * Overwrite the postOperationFix method.  For "a, b and c" or similar: we want "a" to be the head.
   */
  @Override
  protected int postOperationFix(int headIdx, Tree[] daughterTrees) {
    if (headIdx >= 2) {
      String prevLab = tlp.basicCategory(daughterTrees[headIdx - 1].value());
      if (prevLab.equals("CC") || prevLab.equals("CONJP")) {
        boolean origWasInterjection = "UH".equals(tlp.basicCategory(daughterTrees[headIdx].value()));
        int newHeadIdx = headIdx - 2;
        // newHeadIdx is now left of conjunction.  Now try going back over commas, etc. for 3+ conjuncts
        // Don't allow INTJ unless conjoined with INTJ - important in informal genres "Oh and don't forget to call!"
        while (newHeadIdx >= 0 && shouldSkip(daughterTrees[newHeadIdx], origWasInterjection)) {
          newHeadIdx--;
        }
        // We're now at newHeadIdx < 0 or have found a left head
        // Now consider going back some number of punct that includes a , or : tagged thing and then find non-punct
        while (newHeadIdx >= 2) {
          int nextHead = findPreviousHead(newHeadIdx, daughterTrees, origWasInterjection);
          if (nextHead < 0) {
            break;
          }
          newHeadIdx = nextHead;
        }
        if (newHeadIdx >= 0) {
          headIdx = newHeadIdx;
        }
      }
    }
    return headIdx;
  }

  // Note: The first two SBARQ patterns only work when the SQ
  // structure has already been removed in CoordinationTransformer.
  static final TregexPattern[] headOfCopulaTregex = {
    // Matches phrases such as "what is wrong"
    TregexPattern.compile("SBARQ < (WHNP $++ (/^VB/ < " + EnglishPatterns.copularWordRegex + " $++ ADJP=head))"),

    // matches WHNP $+ VB<copula $+ NP
    // for example, "Who am I to judge?"
    // !$++ ADJP matches against "Why is the dog pink?"
    TregexPattern.compile("SBARQ < (WHNP=head $++ (/^VB/ < " + EnglishPatterns.copularWordRegex + " $+ NP !$++ ADJP))"),

    // Actually somewhat limited in scope, this detects "Tuesday it is",
    // "Such a great idea this was", etc
    TregexPattern.compile("SINV < (NP=head $++ (NP $++ (VP < (/^(?:VB|AUX)/ < " + EnglishPatterns.copularWordRegex + "))))"),
  };

  static final TregexPattern[] headOfConjpTregex = {
    TregexPattern.compile("CONJP < (CC <: /^(?i:but|and)$/ $+ (RB=head <: /^(?i:not)$/))"),
    TregexPattern.compile("CONJP < (CC <: /^(?i:but)$/ [ ($+ (RB=head <: /^(?i:also|rather)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:also|rather)$/))) ])"),
    TregexPattern.compile("CONJP < (CC <: /^(?i:and)$/ [ ($+ (RB=head <: /^(?i:yet)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:yet)$/))) ])"),
  };

  static final TregexPattern noVerbOverTempTregex = TregexPattern.compile("/^VP/ < NP-TMP !< /^V/ !< NNP|NN|NNPS|NNS|NP|JJ|ADJP|S");

  /**
   * We use this to avoid making a -TMP or -ADV the head of a copular phrase.
   * For example, in the sentence "It is hands down the best dessert ...",
   * we want to avoid using "hands down" as the head.
   */
  static final Predicate<Tree> REMOVE_TMP_AND_ADV = tree -> {
    if (tree == null)
      return false;
    Label label = tree.label();
    if (label == null)
      return false;
    if (label.value().contains("-TMP") || label.value().contains("-ADV"))
      return false;
    if (label.value().startsWith("VP") && noVerbOverTempTregex.matcher(tree).matches()) {
      return false;
    }
    return true;
  };

  /**
   * Determine which daughter of the current parse tree is the
   * head.  It assumes that the daughters already have had their
   * heads determined.  Uses special rule for VP heads
   *
   * @param t The parse tree to examine the daughters of.
   *          This is assumed to never be a leaf
   * @return The parse tree that is the head
   */
  @Override
  protected Tree determineNonTrivialHead(Tree t, Tree parent) {
    String motherCat = tlp.basicCategory(t.label().value());

    if (DEBUG) {
      System.err.println("At " + motherCat + ", my parent is " + parent);
    }

    // Some conj expressions seem to make more sense with the "not" or
    // other key words as the head.  For example, "and not" means
    // something completely different than "and".  Furthermore,
    // downstream code was written assuming "not" would be the head...
    if (motherCat.equals("CONJP")) {
      for (TregexPattern pattern : headOfConjpTregex) {
        TregexMatcher matcher = pattern.matcher(t);
        if (matcher.matchesAt(t)) {
          return matcher.getNode("head");
        }
      }
      // if none of the above patterns match, use the standard method
    }

    if (motherCat.equals("SBARQ") || motherCat.equals("SINV")) {
      if (!makeCopulaHead) {
        for (TregexPattern pattern : headOfCopulaTregex) {
          TregexMatcher matcher = pattern.matcher(t);
          if (matcher.matchesAt(t)) {
            return matcher.getNode("head");
          }
        }
      }
      // if none of the above patterns match, use the standard method
    }

    Tree[] tmpFilteredChildren = null;

    // do VPs with auxiliary as special case
    if ((motherCat.equals("VP") || motherCat.equals("SQ") || motherCat.equals("SINV"))) {
      Tree[] kids = t.children();
      // try to find if there is an auxiliary verb

      if (DEBUG) {
        System.err.println("Semantic head finder: at VP");
        System.err.println("Class is " + t.getClass().getName());
        t.pennPrint(System.err);
        //System.err.println("hasVerbalAuxiliary = " + hasVerbalAuxiliary(kids, verbalAuxiliaries));
      }

      // looks for auxiliaries
      if (hasVerbalAuxiliary(kids, verbalAuxiliaries, true) || hasPassiveProgressiveAuxiliary(kids)) {
        // String[] how = new String[] {"left", "VP", "ADJP", "NP"};
        // Including NP etc seems okay for copular sentences but is
        // problematic for other auxiliaries, like 'he has an answer'
        // But maybe doing ADJP is fine!
        String[] how = { "left", "VP", "ADJP" };
        if (tmpFilteredChildren == null) {
          tmpFilteredChildren = ArrayUtils.filter(kids, REMOVE_TMP_AND_ADV);
        }
        Tree pti = traverseLocate(tmpFilteredChildren, how, false);
        if (DEBUG) {
          System.err.println("Determined head (case 1) for " + t.value() + " is: " + pti);
        }
        if (pti != null) {
          return pti;
        // } else {
          // System.err.println("------");
          // System.err.println("SemanticHeadFinder failed to reassign head for");
          // t.pennPrint(System.err);
          // System.err.println("------");
        }
      }

      // looks for copular verbs
      if (hasVerbalAuxiliary(kids, copulars, false) && ! isExistential(t, parent) && ! isWHQ(t, parent)) {
        String[] how;
        if (motherCat.equals("SQ")) {
          how = new String[]{"right", "VP", "ADJP", "NP", "WHADJP", "WHNP"};
        } else {
          how = new String[]{"left", "VP", "ADJP", "NP", "WHADJP", "WHNP"};
        }
        // Avoid undesirable heads by filtering them from the list of potential children
        if (tmpFilteredChildren == null) {
          tmpFilteredChildren = ArrayUtils.filter(kids, REMOVE_TMP_AND_ADV);
        }
        Tree pti = traverseLocate(tmpFilteredChildren, how, false);
        // In SQ, only allow an NP to become head if there is another one to the left (then it's probably predicative)
        if (motherCat.equals("SQ") && pti != null && pti.label() != null && pti.label().value().startsWith("NP")) {
            boolean foundAnotherNp = false;
            for (Tree kid : kids) {
              if (kid == pti) {
                break;
              } else if (kid.label() != null && kid.label().value().startsWith("NP")) {
                foundAnotherNp = true;
                break;
              }
            }
          if ( ! foundAnotherNp) {
            pti = null;
          }
        }

        if (DEBUG) {
          System.err.println("Determined head (case 2) for " + t.value() + " is: " + pti);
        }
        if (pti != null) {
          return pti;
        } else {
          if (DEBUG) {
            System.err.println("------");
            System.err.println("SemanticHeadFinder failed to reassign head for");
            t.pennPrint(System.err);
            System.err.println("------");
          }
        }
      }
    }

    Tree hd = super.determineNonTrivialHead(t, parent);

    /* ----
    // This should now be handled at the AbstractCollinsHeadFinder level, so see if we can comment this out
    // Heuristically repair punctuation heads
    Tree[] hdChildren = hd.children();
    if (hdChildren != null && hdChildren.length > 0 &&
        hdChildren[0].isLeaf()) {
      if (tlp.isPunctuationWord(hdChildren[0].label().value())) {
         Tree[] tChildren = t.children();
         if (DEBUG) {
           System.err.printf("head is punct: %s\n", hdChildren[0].label());
         }
         for (int i = tChildren.length - 1; i >= 0; i--) {
           if (!tlp.isPunctuationWord(tChildren[i].children()[0].label().value())) {
             hd = tChildren[i];
             if (DEBUG) {
               System.err.printf("New head of %s is %s%n", hd.label(), hd.children()[0].label());
             }
             break;
           }
         }
      }
    }
    */

    if (DEBUG) {
      System.err.println("Determined head (case 3) for " + t.value() + " is: " + hd);
    }
    return hd;
  }

  /* Checks whether the tree t is an existential constituent
   * There are two cases:
   * -- affirmative sentences in which "there" is a left sister of the VP
   * -- questions in which "there" is a daughter of the SQ.
   *
   */
  private boolean isExistential(Tree t, Tree parent) {
    if (DEBUG) {
      System.err.println("isExistential: " + t + ' ' + parent);
    }
    boolean toReturn = false;
    String motherCat = tlp.basicCategory(t.label().value());
    // affirmative case
    if (motherCat.equals("VP") && parent != null) {
      //take t and the sisters
      Tree[] kids = parent.children();
      // iterate over the sisters before t and checks if existential
      for (Tree kid : kids) {
        if (!kid.value().equals("VP")) {
          List<Label> tags = kid.preTerminalYield();
          for (Label tag : tags) {
            if (tag.value().equals("EX")) {
              toReturn = true;
            }
          }
        } else {
          break;
        }
      }
    }
    // question case
    else if (motherCat.startsWith("SQ") && parent != null) {
      //take the daughters
      Tree[] kids = parent.children();
      // iterate over the daughters and checks if existential
      for (Tree kid : kids) {
        if (!kid.value().startsWith("VB")) {//not necessary to look into the verb
          List<Label> tags = kid.preTerminalYield();
          for (Label tag : tags) {
            if (tag.value().equals("EX")) {
              toReturn = true;
            }
          }
        }
      }
    }

    if (DEBUG) {
      System.err.println("decision " + toReturn);
    }

    return toReturn;
  }


  /* Is the tree t a WH-question?
   *  At present this is only true if the tree t is a SQ having a WH.* sister
   *  and headed by a SBARQ.
   * (It was changed to looser definition in Feb 2006.)
   *
   */
  private static boolean isWHQ(Tree t, Tree parent) {
    if (t == null) {
      return false;
    }
    boolean toReturn = false;
    if (t.value().startsWith("SQ")) {
      if (parent != null && parent.value().equals("SBARQ")) {
        Tree[] kids = parent.children();
        for (Tree kid : kids) {
          // looks for a WH.*
          if (kid.value().startsWith("WH")) {
            toReturn = true;
          }
        }
      }
    }

    if (DEBUG) {
      System.err.println("in isWH, decision: " + toReturn + " for node " + t);
    }

    return toReturn;
  }

  private boolean isVerbalAuxiliary(Tree preterminal, Set<String> verbalSet, boolean allowJustTagMatch) {
    if (preterminal.isPreTerminal()) {
      Label kidLabel = preterminal.label();
      String tag = null;
      if (kidLabel instanceof HasTag) {
        tag = ((HasTag) kidLabel).tag();
      }
      if (tag == null) {
        tag = preterminal.value();
      }
      Label wordLabel = preterminal.firstChild().label();
      String word = null;
      if (wordLabel instanceof HasWord) {
        word = ((HasWord) wordLabel).word();
      }
      if (word == null) {
        word = wordLabel.value();
      }

      if (DEBUG) {
        System.err.println("Checking " + preterminal.value() + " head is " + word + '/' + tag);
      }
      String lcWord = word.toLowerCase();
      if (allowJustTagMatch && unambiguousAuxiliaryTags.contains(tag) || verbalTags.contains(tag) && verbalSet.contains(lcWord)) {
        if (DEBUG) {
          System.err.println("isAuxiliary found desired type of aux");
        }
        return true;
      }
    }
    return false;
  }

  /**
   * Returns true if this tree is a preterminal that is a verbal auxiliary.
   *
   * @param t A tree to examine for being an auxiliary.
   * @return Whether it is a verbal auxiliary (be, do, have, get)
   */
  public boolean isVerbalAuxiliary(Tree t) {
    return isVerbalAuxiliary(t, verbalAuxiliaries, true);
  }


  // now overly complex so it deals with coordinations.  Maybe change this class to use tregrex?
  private boolean hasPassiveProgressiveAuxiliary(Tree[] kids) {
    if (DEBUG) {
      System.err.println("Checking for passive/progressive auxiliary");
    }
    boolean foundPassiveVP = false;
    boolean foundPassiveAux = false;
    for (Tree kid : kids) {
      if (DEBUG) {
        System.err.println("  checking in " + kid);
      }
      if (isVerbalAuxiliary(kid, passiveAuxiliaries, false)) {
          foundPassiveAux = true;
      } else if (kid.isPhrasal()) {
        Label kidLabel = kid.label();
        String cat = null;
        if (kidLabel instanceof HasCategory) {
          cat = ((HasCategory) kidLabel).category();
        }
        if (cat == null) {
          cat = kid.value();
        }
        if ( ! cat.startsWith("VP")) {
          continue;
        }
        if (DEBUG) {
          System.err.println("hasPassiveProgressiveAuxiliary found VP");
        }
        Tree[] kidkids = kid.children();
        boolean foundParticipleInVp = false;
        for (Tree kidkid : kidkids) {
          if (DEBUG) {
            System.err.println("  hasPassiveProgressiveAuxiliary examining " + kidkid);
          }
          if (kidkid.isPreTerminal()) {
            Label kidkidLabel = kidkid.label();
            String tag = null;
            if (kidkidLabel instanceof HasTag) {
              tag = ((HasTag) kidkidLabel).tag();
            }
            if (tag == null) {
              tag = kidkid.value();
            }
            // we allow in VBD because of frequent tagging mistakes
            if ("VBN".equals(tag) || "VBG".equals(tag) || "VBD".equals(tag)) {
              foundPassiveVP = true;
              if (DEBUG) {
                System.err.println("hasPassiveAuxiliary found VBN/VBG/VBD VP");
              }
              break;
            } else if ("CC".equals(tag) && foundParticipleInVp) {
              foundPassiveVP = true;
              if (DEBUG) {
                System.err.println("hasPassiveAuxiliary [coordination] found (VP (VP[VBN/VBG/VBD] CC");
              }
              break;
            }
          } else if (kidkid.isPhrasal()) {
            String catcat = null;
            if (kidLabel instanceof HasCategory) {
              catcat = ((HasCategory) kidLabel).category();
            }
            if (catcat == null) {
              catcat = kid.value();
            }
            if ("VP".equals(catcat)) {
              if (DEBUG) {
                System.err.println("hasPassiveAuxiliary found (VP (VP)), recursing");
              }
              foundParticipleInVp = vpContainsParticiple(kidkid);
            } else if (("CONJP".equals(catcat) || "PRN".equals(catcat)) && foundParticipleInVp) { // occasionally get PRN in CONJ-like structures
              foundPassiveVP = true;
              if (DEBUG) {
                System.err.println("hasPassiveAuxiliary [coordination] found (VP (VP[VBN/VBG/VBD] CONJP");
              }
              break;
            }
          }
        }
      }
      if (foundPassiveAux && foundPassiveVP) {
        break;
      }
    } // end for (Tree kid : kids)
    if (DEBUG) {
      System.err.println("hasPassiveProgressiveAuxiliary returns " + (foundPassiveAux && foundPassiveVP));
    }
    return foundPassiveAux && foundPassiveVP;
  }

  private static boolean vpContainsParticiple(Tree t) {
    for (Tree kid : t.children()) {
      if (DEBUG) {
        System.err.println("vpContainsParticiple examining " + kid);
      }
      if (kid.isPreTerminal()) {
        Label kidLabel = kid.label();
        String tag = null;
        if (kidLabel instanceof HasTag) {
          tag = ((HasTag) kidLabel).tag();
        }
        if (tag == null) {
          tag = kid.value();
        }
        if ("VBN".equals(tag) || "VBG".equals(tag) || "VBD".equals(tag)) {
          if (DEBUG) {
            System.err.println("vpContainsParticiple found VBN/VBG/VBD VP");
          }
          return true;
        }
      }
    }
    return false;
  }


  /** This looks to see whether any of the children is a preterminal headed by a word
   *  which is within the set verbalSet (which in practice is either
   *  auxiliary or copula verbs).  It only returns true if it's a preterminal head, since
   *  you don't want to pick things up in phrasal daughters.  That is an error.
   *
   * @param kids The child trees
   * @param verbalSet The set of words
   * @param allowTagOnlyMatch If true, it's sufficient to match on an unambiguous auxiliary tag.
   *                          Make true iff verbalSet is "all auxiliaries"
   * @return Returns true if one of the child trees is a preterminal verb headed
   *      by a word in verbalSet
   */
  private boolean hasVerbalAuxiliary(Tree[] kids, Set<String> verbalSet, boolean allowTagOnlyMatch) {
    if (DEBUG) {
      System.err.println("Checking for verbal auxiliary");
    }
    for (Tree kid : kids) {
      if (DEBUG) {
        System.err.println("  checking in " + kid);
      }
      if (isVerbalAuxiliary(kid, verbalSet, allowTagOnlyMatch)) {
        return true;
      }
    }
    if (DEBUG) {
      System.err.println("hasVerbalAuxiliary returns false");
    }
    return false;
  }


  private static final long serialVersionUID = 5721799188009249808L;

}
TOP

Related Classes of edu.stanford.nlp.trees.SemanticHeadFinder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.