Package org.apache.nutch.searcher.Query

Examples of org.apache.nutch.searcher.Query.Clause


  /** Run all defined filters. */
  public BooleanQuery filter(Query input) throws QueryException {
    // first check that all field names are claimed by some plugin
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];
      if (!isField(c.getField()))
        throw new QueryException("Not a known field name:"+c.getField());
    }

    // then run each plugin
    BooleanQuery output = new BooleanQuery();
    for (int i = 0; i < this.queryFilters.length; i++) {
View Full Code Here


    throws QueryException {

    // examine each clause in the Nutch query
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];

      // skip non-matching clauses
      String fieldName = c.getField();
      if (!fieldNames.contains(fieldName)) {
        continue;
      }
      String value = c.getTerm().toString().toLowerCase();

      // add a Lucene TermQuery for this clause
      TermQuery clause = new TermQuery(new Term(fieldName, value));
      // set boost
      if (boosts.containsKey(fieldName)) {
        clause.setBoost(boosts.get(fieldName));
      }

      // add it as specified in query
      output.add(clause, (c.isProhibited() ? BooleanClause.Occur.MUST_NOT
        : (c.isRequired() ? BooleanClause.Occur.MUST
          : BooleanClause.Occur.SHOULD)));
    }

    // return the modified Lucene query
    return output;
View Full Code Here

     * @param output
     */
    private void buildQuery(Query input, BooleanQuery output, Configuration conf) {
        Clause[] clauses = input.getClauses();
        for (int i = 0; i < clauses.length; i++) {
          Clause c = clauses[i];

          if (!c.getField().equals(Clause.DEFAULT_FIELD))
            continue;                                 // skip non-default fields
                 
          String[] sterms = null;
          if (c.isPhrase()) {
            //sterms = new CommonGrams(conf).optimizePhrase(c.getPhrase(), FIELDS[f]);         
            sterms = new String[c.getPhrase().getTerms().length];
            for (int j=0; j<sterms.length; j++) {
              sterms[j]=c.getPhrase().getTerms()[j].toString();
            }
          }
          else {
            sterms = new String[1];
            sterms[0]=c.getTerm().toString();
          }

          for (int j=0; j<sterms.length; j++) {   
            BooleanQuery out = new BooleanQuery();
            for (int f=0; f<FIELDS.length; f++) {                                     
              out.add(new TermQuery(new Term(FIELDS[f], sterms[j])), BooleanClause.Occur.SHOULD);
            }
            output.add(out, (c.isProhibited()
                      ? BooleanClause.Occur.MUST_NOT
                      : (c.isRequired()
                          ? BooleanClause.Occur.MUST
                          : BooleanClause.Occur.SHOULD
                        )));
          }         
        }       
View Full Code Here

    // Examine each clause in the Nutch query
    Clause [] clauses = input.getClauses();
   
    for (int i = 0; i < clauses.length; i++)
    {
      Clause c = clauses[i];

      // Skip if not date clauses
      if (!c.getField().equals(FIELD_NAME))
      {
        continue;
      }

      String dateTerm = c.getTerm().toString();
      Matcher matcher = pattern.matcher(dateTerm);
     
      if (matcher == null || !matcher.matches())
      {
        String message = "Wrong query syntax " + FIELD_NAME
          + ":" + dateTerm + ". Must be standalone 14 digit " +
          " IA format date or a range with a hyphen between.";
        LOGGER.error(message);
       
        throw new QueryException(message);
      }

      // So, date is in one of 2 possible formats.  First is standalone
      // 14 character IA date.
      String d = matcher.group(1);
     
      if (d != null)
      {
        LOGGER.debug("Found single date: " + d);

        // This is not a range query. Take the passed date and convert
        // it to seconds-since-epoch.
        BooleanQuery bq = new BooleanQuery();
       
        bq.add(new TermQuery(getTerm(getSeconds(pad(d)))),
          BooleanClause.Occur.SHOULD);
       
        output.add(bq,
          (c.isRequired() == true && c.isProhibited() == false)?
          BooleanClause.Occur.MUST:
          (c.isRequired() == false && c.isProhibited() == false)?
          BooleanClause.Occur.SHOULD:
          BooleanClause.Occur.MUST_NOT);

        continue;
      }
View Full Code Here

   BooleanClause.Occur occurinner=null;
   int nClauses=0;
   // examine each clause in the Nutch query
   Clause[] clauses = input.getClauses();
   for (int i = 0; i < clauses.length; i++) {
     Clause c = clauses[i];

     // skip non-matching clauses
     if (!c.getField().equals(field))
       continue;

     // get the field value from the clause
     // raw fields are guaranteed to be Terms, not Phrases
     String value = c.getTerm().toString();
     if (lowerCase)
       value = value.toLowerCase();

     // add a Lucene TermQuery for this clause
     TermQuery clause = new TermQuery(new Term(FINAL_KEY, value));   
     // add it as specified in query       
     outinner.add(clause, BooleanClause.Occur.SHOULD);   
     if (nClauses==0) {
       occurinner=c.isProhibited()
              ? BooleanClause.Occur.MUST_NOT
                    : (c.isRequired()
                        ? BooleanClause.Occur.MUST
                        : BooleanClause.Occur.SHOULD
                       );
     }
     nClauses++;
View Full Code Here

  /** Run all defined filters. */
  public BooleanQuery filter(Query input) throws QueryException {
    // first check that all field names are claimed by some plugin
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];
      if (!isField(c.getField()))
        throw new QueryException("Not a known field name:"+c.getField());
    }

    // then run each plugin
    BooleanQuery output = new BooleanQuery();
    for (int i = 0; i < this.queryFilters.length; i++) {
View Full Code Here

    // Examine each clause in the Nutch query
    Clause [] clauses = input.getClauses();
   
    for (int i = 0; i < clauses.length; i++)
    {
      Clause c = clauses[i];

      // Skip if not date clauses
      if (!c.getField().equals(FIELD_NAME))
      {
        continue;
      }

      String dateTerm = c.getTerm().toString();
      Matcher matcher = pattern.matcher(dateTerm);
     
      if (matcher == null || !matcher.matches())
      {
        String message = "Wrong query syntax " + FIELD_NAME
          + ":" + dateTerm + ". Must be standalone 14 digit " +
          " IA format date.";
        LOGGER.error(message);
       
        throw new QueryException(message);
      }

      // So, date is in one format.
      // 14 character IA date.
      String d = matcher.group(1);
     
      if (d != null)
      {
        LOGGER.debug("Found single date: " + d);

        // This is not a range query. Take the passed date and convert
        // it to seconds-since-epoch.     
        output.add(new PwaClosestQuery(getTerm(getSeconds(pad(d)))),
                (c.isProhibited()
                    ? BooleanClause.Occur.MUST_NOT
                    : (c.isRequired()
                        ? BooleanClause.Occur.MUST
                        : BooleanClause.Occur.SHOULD
                       )
                 ));
       
View Full Code Here

    throws QueryException {
   
    // examine each clause in the Nutch query
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];

      // skip non-matching clauses
      if (!c.getField().equals(field))
        continue;

      // get the field value from the clause
      // raw fields are guaranteed to be Terms, not Phrases
      String value = c.getTerm().toString();
      if (lowerCase)
        value = value.toLowerCase();

      // add a Lucene TermQuery for this clause
      TermQuery clause = new TermQuery(new Term(field, value));
      // set boost
      clause.setBoost(boost);
      // add it as specified in query
     
      output.add(clause,
          (c.isProhibited()
              ? BooleanClause.Occur.MUST_NOT
              : (c.isRequired()
                  ? BooleanClause.Occur.MUST
                  : BooleanClause.Occur.SHOULD
                 )
           ));
    }
View Full Code Here

    throws QueryException {
   
    // examine each clause in the Nutch query
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];

      // skip non-matching clauses
      if (!c.getField().equals(field))
        continue;

      // optimize phrase clause
      if (c.isPhrase()) {
        String[] opt = this.commonGrams.optimizePhrase(c.getPhrase(), field);
        if (opt.length==1) {
          c = new Clause(new Query.Term(opt[0]),
                         c.isRequired(), c.isProhibited(), getConf());
        } else {
          c = new Clause(new Phrase(opt), c.isRequired(), c.isProhibited(), getConf());
        }
      }

      // construct appropriate Lucene clause
      org.apache.lucene.search.Query luceneClause;
      if (c.isPhrase()) {
        Phrase nutchPhrase = c.getPhrase();
        Query.Term[] terms = nutchPhrase.getTerms();
        PhraseQuery lucenePhrase = new PhraseQuery();
        for (int j = 0; j < terms.length; j++) {
          lucenePhrase.add(new Term(field, terms[j].toString()));
        }
        luceneClause = lucenePhrase;
      } else {
        luceneClause = new TermQuery(new Term(field, c.getTerm().toString()));
      }

      // set boost
      luceneClause.setBoost(boost);
      // add it as specified in query
     
      output.add(luceneClause,
          (c.isProhibited()
              ? BooleanClause.Occur.MUST_NOT
              : (c.isRequired()
                  ? BooleanClause.Occur.MUST
                  : BooleanClause.Occur.SHOULD
                 )
           ));
    }
View Full Code Here

  /** Run all defined filters. */
  public BooleanQuery filter(Query input) throws QueryException {
    // first check that all field names are claimed by some plugin
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];
      if (!isField(c.getField()))
        throw new QueryException("Not a known field name:"+c.getField());
    }

    // then run each plugin
    BooleanQuery output = new BooleanQuery();
    for (int i = 0; i < this.queryFilters.length; i++) {
View Full Code Here

TOP

Related Classes of org.apache.nutch.searcher.Query.Clause

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.