Package org.apache.nutch.searcher

Source Code of org.apache.nutch.searcher.FieldQueryFilter

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.nutch.searcher;

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.index.Term;

import org.apache.nutch.analysis.CommonGrams;

import org.apache.nutch.searcher.Query.Clause;
import org.apache.nutch.searcher.Query.Phrase;
import org.apache.hadoop.conf.Configuration;

/** Translate query fields to search the same-named field, as indexed by an
* IndexingFilter.  Best for tokenized fields. */
public abstract class FieldQueryFilter implements QueryFilter {
  private String field;
  private float boost = 1.0f;
  private Configuration conf;
  private CommonGrams commonGrams;

  /** Construct for the named field.*/
  protected FieldQueryFilter(String field) {
    this(field, 1.0f);
  }

  /** Construct for the named field, boosting as specified.*/
  protected FieldQueryFilter(String field, float boost) {
    this.field = field;
    this.boost = boost;
  }

  public BooleanQuery filter(Query input, BooleanQuery output)
    throws QueryException {
   
    // examine each clause in the Nutch query
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];

      // skip non-matching clauses
      if (!c.getField().equals(field))
        continue;

      // optimize phrase clause
      if (c.isPhrase()) {
        String[] opt = this.commonGrams.optimizePhrase(c.getPhrase(), field);
        if (opt.length==1) {
          c = new Clause(new Query.Term(opt[0]),
                         c.isRequired(), c.isProhibited(), getConf());
        } else {
          c = new Clause(new Phrase(opt), c.isRequired(), c.isProhibited(), getConf());
        }
      }

      // construct appropriate Lucene clause
      org.apache.lucene.search.Query luceneClause;
      if (c.isPhrase()) {
        Phrase nutchPhrase = c.getPhrase();
        Query.Term[] terms = nutchPhrase.getTerms();
        PhraseQuery lucenePhrase = new PhraseQuery();
        for (int j = 0; j < terms.length; j++) {
          lucenePhrase.add(new Term(field, terms[j].toString()));
        }
        luceneClause = lucenePhrase;
      } else {
        luceneClause = new TermQuery(new Term(field, c.getTerm().toString()));
      }

      // set boost
      luceneClause.setBoost(boost);
      // add it as specified in query
     
      output.add(luceneClause,
          (c.isProhibited()
              ? BooleanClause.Occur.MUST_NOT
              : (c.isRequired()
                  ? BooleanClause.Occur.MUST
                  : BooleanClause.Occur.SHOULD
                 )
           ));
    }
   
    // return the modified Lucene query
    return output;
  }
 
  public void setConf(Configuration conf) {
    this.conf = conf;
    this.commonGrams = new CommonGrams(conf);
  }

  public Configuration getConf() {
    return this.conf;
  }
}
TOP

Related Classes of org.apache.nutch.searcher.FieldQueryFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.