/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.searcher;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.index.Term;
import org.apache.nutch.analysis.CommonGrams;
import org.apache.nutch.searcher.Query.Clause;
import org.apache.nutch.searcher.Query.Phrase;
import org.apache.hadoop.conf.Configuration;
/** Translate query fields to search the same-named field, as indexed by an
* IndexingFilter. Best for tokenized fields. */
public abstract class FieldQueryFilter implements QueryFilter {
private String field;
private float boost = 1.0f;
private Configuration conf;
private CommonGrams commonGrams;
/** Construct for the named field.*/
protected FieldQueryFilter(String field) {
this(field, 1.0f);
}
/** Construct for the named field, boosting as specified.*/
protected FieldQueryFilter(String field, float boost) {
this.field = field;
this.boost = boost;
}
public BooleanQuery filter(Query input, BooleanQuery output)
throws QueryException {
// examine each clause in the Nutch query
Clause[] clauses = input.getClauses();
for (int i = 0; i < clauses.length; i++) {
Clause c = clauses[i];
// skip non-matching clauses
if (!c.getField().equals(field))
continue;
// optimize phrase clause
if (c.isPhrase()) {
String[] opt = this.commonGrams.optimizePhrase(c.getPhrase(), field);
if (opt.length==1) {
c = new Clause(new Query.Term(opt[0]),
c.isRequired(), c.isProhibited(), getConf());
} else {
c = new Clause(new Phrase(opt), c.isRequired(), c.isProhibited(), getConf());
}
}
// construct appropriate Lucene clause
org.apache.lucene.search.Query luceneClause;
if (c.isPhrase()) {
Phrase nutchPhrase = c.getPhrase();
Query.Term[] terms = nutchPhrase.getTerms();
PhraseQuery lucenePhrase = new PhraseQuery();
for (int j = 0; j < terms.length; j++) {
lucenePhrase.add(new Term(field, terms[j].toString()));
}
luceneClause = lucenePhrase;
} else {
luceneClause = new TermQuery(new Term(field, c.getTerm().toString()));
}
// set boost
luceneClause.setBoost(boost);
// add it as specified in query
output.add(luceneClause,
(c.isProhibited()
? BooleanClause.Occur.MUST_NOT
: (c.isRequired()
? BooleanClause.Occur.MUST
: BooleanClause.Occur.SHOULD
)
));
}
// return the modified Lucene query
return output;
}
public void setConf(Configuration conf) {
this.conf = conf;
this.commonGrams = new CommonGrams(conf);
}
public Configuration getConf() {
return this.conf;
}
}