Package org.apache.lucene.util.automaton

Examples of org.apache.lucene.util.automaton.RegExp


  private class DumbRegexpQuery extends MultiTermQuery {
    private final Automaton automaton;
   
    DumbRegexpQuery(Term term, int flags) {
      super(term.field());
      RegExp re = new RegExp(term.text(), flags);
      automaton = re.toAutomaton();
    }
View Full Code Here


   * @param term regular expression.
   * @param flags optional RegExp features from {@link RegExp}
   * @param provider custom AutomatonProvider for named automata
   */
  public RegexpQuery(Term term, int flags, AutomatonProvider provider) {
    super(term, new RegExp(term.text(), flags).toAutomaton(provider));
  }
View Full Code Here

    w.forceMerge(1);
    DirectoryReader r = w.getReader();
    w.close();
    AtomicReader sub = getOnlySegmentReader(r);
    Terms terms = sub.fields().terms("field");
    Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();   
    CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);   
    TermsEnum te = terms.intersect(ca, null);
    assertEquals("aaa", te.next().utf8ToString());
    assertEquals(0, te.docs(null, null, DocsEnum.FLAG_NONE).nextDoc());
    assertEquals("bbb", te.next().utf8ToString());
View Full Code Here

    DirectoryReader r = w.getReader();
    w.close();
    AtomicReader sub = getOnlySegmentReader(r);
    Terms terms = sub.fields().terms("field");

    Automaton automaton = new RegExp(".*d", RegExp.NONE).toAutomaton();
    CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);   
    TermsEnum te;
   
    // should seek to startTerm
    te = terms.intersect(ca, new BytesRef("aad"));
View Full Code Here

    DirectoryReader r = w.getReader();
    w.close();
    AtomicReader sub = getOnlySegmentReader(r);
    Terms terms = sub.fields().terms("field");

    Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton()// accept ALL
    CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);   

    TermsEnum te = terms.intersect(ca, null);
    DocsEnum de;

View Full Code Here

public class TestSpanFirstQuery extends LuceneTestCase {
  public void testStartPositions() throws Exception {
    Directory dir = newDirectory();
   
    // mimic StopAnalyzer
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|a|of").toAutomaton());
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
   
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, analyzer);
    Document doc = new Document();
    doc.add(newTextField("field", "the quick brown fox", Field.Store.NO));
View Full Code Here

 
  public void testMaxSizeEndHighlight() throws Exception {
    TestHighlightRunner helper = new TestHighlightRunner() {
      @Override
      public void run() throws Exception {
        CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
        TermQuery query = new TermQuery(new Term("text", "searchterm"));

        String text = "this is a text with searchterm in it";
        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = getHighlighter(query, "text", fm);
View Full Code Here

    assertEquals(two, getQuery("field:/foo/ field:/bar/",qp));
    assertEquals(two, getQuery("/foo/ /bar/",qp));
  }
 
  public void testStopwords() throws Exception {
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
    CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
    Query result = getQuery("field:the OR field:foo",qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
    assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0, ((BooleanQuery) result).clauses().size() == 0);
View Full Code Here

    setDefaultField(oldDefaultField);
  }

  public void testPhraseQueryPositionIncrements() throws Exception {
    CharacterRunAutomaton stopStopList =
    new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());

    CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));

    qp = getParserConfig(
                         new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
View Full Code Here

    final Map<String, Analyzer> fieldAnalyzers = new TreeMap<>();
    fieldAnalyzers.put( "field", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET ) );
    fieldAnalyzers.put( "field_exact", new MockAnalyzer( random() ) );
    fieldAnalyzers.put( "field_super_exact", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, false ) );
    fieldAnalyzers.put( "field_characters", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp(".").toAutomaton() ), true ) );
    fieldAnalyzers.put( "field_tripples", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp("...").toAutomaton() ), true ) );
    fieldAnalyzers.put( "field_sliced", fieldAnalyzers.get( "field" ) );
    fieldAnalyzers.put( "field_der_red", fieldAnalyzers.get( "field" ) )// This is required even though we provide a token stream
    Analyzer analyzer = new AnalyzerWrapper() {
      public Analyzer getWrappedAnalyzer(String fieldName) {
        return fieldAnalyzers.get( fieldName );
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.automaton.RegExp

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.