Examples of LineFileDocs

org.apache.lucene.util.LineFileDocs
Minimal port of benchmark's LneDocSource + DocMaker, so tests can enum docs from a line file created by benchmark's WriteLineDoc task

Examples of org.apache.lucene.util.LineFileDocs

    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random()));
    Similarity provider = new MySimProvider();
    config.setSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
    final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
      Document doc = docs.nextDoc();
      int boost = random().nextInt(255);
      Field f = new TextField(byteTestField, "" + boost, Field.Store.YES);
      f.setBoost(boost);
      doc.add(f);
      writer.addDocument(doc);
      doc.removeField(byteTestField);
      if (rarely()) {
        writer.commit();
      }
    }
    writer.commit();
    writer.close();
    docs.close();
  }

View Full Code Here

Examples of org.apache.lucene.util.LineFileDocs

    dir.setPreventDoubleWrite(false);
    double rate = random().nextDouble()*0.01;
    //System.out.println("rate=" + rate);
    dir.setRandomIOExceptionRateOnOpen(rate);
    int iters = atLeast(20);
    LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues());
    IndexReader r = null;
    DirectoryReader r2 = null;
    boolean any = false;
    MockDirectoryWrapper dirCopy = null;
    int lastNumDocs = 0;
    for(int iter=0;iter<iters;iter++) {


      IndexWriter w = null;
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }
      try {
        IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));


        if (VERBOSE) {
          // Do this ourselves instead of relying on LTC so
          // we see incrementing messageID:
          iwc.setInfoStream(new PrintStreamInfoStream(System.out));
        }
        MergeScheduler ms = iwc.getMergeScheduler();
        if (ms instanceof ConcurrentMergeScheduler) {
          ((ConcurrentMergeScheduler) ms).setSuppressExceptions();
        }
        w = new IndexWriter(dir, iwc);
        if (r != null && random().nextInt(5) == 3) {
          if (random().nextBoolean()) {
            if (VERBOSE) {
              System.out.println("TEST: addIndexes IR[]");
            }
            w.addIndexes(new IndexReader[] {r});
          } else {
            if (VERBOSE) {
              System.out.println("TEST: addIndexes Directory[]");
            }
            w.addIndexes(new Directory[] {dirCopy});
          }
        } else {
          if (VERBOSE) {
            System.out.println("TEST: addDocument");
          }
          w.addDocument(docs.nextDoc());
        }
        w.close();
        w = null;


        // NOTE: This is O(N^2)!  Only enable for temporary debugging:

View Full Code Here

Examples of org.apache.lucene.util.LineFileDocs

  
  // Build FST for all unique terms in the test line docs
  // file, up until a time limit
  public void testRealTerms() throws Exception {


    final LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues());
    final int RUN_TIME_MSEC = atLeast(500);
    final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
    final File tempDir = _TestUtil.getTempDir("fstlines");
    final Directory dir = newFSDirectory(tempDir);
    final IndexWriter writer = new IndexWriter(dir, conf);
    final long stopTime = System.currentTimeMillis() + RUN_TIME_MSEC;
    Document doc;
    int docCount = 0;
    while((doc = docs.nextDoc()) != null && System.currentTimeMillis() < stopTime) {
      writer.addDocument(doc);
      docCount++;
    }
    IndexReader r = DirectoryReader.open(writer, true);
    writer.close();

View Full Code Here

Examples of org.apache.lucene.util.LineFileDocs

  // concurrency
  private final class ChangeIndices extends Thread {
    @Override
    public void run() {
      try {
        final LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues());
        int numDocs = 0;
        while (System.nanoTime() < endTimeNanos) {
          final int what = random().nextInt(3);
          final NodeState node = nodes[random().nextInt(nodes.length)];
          if (numDocs == 0 || what == 0) {
            node.writer.addDocument(docs.nextDoc());
            numDocs++;
          } else if (what == 1) {
            node.writer.updateDocument(new Term("docid", ""+random().nextInt(numDocs)),
                                        docs.nextDoc());
            numDocs++;
          } else {
            node.writer.deleteDocuments(new Term("docid", ""+random().nextInt(numDocs)));
          }
          // TODO: doc blocks too

View Full Code Here

Examples of org.apache.lucene.util.LineFileDocs

   * populates a writer with random stuff. this must be fully reproducable with
   * the seed!
   */
  public static void createRandomIndex(int numdocs, RandomIndexWriter writer,
      Random random) throws IOException {
    LineFileDocs lineFileDocs = new LineFileDocs(random);


    for (int i = 0; i < numdocs; i++) {
      writer.addDocument(lineFileDocs.nextDoc());
    }
    
    lineFileDocs.close();
  }

View Full Code Here

Examples of org.apache.lucene.util.LineFileDocs

public class TestSameScoresWithThreads extends LuceneTestCase {


  public void test() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    LineFileDocs docs = new LineFileDocs(random(), defaultCodecSupportsDocValues());
    int charsToIndex = atLeast(100000);
    int charsIndexed = 0;
    //System.out.println("bytesToIndex=" + charsToIndex);
    while(charsIndexed < charsToIndex) {
      Document doc = docs.nextDoc();
      charsIndexed += doc.get("body").length();
      w.addDocument(doc);
      //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
    }
    IndexReader r = w.getReader();

View Full Code Here

Examples of org.apache.lucene.util.LineFileDocs

  public void testNRTAndCommit() throws Exception {
    Directory dir = newDirectory();
    NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
    final LineFileDocs docs = new LineFileDocs(random(),
                                               defaultCodecSupportsDocValues());
    final int numDocs = _TestUtil.nextInt(random(), 100, 400);


    if (VERBOSE) {
      System.out.println("TEST: numDocs=" + numDocs);
    }


    final List<BytesRef> ids = new ArrayList<BytesRef>();
    DirectoryReader r = null;
    for(int docCount=0;docCount<numDocs;docCount++) {
      final Document doc = docs.nextDoc();
      ids.add(new BytesRef(doc.get("docid")));
      w.addDocument(doc);
      if (random().nextInt(20) == 17) {
        if (r == null) {
          r = DirectoryReader.open(w.w, false);
        } else {
          final DirectoryReader r2 = DirectoryReader.openIfChanged(r);
          if (r2 != null) {
            r.close();
            r = r2;
          }
        }
        assertEquals(1+docCount, r.numDocs());
        final IndexSearcher s = newSearcher(r);
        // Just make sure search can run; we can't assert
        // totHits since it could be 0
        TopDocs hits = s.search(new TermQuery(new Term("body", "the")), 10);
        // System.out.println("tot hits " + hits.totalHits);
      }
    }


    if (r != null) {
      r.close();
    }


    // Close should force cache to clear since all files are sync'd
    w.close();


    final String[] cachedFiles = cachedDir.listCachedFiles();
    for(String file : cachedFiles) {
      System.out.println("FAIL: cached file " + file + " remains after sync");
    }
    assertEquals(0, cachedFiles.length);
    
    r = DirectoryReader.open(dir);
    for(BytesRef id : ids) {
      assertEquals(1, r.docFreq(new Term("docid", id)));
    }
    r.close();
    cachedDir.close();
    docs.close();
  }

View Full Code Here

Examples of org.apache.lucene.util.LineFileDocs

    packCount.set(0);


    final long t0 = System.currentTimeMillis();


    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
    final File tempDir = _TestUtil.getTempDir(testName);
    dir = newMockFSDirectory(tempDir); // some subclasses rely on this being MDW
    dir.setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
    final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, 
        new MockAnalyzer(random())).setInfoStream(new FailOnNonBulkMergesInfoStream());


    if (LuceneTestCase.TEST_NIGHTLY) {
      // newIWConfig makes smallish max seg size, which
      // results in tons and tons of segments for this test
      // when run nightly:
      MergePolicy mp = conf.getMergePolicy();
      if (mp instanceof TieredMergePolicy) {
        ((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
      } else if (mp instanceof LogByteSizeMergePolicy) {
        ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
      } else if (mp instanceof LogMergePolicy) {
        ((LogMergePolicy) mp).setMaxMergeDocs(100000);
      }
    }


    conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
      @Override
      public void warm(AtomicReader reader) throws IOException {
        if (VERBOSE) {
          System.out.println("TEST: now warm merged reader=" + reader);
        }
        warmed.put(((SegmentReader) reader).core, Boolean.TRUE);
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        int sum = 0;
        final int inc = Math.max(1, maxDoc/50);
        for(int docID=0;docID<maxDoc;docID += inc) {
          if (liveDocs == null || liveDocs.get(docID)) {
            final Document doc = reader.document(docID);
            sum += doc.getFields().size();
          }
        }


        IndexSearcher searcher = newSearcher(reader);
        sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;


        if (VERBOSE) {
          System.out.println("TEST: warm visited " + sum + " fields");
        }
      }
      });


    if (VERBOSE) {
      conf.setInfoStream(new PrintStreamInfoStream(System.out) {
          @Override
          public void message(String component, String message) {
            if ("TP".equals(component)) {
              return; // ignore test points!
            }
            super.message(component, message);
          }
        });
    }
    writer = new IndexWriter(dir, conf);
    _TestUtil.reduceOpenFiles(writer);


    final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));


    doAfterWriter(es);


    final int NUM_INDEX_THREADS = _TestUtil.nextInt(random(), 2, 4);


    final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;


    final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
    final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
    final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());


    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC*1000;


    final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);


    if (VERBOSE) {
      System.out.println("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (System.currentTimeMillis()-t0) + " ms]");
    }


    // Let index build up a bit
    Thread.sleep(100);


    doSearching(es, stopTime);


    if (VERBOSE) {
      System.out.println("TEST: all searching done [" + (System.currentTimeMillis()-t0) + " ms]");
    }
    
    for(int thread=0;thread<indexThreads.length;thread++) {
      indexThreads[thread].join();
    }


    if (VERBOSE) {
      System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis()-t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
    }


    final IndexSearcher s = getFinalSearcher();
    if (VERBOSE) {
      System.out.println("TEST: finalSearcher=" + s);
    }


    assertFalse(failed.get());


    boolean doFail = false;


    // Verify: make sure delIDs are in fact deleted:
    for(String id : delIDs) {
      final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
      if (hits.totalHits != 0) {
        System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
        doFail = true;
      }
    }


    // Verify: make sure delPackIDs are in fact deleted:
    for(String id : delPackIDs) {
      final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
      if (hits.totalHits != 0) {
        System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
        doFail = true;
      }
    }


    // Verify: make sure each group of sub-docs are still in docID order:
    for(SubDocs subDocs : allSubDocs) {
      TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
      if (!subDocs.deleted) {
        // We sort by relevance but the scores should be identical so sort falls back to by docID:
        if (hits.totalHits != subDocs.subIDs.size()) {
          System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
          doFail = true;
        } else {
          int lastDocID = -1;
          int startDocID = -1;
          for(ScoreDoc scoreDoc : hits.scoreDocs) {
            final int docID = scoreDoc.doc;
            if (lastDocID != -1) {
              assertEquals(1+lastDocID, docID);
            } else {
              startDocID = docID;
            }
            lastDocID = docID;
            final Document doc = s.doc(docID);
            assertEquals(subDocs.packID, doc.get("packID"));
          }


          lastDocID = startDocID - 1;
          for(String subID : subDocs.subIDs) {
            hits = s.search(new TermQuery(new Term("docid", subID)), 1);
            assertEquals(1, hits.totalHits);
            final int docID = hits.scoreDocs[0].doc;
            if (lastDocID != -1) {
              assertEquals(1+lastDocID, docID);
            }
            lastDocID = docID;
          }
        }
      } else {
        // Pack was deleted -- make sure its docs are
        // deleted.  We can't verify packID is deleted
        // because we can re-use packID for update:
        for(String subID : subDocs.subIDs) {
          assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
        }
      }
    }


    // Verify: make sure all not-deleted docs are in fact
    // not deleted:
    final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
    docs.close();


    for(int id=0;id<endID;id++) {
      String stringID = ""+id;
      if (!delIDs.contains(stringID)) {
        final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);

View Full Code Here

Examples of org.apache.lucene.util.LineFileDocs

    assertNull(reader.termPositionsEnum(new Term("not-in-index", "foo")));
    assertNull(reader.terms("not-in-index"));
  }
  
  public void testDuellMemIndex() throws IOException {
    LineFileDocs lineFileDocs = new LineFileDocs(random());
    int numDocs = atLeast(10);
    MemoryIndex memory = new MemoryIndex(random().nextBoolean(),  random().nextInt(50) * 1024 * 1024);
    for (int i = 0; i < numDocs; i++) {
      Directory dir = newDirectory();
      MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
      Document nextDoc = lineFileDocs.nextDoc();
      Document doc = new Document();
      for (IndexableField field : nextDoc.getFields()) {
        if (field.fieldType().indexed()) {
          doc.add(field);
          if (random().nextInt(3) == 0) {
            doc.add(field);  // randomly add the same field twice
          }
        }
      }
      
      writer.addDocument(doc);
      writer.close();
      for (IndexableField field : doc.getFields()) {
          memory.addField(field.name(), ((Field)field).stringValue(), mockAnalyzer);  
      }
      DirectoryReader competitor = DirectoryReader.open(dir);
      AtomicReader memIndexReader= (AtomicReader) memory.createSearcher().getIndexReader();
      duellReaders(competitor, memIndexReader);
      IOUtils.close(competitor, memIndexReader);
      memory.reset();
      dir.close();
    }
    lineFileDocs.close();
  }

View Full Code Here

Examples of org.apache.lucene.util.LineFileDocs

@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
public class TestTermsEnum extends LuceneTestCase {


  public void test() throws Exception {
    Random random = new Random(random().nextLong());
    final LineFileDocs docs = new LineFileDocs(random, defaultCodecSupportsDocValues());
    final Directory d = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), d);
    final int numDocs = atLeast(10);
    for(int docCount=0;docCount<numDocs;docCount++) {
      w.addDocument(docs.nextDoc());
    }
    final IndexReader r = w.getReader();
    w.close();


    final List<BytesRef> terms = new ArrayList<BytesRef>();
    final TermsEnum termsEnum = MultiFields.getTerms(r, "body").iterator(null);
    BytesRef term;
    while((term = termsEnum.next()) != null) {
      terms.add(BytesRef.deepCopyOf(term));
    }
    if (VERBOSE) {
      System.out.println("TEST: " + terms.size() + " terms");
    }


    int upto = -1;
    final int iters = atLeast(200);
    for(int iter=0;iter<iters;iter++) {
      final boolean isEnd;
      if (upto != -1 && random().nextBoolean()) {
        // next
        if (VERBOSE) {
          System.out.println("TEST: iter next");
        }
        isEnd = termsEnum.next() == null;
        upto++;
        if (isEnd) {
          if (VERBOSE) {
            System.out.println("  end");
          }
          assertEquals(upto, terms.size());
          upto = -1;
        } else {
          if (VERBOSE) {
            System.out.println("  got term=" + termsEnum.term().utf8ToString() + " expected=" + terms.get(upto).utf8ToString());
          }
          assertTrue(upto < terms.size());
          assertEquals(terms.get(upto), termsEnum.term());
        }
      } else {


        final BytesRef target;
        final String exists;
        if (random().nextBoolean()) {
          // likely fake term
          if (random().nextBoolean()) {
            target = new BytesRef(_TestUtil.randomSimpleString(random()));
          } else {
            target = new BytesRef(_TestUtil.randomRealisticUnicodeString(random()));
          }
          exists = "likely not";
        } else {
          // real term
          target = terms.get(random().nextInt(terms.size()));
          exists = "yes";
        }


        upto = Collections.binarySearch(terms, target);


        if (random().nextBoolean()) {
          if (VERBOSE) {
            System.out.println("TEST: iter seekCeil target=" + target.utf8ToString() + " exists=" + exists);
          }
          // seekCeil
          final TermsEnum.SeekStatus status = termsEnum.seekCeil(target);
          if (VERBOSE) {
            System.out.println("  got " + status);
          }
          
          if (upto < 0) {
            upto = -(upto+1);
            if (upto >= terms.size()) {
              assertEquals(TermsEnum.SeekStatus.END, status);
              upto = -1;
            } else {
              assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
              assertEquals(terms.get(upto), termsEnum.term());
            }
          } else {
            assertEquals(TermsEnum.SeekStatus.FOUND, status);
            assertEquals(terms.get(upto), termsEnum.term());
          }
        } else {
          if (VERBOSE) {
            System.out.println("TEST: iter seekExact target=" + target.utf8ToString() + " exists=" + exists);
          }
          // seekExact
          final boolean result = termsEnum.seekExact(target);
          if (VERBOSE) {
            System.out.println("  got " + result);
          }
          if (upto < 0) {
            assertFalse(result);
            upto = -1;
          } else {
            assertTrue(result);
            assertEquals(target, termsEnum.term());
          }
        }
      }
    }


    r.close();
    d.close();
    docs.close();
  }

View Full Code Here

0 1 2 3 4 5

TOP

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.