Examples of org.apache.lucene.analysis.TokenStream.reset()

Class org.apache.lucene.analysis.TokenStream

Examples of org.apache.lucene.analysis.TokenStream.reset()

org.apache.lucene.analysis.TokenStream.reset()
Resets this stream to the beginning. This is an optional operation, so subclasses may or may not implement this method. {@link #reset()} is not needed for the standard indexing process. However, if the tokens of a TokenStream are intended to be consumed more than once, it is necessary to implement {@link #reset()}. Note that if your TokenStream caches tokens and feeds them back again after a reset, it is imperative that you clone the tokens when you store them away (on the first pass) as well as when you return them (on future passes after {@link #reset()}).


  private void assertInflectionForms(String input, String... inflectionForms) throws IOException {
    TokenStream ts = analyzer.tokenStream("ignored", input);
    try {
      InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
      ts.reset();
      for(String inflectionForm : inflectionForms) {
        assertTrue(ts.incrementToken());
        assertEquals(inflectionForm, inflectionAtt.getInflectionForm());
      }
      assertFalse(ts.incrementToken());

View Full Code Here

  
  private void assertPartsOfSpeech(String input, String... partsOfSpeech) throws IOException {
    TokenStream ts = analyzer.tokenStream("ignored", input);
    try {
      PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class);
      ts.reset();
      for(String partOfSpeech : partsOfSpeech) {
        assertTrue(ts.incrementToken());
        assertEquals(partOfSpeech, partOfSpeechAtt.getPartOfSpeech());
      }
      assertFalse(ts.incrementToken());

View Full Code Here


    long totalStart = System.currentTimeMillis();
    for (int i = 0; i < numIterations; i++) {
      TokenStream ts = analyzer.tokenStream("ignored", line);
      try {
        ts.reset();
        while(ts.incrementToken());
        ts.end();
      } finally {
        IOUtils.closeWhileHandlingException(ts);
      }

View Full Code Here

    totalStart = System.currentTimeMillis();
    for (int i = 0; i < numIterations; i++) {
      for (String sentence: sentences) {
        TokenStream ts = analyzer.tokenStream("ignored", sentence);
        try {
          ts.reset();
          while(ts.incrementToken());
          ts.end();
        } finally {
          IOUtils.closeWhileHandlingException(ts);
        }

View Full Code Here

    for (int i = 0; i < numIterations; i++) {
      String s = _TestUtil.randomUnicodeString(random(), 100);
      TokenStream ts = analyzer.tokenStream("foo", s);
      try {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          assertTrue(UnicodeUtil.validUTF16String(termAtt));
        }
        ts.end();
      } finally {

View Full Code Here

  
  public void testTokenAttributes() throws Exception {
    TokenStream ts = a.tokenStream("dummy", "This is a test");
    try {
      ScriptAttribute scriptAtt = ts.addAttribute(ScriptAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        assertEquals(UScript.LATIN, scriptAtt.getCode());
        assertEquals(UScript.getName(UScript.LATIN), scriptAtt.getName());
        assertEquals(UScript.getShortName(UScript.LATIN), scriptAtt.getShortName());
        assertTrue(ts.reflectAsString(false).contains("script=Latin"));

View Full Code Here

  // LUCENE-1441
  public void testOffsets() throws Exception {
    TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"));
    try {
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      stream.reset();
      assertTrue(stream.incrementToken());
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(4, offsetAtt.endOffset());
      assertFalse(stream.incrementToken());
      stream.end();

View Full Code Here

    if (analyzerIn == null) analyzerIn = getAnalyzer();


    TokenStream source = null;
    try {
      source = analyzerIn.tokenStream(field, part);
      source.reset();
      
      TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();


      if (!source.incrementToken())

View Full Code Here

    TokenStream ts = analyzer.tokenStream(fieldName, r);
    try {
      int tokenCount = 0;
      // for every token
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        String word = termAtt.toString();
        tokenCount++;
        if (tokenCount > maxNumTokensParsed) {
          break;

View Full Code Here

      IOException priorException = null;
      TokenStream ts = analyzer.tokenStream("", text);
      try {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        reuse.length = 0;
        while (ts.incrementToken()) {
          int length = termAtt.length();
          if (length == 0) {
            throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.