Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.reset()


  private void assertInflectionForms(String input, String... inflectionForms) throws IOException {
    TokenStream ts = analyzer.tokenStream("ignored", input);
    try {
      InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
      ts.reset();
      for(String inflectionForm : inflectionForms) {
        assertTrue(ts.incrementToken());
        assertEquals(inflectionForm, inflectionAtt.getInflectionForm());
      }
      assertFalse(ts.incrementToken());
View Full Code Here


 
  private void assertPartsOfSpeech(String input, String... partsOfSpeech) throws IOException {
    TokenStream ts = analyzer.tokenStream("ignored", input);
    try {
      PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class);
      ts.reset();
      for(String partOfSpeech : partsOfSpeech) {
        assertTrue(ts.incrementToken());
        assertEquals(partOfSpeech, partOfSpeechAtt.getPartOfSpeech());
      }
      assertFalse(ts.incrementToken());
View Full Code Here

    long totalStart = System.currentTimeMillis();
    for (int i = 0; i < numIterations; i++) {
      TokenStream ts = analyzer.tokenStream("ignored", line);
      try {
        ts.reset();
        while(ts.incrementToken());
        ts.end();
      } finally {
        IOUtils.closeWhileHandlingException(ts);
      }
View Full Code Here

    totalStart = System.currentTimeMillis();
    for (int i = 0; i < numIterations; i++) {
      for (String sentence: sentences) {
        TokenStream ts = analyzer.tokenStream("ignored", sentence);
        try {
          ts.reset();
          while(ts.incrementToken());
          ts.end();
        } finally {
          IOUtils.closeWhileHandlingException(ts);
        }
View Full Code Here

    for (int i = 0; i < numIterations; i++) {
      String s = _TestUtil.randomUnicodeString(random(), 100);
      TokenStream ts = analyzer.tokenStream("foo", s);
      try {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          assertTrue(UnicodeUtil.validUTF16String(termAtt));
        }
        ts.end();
      } finally {
View Full Code Here

 
  public void testTokenAttributes() throws Exception {
    TokenStream ts = a.tokenStream("dummy", "This is a test");
    try {
      ScriptAttribute scriptAtt = ts.addAttribute(ScriptAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        assertEquals(UScript.LATIN, scriptAtt.getCode());
        assertEquals(UScript.getName(UScript.LATIN), scriptAtt.getName());
        assertEquals(UScript.getShortName(UScript.LATIN), scriptAtt.getShortName());
        assertTrue(ts.reflectAsString(false).contains("script=Latin"));
View Full Code Here

  // LUCENE-1441
  public void testOffsets() throws Exception {
    TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"));
    try {
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      stream.reset();
      assertTrue(stream.incrementToken());
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(4, offsetAtt.endOffset());
      assertFalse(stream.incrementToken());
      stream.end();
View Full Code Here

    if (analyzerIn == null) analyzerIn = getAnalyzer();

    TokenStream source = null;
    try {
      source = analyzerIn.tokenStream(field, part);
      source.reset();
     
      TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();

      if (!source.incrementToken())
View Full Code Here

    TokenStream ts = analyzer.tokenStream(fieldName, r);
    try {
      int tokenCount = 0;
      // for every token
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        String word = termAtt.toString();
        tokenCount++;
        if (tokenCount > maxNumTokensParsed) {
          break;
View Full Code Here

      IOException priorException = null;
      TokenStream ts = analyzer.tokenStream("", text);
      try {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        reuse.length = 0;
        while (ts.incrementToken()) {
          int length = termAtt.length();
          if (length == 0) {
            throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.