Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.CharArraySet.addAll()


      // big to start
      words = new CharArraySet(luceneMatchVersion,
          files.size() * 10, ignoreCase);
      for (String file : files) {
        List<String> wlist = loader.getLines(file.trim());
        words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
            ignoreCase));
      }
    }
    return words;
  }
View Full Code Here


   * @deprecated create a CharArraySet directly via its constructor instead
   */
  @Deprecated
  public static CharArraySet makeCommonSet(String[] commonWords, boolean ignoreCase) {
    CharArraySet commonSet = new CharArraySet(commonWords.length, ignoreCase);
    commonSet.addAll(Arrays.asList(commonWords));
    return commonSet;
  }

  /**
   * Inserts bigrams for common words into a token stream. For each input token,
View Full Code Here

   * @param ignoreCase   If true, all words are lower cased first.
   * @return a Set containing the words
   */
  public static final Set<Object> makeStopSet(Version matchVersion, String[] stopWords, boolean ignoreCase) {
    CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.length, ignoreCase);
    stopSet.addAll(Arrays.asList(stopWords));
    return stopSet;
  }

  /**
   * Creates a stopword set from the given stopword list.
View Full Code Here

   * @param ignoreCase   if true, all words are lower cased first
   * @return A Set ({@link org.apache.lucene.analysis.CharArraySet}) containing the words
   */
  public static final Set<Object> makeStopSet(Version matchVersion, List<?> stopWords, boolean ignoreCase) {
    CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.size(), ignoreCase);
    stopSet.addAll(stopWords);
    return stopSet;
  }

  /**
   * Returns the next input Token whose term() is not a stop word.
View Full Code Here

   * @return a Set containing the words
   */
  public static final CharArraySet makeCommonSet(String[] commonWords,
      boolean ignoreCase) {
    CharArraySet commonSet = new CharArraySet(commonWords.length, ignoreCase);
    commonSet.addAll(Arrays.asList(commonWords));
    return commonSet;
  }

  /**
   * Inserts bigrams for common words into a token stream. For each input token,
View Full Code Here

      // default stopwords list has 35 or so words, but maybe don't make it that
      // big to start
      words = new CharArraySet( files.size() * 10, ignoreCase );
      for ( String file : files ) {
        List<String> wlist = loader.getLines( file.trim() );
        words.addAll(
            StopFilter.makeStopSet(
                wlist,
                ignoreCase
            )
        );
View Full Code Here

   * @deprecated create a CharArraySet directly via its constructor instead
   */
  @Deprecated
  public static CharArraySet makeCommonSet(String[] commonWords, boolean ignoreCase) {
    CharArraySet commonSet = new CharArraySet( commonWords.length, ignoreCase );
    commonSet.addAll( Arrays.asList( commonWords ) );
    return commonSet;
  }

  /**
   * Inserts bigrams for common words into a token stream. For each input token,
View Full Code Here

      final Class<? extends ReusableAnalyzerBase> aClass, final String resource,
      final String comment) throws IOException {
    final Set<String> wordSet = WordlistLoader.getWordSet(aClass, resource,
        comment);
    final CharArraySet set = new CharArraySet(Version.LUCENE_31, wordSet.size(), ignoreCase);
    set.addAll(wordSet);
    return set;
  }

}
View Full Code Here

    }
   
    br.close();
   
    final CharArraySet stopSet = new CharArraySet(stopWords.size(), false);
    stopSet.addAll(stopWords)
       
    ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);

  }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.