Package org.apache.lucene.analysis.core

Examples of org.apache.lucene.analysis.core.LowerCaseFilter


  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerFilter(result, stemExclusionSet);
    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
      result = new SpanishLightStemFilter(result);
View Full Code Here


  protected TokenStreamComponents createComponents(String fieldName,
      Reader aReader) {
    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
      final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
      TokenStream result = new StandardFilter(matchVersion, source);
      result = new LowerCaseFilter(matchVersion, result);
      result = new StopFilter(matchVersion, result, stoptable);
      if (!excltable.isEmpty())
        result = new KeywordMarkerFilter(result, excltable);
      if (!stemdict.isEmpty())
        result = new StemmerOverrideFilter(result, stemdict);
View Full Code Here

    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
      source = new StandardTokenizer(matchVersion, reader);
    } else {
      source = new IndicTokenizer(matchVersion, reader);
    }
    TokenStream result = new LowerCaseFilter(matchVersion, source);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerFilter(result, stemExclusionSet);
    result = new IndicNormalizationFilter(result);
    result = new HindiNormalizationFilter(result);
    result = new StopFilter(matchVersion, result, stopwords);
View Full Code Here

    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    if (matchVersion.onOrAfter(Version.LUCENE_32)) {
      result = new ElisionFilter(result, DEFAULT_ARTICLES);
    }
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerFilter(result, stemExclusionSet);
    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
      result = new ItalianLightStemFilter(result);
View Full Code Here

  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new BasqueStemmer());
    return new TokenStreamComponents(source, result);
View Full Code Here

  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new RomanianStemmer());
    return new TokenStreamComponents(source, result);
View Full Code Here

      Reader reader) {
    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
      TokenStream result = new StandardFilter(matchVersion, source);
      result = new ElisionFilter(result, DEFAULT_ARTICLES);
      result = new LowerCaseFilter(matchVersion, result);
      result = new StopFilter(matchVersion, result, stopwords);
      if(!excltable.isEmpty())
        result = new KeywordMarkerFilter(result, excltable);
      if (matchVersion.onOrAfter(Version.LUCENE_36)) {
        result = new FrenchLightStemFilter(result);
      } else {
        result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
      }
      return new TokenStreamComponents(source, result);
    } else {
      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
      TokenStream result = new StandardFilter(matchVersion, source);
      result = new StopFilter(matchVersion, result, stopwords);
      if(!excltable.isEmpty())
        result = new KeywordMarkerFilter(result, excltable);
      result = new FrenchStemFilter(result);
      // Convert to lowercase after stemming!
      return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
    }
  }
View Full Code Here

   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new LowerCaseFilter(matchVersion, source);
    result = new StandardFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(excltable != null && !excltable.isEmpty())
      result = new KeywordMarkerFilter(result, excltable);
    return new TokenStreamComponents(source, new BrazilianStemFilter(result));
View Full Code Here

   
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_31, reader) : factory.create(reader);
        TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_31, tokenizer) : tokenizer;
        return new TokenStreamComponents(tokenizer, stream);
      }
    };

    String format = args.get("format");
View Full Code Here

  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter( matchVersion, result, stopwords);
    result = new KeywordMarkerFilter(result, exclusionSet);
    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
      result = new GermanNormalizationFilter(result);
      result = new GermanLightStemFilter(result);
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.core.LowerCaseFilter

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.