Examples of EnglishStemmer


Examples of org.tartarus.snowball.ext.EnglishStemmer

   */
  public static void main(String[] args) throws Exception {
    LuceneIndexerSearcher lis = new LuceneIndexerSearcher(false);
    IndexSearcher is = lis.getIndexSearcher();

    Stemmer stemmerTools = new Stemmer(new EnglishStemmer());
   
    //QueryParser qp = new Oscar3QueryParser("txt", new Oscar3Analyzer(), lis, false);
    //Query q = qp.parse("NaCl");
   
    String queryTerm = "lipid";
View Full Code Here

Examples of org.tartarus.snowball.ext.EnglishStemmer

      double expected = scaleFactor * docFreq;
      double excess = df.getCount(s) - expected;
      score = excess / clusterSize;       
      if(score > threshold) scores.put(s, score);
    }
    Stemmer st = new Stemmer(new EnglishStemmer());
    Map<String,List<String>> stems = st.wordsToStems(df.getSet());
    for(String stem : stems.keySet()) {
      List<String> words = stems.get(stem);
      if(words.size() > 1) {
        BooleanQuery bq = new BooleanQuery(true);
View Full Code Here

Examples of org.tartarus.snowball.ext.EnglishStemmer

      }
     

      clusterFiles.add(new File(ir.document(i).getField("filename").stringValue().replaceAll("markedup", "source")));
    }
    Stemmer st = new Stemmer(new EnglishStemmer());
    Map<String,List<String>> stems = st.wordsToStems(dfs.getSet());

    dfs.discardInfrequent(2);
    NGramTfDf ngtd = NGramTfDf.analyseFiles(clusterFiles);
    ngtd.calculateNGrams();
View Full Code Here

Examples of org.tartarus.snowball.ext.EnglishStemmer

  public static void main(String[] args) throws Exception {
    LuceneIndexerSearcher lis = new LuceneIndexerSearcher(false);
    IndexSearcher is = lis.getIndexSearcher();
   
    Stemmer stemmerTools = new Stemmer(new EnglishStemmer());
   
    //QueryParser qp = new Oscar3QueryParser("txt", new Oscar3Analyzer(), lis, false);
    //Query q = qp.parse("NaCl");
   
    String queryTerm = "content";
View Full Code Here

Examples of org.tartarus.snowball.ext.EnglishStemmer

   
    return results;
  }
 
  public Map<String,List<String>> ngramsByStem() {
    Stemmer st = new Stemmer(new EnglishStemmer());
    Set<String> terms = new HashSet<String>();
    for(SubstringClass sc : classArray) {
      for(String s : sc.getSuffixStrings(2)) {
        if(!checkTerm(s)) continue;
        terms.add(s);
View Full Code Here

Examples of org.tartarus.snowball.ext.EnglishStemmer

import uk.ac.cam.ch.wwmm.ptclib.string.StringTools;

public class DocClassifier {

  public static Event docToEvent(IndexReader ir, int doc, String cue) throws Exception {
    Stemmer st = new Stemmer(new EnglishStemmer());
    List<String> words = new ArrayList<String>();
    boolean hasCue = false;
    TermFreqVector tvf = ir.getTermFreqVector(doc, "txt");
    String [] termArray = tvf.getTerms();
    int [] termFreqs = tvf.getTermFrequencies();
View Full Code Here

Examples of org.tartarus.snowball.ext.EnglishStemmer

import java.util.Map;

public class TestSnowballPorterFilterFactory extends BaseTokenStreamTestCase {

  public void test() throws IOException {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    String[] gold = new String[test.length];
    for (int i = 0; i < test.length; i++) {
      stemmer.setCurrent(test[i]);
      stemmer.stem();
      gold[i] = stemmer.getCurrent();
    }

    SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
    Map<String, String> args = new HashMap<String, String>();
    args.put("language", "English");
View Full Code Here

Examples of org.tartarus.snowball.ext.EnglishStemmer

public class TestSnowballPorterFilterFactory extends BaseTokenStreamFactoryTestCase {

  public void test() throws Exception {
    String text = "The fledgling banks were counting on a big boom in banking";
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = text.split("\\s");
    String[] gold = new String[test.length];
    for (int i = 0; i < test.length; i++) {
      stemmer.setCurrent(test[i]);
      stemmer.stem();
      gold[i] = stemmer.getCurrent();
    }
   
    Reader reader = new StringReader(text);
    TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
    stream = tokenFilterFactory("SnowballPorter", "language", "English").create(stream);
View Full Code Here

Examples of org.tartarus.snowball.ext.EnglishStemmer

import java.util.Collections;

public class EnglishPorterFilterFactoryTest extends BaseTokenTestCase {

  public void test() throws IOException {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    StringBuilder gold = new StringBuilder();
    for (int i = 0; i < test.length; i++) {
      stemmer.setCurrent(test[i]);
      stemmer.stem();
      gold.append(stemmer.getCurrent()).append(' ');
    }

    EnglishPorterFilterFactory factory = new EnglishPorterFilterFactory();
    Map<String, String> args = new HashMap<String, String>();
View Full Code Here

Examples of org.tartarus.snowball.ext.EnglishStemmer

    String out = tsToString(factory.create(new IterTokenStream(test)));
    assertEquals(gold.toString().trim(), out);
  }

  public void testProtected() throws Exception {
    EnglishStemmer stemmer = new EnglishStemmer();
    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
    StringBuilder gold = new StringBuilder();
    for (int i = 0; i < test.length; i++) {
      if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) {
        stemmer.setCurrent(test[i]);
        stemmer.stem();
        gold.append(stemmer.getCurrent()).append(' ');
      } else {
        gold.append(test[i]).append(' ');
      }
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.