Package ch.akuhn.hapax.corpus

Source Code of ch.akuhn.hapax.corpus.Terms

package ch.akuhn.hapax.corpus;

import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import ch.akuhn.util.Bag;
import ch.akuhn.util.PrintOn;

public class Terms extends Bag<String> implements ScannerClient {

    public Terms() {
        // do nothing
    }

    public Terms(File file) {
        new CamelCaseScanner().client(this).onFile(file).run();
    }

    public Terms(String text) {
        new CamelCaseScanner().client(this).onString(text).run();
    }
   
    public Terms(Collection<String> strings) {
        this.addAll(strings); // #addAll handles "instance of bag" special case
    }

    public Terms(InputStream stream) {
        new CamelCaseScanner().client(this).onStream(stream).run();
    }

    public Terms(Terms... union) {
        for (Terms each: union) addAll(each);
    }

    public Terms stem() {
        Stemmer stemmer = new PorterStemmer();
        Terms terms = new Terms();
        for (Count<String> each: this.counts()) {
            terms.add(stemmer.stem(each.element), each.count);
        }
        return terms;
    }

    public Terms toLowerCase() {
        Terms terms = new Terms();
        for (Count<String> each: this.counts()) {
            terms.add(each.element.toString().toLowerCase(), each.count);
        }
        return terms;
    }

    //@Override
    public void yield(CharSequence term) {
        this.add(term.toString());
    }
   
    public Terms intern() {
        Terms terms = new Terms();
        for (Count<String> each: this.counts()) {
            terms.add(each.element.intern(), each.count);
        }
        return terms;
    }

    public void storeOn(Appendable app) {
        PrintOn out = new PrintOn(app);
        int count = -1;
        for (Count<String> each: sortedCounts()) {
            if (each.count != count) out.print(count = each.count).space();
            out.append(each.element).space();
        }
        out.cr();
    }
   
    public void readFrom(java.util.Scanner scanner) {
        while (scanner.hasNextInt()) {
            int count = scanner.nextInt();
            while (!scanner.hasNextInt()) {
                add(scanner.next(), count);
            }
        }
    }

    public List<Count<String>> top(int num) {
        List<Count<String>> top = new ArrayList<Count<String>>();
        Iterator<Count<String>> counts = this.sortedCounts().iterator();
        for (int n = 0; n < num && counts.hasNext(); n++) top.add(counts.next());
        return top;
    }
   
}
TOP

Related Classes of ch.akuhn.hapax.corpus.Terms

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.