Package edu.washington.cs.knowitall.extractor.conf

Source Code of edu.washington.cs.knowitall.extractor.conf.ReVerbDataSetCreator

package edu.washington.cs.knowitall.extractor.conf;

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.OutputStream;

import edu.washington.cs.knowitall.extractor.ReVerbExtractor;
import edu.washington.cs.knowitall.nlp.ChunkedSentence;
import edu.washington.cs.knowitall.nlp.ChunkedSentenceReader;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction;
import edu.washington.cs.knowitall.util.DefaultObjects;

/***
* Used to create a labeled data set using the ReVerb extractions from a given
* file. See the documentation for <code>LabeledBinaryExtractionReader</code>
* for details on the output format.
*
* @author afader
*
*/
public class ReVerbDataSetCreator {

    /**
     * Runs ReVerb on the given file (the first argument), and writes the output
     * to the given target file (the second argument). If the first argument is
     * "-", then reads the data from standard input.
     *
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {

        if (args.length != 2) {
            System.err.println("Usage: ReVerbDataSetCreator input output");
            return;
        }

        BufferedReader reader;
        if (args[0].equals("-")) {
            reader = new BufferedReader(new InputStreamReader(System.in));
        } else {
            reader = new BufferedReader(new FileReader(args[0]));
        }

        ChunkedSentenceReader sentReader = DefaultObjects
                .getDefaultSentenceReader(reader);

        OutputStream outStream = new FileOutputStream(args[1]);
        LabeledBinaryExtractionWriter out = new LabeledBinaryExtractionWriter(
                outStream);

        ReVerbExtractor extractor = new ReVerbExtractor();

        for (ChunkedSentence sent : sentReader.getSentences()) {
            for (ChunkedBinaryExtraction extr : extractor.extract(sent)) {
                LabeledBinaryExtraction extrLabeled = new LabeledBinaryExtraction(
                        extr, 0);
                out.writeExtraction(extrLabeled);
            }
        }

    }

}
TOP

Related Classes of edu.washington.cs.knowitall.extractor.conf.ReVerbDataSetCreator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.