Examples of opennlp.tools.util.PlainTextByLineStream

opennlp.tools.util.PlainTextByLineStream
Reads a plain text file and returns each line as a String object.

  public void setup() throws IOException {
    InputStream in = ADParagraphStreamTest.class
        .getResourceAsStream("/opennlp/tools/formats/ad.sample");


    ADNameSampleStream stream = new ADNameSampleStream(
        new PlainTextByLineStream(in, "UTF-8"), true);


    NameSample sample = stream.read();


    while (sample != null) {
      samples.add(sample);

View Full Code Here

        TokenNameFinderModel model = new TokenNameFinderModelLoader().load(new File(args[i]));
        nameFinders[i] = new NameFinderME(model);
      }


      ObjectStream<String> untokenizedLineStream =
          new PlainTextByLineStream(new InputStreamReader(System.in));


      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
      perfMon.start();


      try {
        String line;
        while((line = untokenizedLineStream.read()) != null) {
          String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);


          // A new line indicates a new document,
          // adaptive data must be cleared for a new document

View Full Code Here

      } catch (IOException e) {
        throw new TerminateToolException(-1, "Failed to load all coreferencer models!", e);
      }
      
      ObjectStream<String> lineStream =
          new PlainTextByLineStream(new InputStreamReader(System.in));
      
      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "parses");
      perfMon.start();
      
      try {
        
        int sentenceNumber = 0;
        List<Mention> document = new ArrayList<Mention>();
        List<Parse> parses = new ArrayList<Parse>();
        
        String line;
        while ((line = lineStream.read()) != null) {


          if (line.equals("")) {
            DiscourseEntity[] entities = treebankLinker.getEntities(document.toArray(new Mention[document.size()]));
            //showEntities(entities);
            new CorefParse(parses,entities).show();

View Full Code Here

    FileInputStream sampleDataIn = new FileInputStream(new File(getClass()
        .getClassLoader()
        .getResource("opennlp/tools/namefind/AnnotatedSentences.txt").toURI()));


    ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
        new PlainTextByLineStream(sampleDataIn.getChannel(), "ISO-8859-1"));


    TrainingParameters mlParams = ModelUtil.createTrainingParameters(70, 1);
    mlParams.put(TrainingParameters.ALGORITHM_PARAM,
        ModelType.MAXENT.toString());

View Full Code Here

    FileInputStream sampleDataIn = new FileInputStream(new File(getClass()
        .getClassLoader()
        .getResource("opennlp/tools/namefind/AnnotatedSentences.txt").toURI()));


    ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
        new PlainTextByLineStream(sampleDataIn.getChannel(), "ISO-8859-1"));


    TrainingParameters mlParams = ModelUtil.createTrainingParameters(70, 1);
    mlParams.put(TrainingParameters.ALGORITHM_PARAM,
        ModelType.MAXENT.toString());

View Full Code Here

  private static ObjectStream<TokenSample> createSampleStream()
      throws IOException {
    InputStream in = TokenizerFactoryTest.class.getClassLoader()
        .getResourceAsStream("opennlp/tools/tokenize/token.train");


    return new TokenSampleStream(new PlainTextByLineStream(
        new InputStreamReader(in)));
  }

View Full Code Here


  @Test
  public void testSimple() throws IOException {
    // add one sentence with expandME = includeFeats = false
    ADPOSSampleStream stream = new ADPOSSampleStream(
        new PlainTextByLineStream(
            ADParagraphStreamTest.class
                .getResourceAsStream("/opennlp/tools/formats/ad.sample"),
            "UTF-8"), false, false);


    POSSample sample = stream.read();

View Full Code Here

        "opennlp/tools/namefind/AnnotatedSentences.txt");


    String encoding = "ISO-8859-1";


    NameSampleDataStream ds = new NameSampleDataStream(
        new PlainTextByLineStream(new InputStreamReader(in, encoding)));


    NameSample ns = ds.read();


    String[] expectedNames = { "Alan McKennedy", "Julie", "Marie Clara",
        "Stefanie Schmidt", "Mike", "Stefanie Schmidt", "George", "Luise",

View Full Code Here

  
  @Test
  public void testExpandME() throws IOException {
    // add one sentence with expandME = true
    ADPOSSampleStream stream = new ADPOSSampleStream(
        new PlainTextByLineStream(
            ADParagraphStreamTest.class
                .getResourceAsStream("/opennlp/tools/formats/ad.sample"),
            "UTF-8"), true, false);


    POSSample sample = stream.read();

View Full Code Here

  public void testWithNameTypes() throws Exception {
    InputStream in = getClass().getClassLoader().getResourceAsStream(
        "opennlp/tools/namefind/voa1.train");


    NameSampleDataStream ds = new NameSampleDataStream(
        new PlainTextByLineStream(new InputStreamReader(in)));


    Map<String, List<String>> names = new HashMap<String, List<String>>();
    Map<String, List<Span>> spans = new HashMap<String, List<Span>>();
    
    NameSample ns;

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of opennlp.tools.util.PlainTextByLineStream

com.tamingtext.opennlp.NameFinderTest

io.lumify.opennlpDictionary.OpenNLPDictionaryExtractorGraphPropertyWorker

io.lumify.opennlpme.OpenNLPMaximumEntropyExtractorGraphPropertyWorker

opennlp.tools.chunker.ChunkerDetailedFMeasureListenerTest

opennlp.tools.chunker.ChunkerEvaluatorTest

opennlp.tools.chunker.ChunkerFactoryTest

opennlp.tools.chunker.ChunkerME

opennlp.tools.chunker.ChunkerMETest

opennlp.tools.chunker.ChunkSampleStreamTest

opennlp.tools.chunker.ChunkSampleTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.