Package com.clearnlp.component.tagger

Examples of com.clearnlp.component.tagger.EnglishOnlinePOSTagger


    int dfc = getDocumentFrequencyCutoff(eTrain);
    int dtc = getDocumentTokenCount(eTrain);
    DEPTree tree;
   
    Set<String> sLsfs = getSimplifiedFormsByDocumentFrequencies(reader, trainFiles, dfc, dtc);
    EnglishOnlinePOSTagger component = new EnglishOnlinePOSTagger(xmls, sLsfs);
   
    LOG.info("Collecting lexica:");
    int total = 0;
   
    for (String trainFile : trainFiles)
    {
      reader.open(UTInput.createBufferedFileReader(trainFile));
     
      while ((tree = reader.next()) != null)
      {
        component.collect(tree);
        if (++total%5000 == 0) LOG.info(".");
      }
     
      reader.close();
    LOG.info("\n");

    LOG.info("Trainig:");
   
    Object[] lexica = component.getLexica();
    component = new EnglishOnlinePOSTagger(xmls, lexica);
   
    for (String trainFile : trainFiles)
    {
      reader.open(UTInput.createBufferedFileReader(trainFile));
     
      while ((tree = reader.next()) != null)
      {
        component.train(tree);
        if (++total%5000 == 0) LOG.info(".");
      }
     
      reader.close();
    }
   
    IOnlineAlgorithm algorithm = new OnlineAdaGradHinge(0.01, 0.1);
    List<DEPTree> devTrees = getTrees(reader, devFiles);
   
    component.develop(LOG, algorithm, 5, devTrees);
   
    LOG.info("Bootsrapping:");
   
    for (String trainFile : trainFiles)
    {
      reader.open(UTInput.createBufferedFileReader(trainFile));
     
      while ((tree = reader.next()) != null)
      {
        component.bootstrap(tree);
        if (++total%5000 == 0) LOG.info(".");
      }
     
      reader.close();
    }
   
    component.develop(LOG, algorithm, 5, devTrees);
  }
View Full Code Here

TOP

Related Classes of com.clearnlp.component.tagger.EnglishOnlinePOSTagger

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.