Package opennlp.tools.tokenize

Examples of opennlp.tools.tokenize.TokenizerModel


  }

  public void setTokenizer(FileSystem fs, Path p){
    try {
      FSDataInputStream in = fs.open(p);
      TokenizerModel model;
      model = new TokenizerModel(in);
      tokenizer = new TokenizerME(model);
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException("OpenNLPTokenizer model not available at " + p);
    }
View Full Code Here


  }

  public void setTokenizer(FileSystem fs, Path p){
    try {
      FSDataInputStream in = fs.open(p);
      TokenizerModel model;
      model = new TokenizerModel(in);
      tokenizer = new TokenizerME(model);
    }
    catch (IOException e) {
      e.printStackTrace();
    }
View Full Code Here

     */
    public Tokenizer getTokenizer(String language) {
        Tokenizer tokenizer = null;
        if(language != null){
            try {
                TokenizerModel model = getTokenizerModel(language);
                if(model != null){
                    tokenizer = new TokenizerME(getTokenizerModel(language));
                }
            } catch (InvalidFormatException e) {
                log.warn("Unable to load Tokenizer Model for "+language+": " +
View Full Code Here

    CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);
    ObjectStream<TokenSample> sampleStream = openSampleData("Training",
        trainingDataInFile, parameters.getEncoding());

    TokenizerModel model;
    try {
      model = opennlp.tools.tokenize.TokenizerME.train(
          parameters.getLanguage(), sampleStream,
          parameters.isAlphaNumericOptimizationEnabled(),
          parameters.getCutoff(), parameters.getNumberOfIterations());
View Full Code Here

    if (encoding == null) {
      System.out.println(getHelp());
      throw new TerminateToolException(1);
    }

    TokenizerModel model = new TokenizerModelLoader().load(
        new File(CmdLineUtil.getParameter("-model", args)));

    TokenizerEvaluator evaluator = new TokenizerEvaluator(
        new opennlp.tools.tokenize.TokenizerME(model));
View Full Code Here

    // the sentence detector and tokenizer constructors
    // take paths to their respective models
    SentenceDetectorME sdetector = new SentenceDetectorME(
        new SentenceModel(new FileInputStream(
            "models/en-sent.bin")));
    Tokenizer tokenizer = new TokenizerME(new TokenizerModel(
        new FileInputStream("models/en-token.bin")));

    // the parser takes the path to the parser models
    // directory and a few other options
    /*
 
View Full Code Here

   *
   * @throws IOException
   */
  public ApacheExtractor() throws IOException {
    nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel)));
    tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel)));
        sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel)));
  }
View Full Code Here

  }

  public void setTokenizer(FileSystem fs, Path p){
    try {
      FSDataInputStream in = fs.open(p);
      TokenizerModel model;
      model = new TokenizerModel(in);
      tokenizer = new TokenizerME(model);
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException("OpenNLPTokenizer model not available at " + p);
    }
View Full Code Here

        }
        if(languageConfig.isLanguage(language)){
            String modelName = languageConfig.getParameter(language, PARAM_MODEL);
            if(modelName != null){
                try {
                    TokenizerModel model = openNlp.getModel(TokenizerModel.class, modelName, null);
                    return new TokenizerME(model).tokenize(label);
                } catch (Exception e) {
                    log.warn("Unable to load configured TokenizerModel '"+modelName
                        + "' for language '"+language
                        + "! Fallback to default Tokenizers",e);
View Full Code Here

TOP

Related Classes of opennlp.tools.tokenize.TokenizerModel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.