Package com.tamingtext.util

Source Code of com.tamingtext.util.NameFinderFactory

/*
* Copyright 2008-2011 Grant Ingersoll, Thomas Morton and Drew Farris
*
*    Licensed under the Apache License, Version 2.0 (the "License");
*    you may not use this file except in compliance with the License.
*    You may obtain a copy of the License at
*
*        http://www.apache.org/licenses/LICENSE-2.0
*
*    Unless required by applicable law or agreed to in writing, software
*    distributed under the License is distributed on an "AS IS" BASIS,
*    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*    See the License for the specific language governing permissions and
*    limitations under the License.
* -------------------
* To purchase or learn more about Taming Text, by Grant Ingersoll, Thomas Morton and Drew Farris, visit
* http://www.manning.com/ingersoll
*/

package com.tamingtext.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;

import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.tamingtext.opennlp.PooledTokenNameFinderModel;

/** Encapsulates OpenNLP's NameFinder by providing a mechanism to load
*  all of the name finder models files found in a single directory into memory
*  and instantiating an array of NameFinderME objects.
*/
public class NameFinderFactory {
 
  private static final Logger log = LoggerFactory.getLogger(NameFinderFactory.class);

  NameFinderME[] finders;
  String[] modelNames;
 
  /** Create a NameFinderEngine that loads models from the directory specified
   *  in the system property <code>model.dir</code> system property for the
   *  english language
   *  
   * @param modelDirectory
   *   the directory containing the model files, can be null to force
   *   use of the model.dir system property.
   * @throws IOException
   */
  public NameFinderFactory() throws IOException {
    this(null);
  }

  public NameFinderFactory(Map<String,String> param) throws IOException {
    String language = OpenNLPUtil.getModelLanguage(param);
    String modelDirectory = OpenNLPUtil.getModelDirectory(param);
    loadNameFinders(language, modelDirectory);
  }
 
  /** Create a NameFinderEngine that loads models from the specified directory,
   *  or, reads the <code>model.dir</code> system property in order to determine
   *  if the <code>modelDirectory</code> is <code>null</code> or empty.
   * @param language
   *   two letter language prefix from the model file names.
   * @param modelDirectory
   *   the directory containing the model files, can be null to force
   *   use of the model.dir system property.
   * @throws IOException
   */
  public NameFinderFactory(String language, String modelDirectory) throws IOException {
    loadNameFinders(language, modelDirectory);
  }

  /** Load the name finder models. Currently any file in the model directory
   *  that starts with (lang)-ner
   * @param language
   * @param modelDirectory
   *    can be null to use the value of the system property model.dir
   * @return
   */
  protected File[] findNameFinderModels(String language, String modelDirectory) {
    final String modelPrefix = language + "-ner";

    log.info("Loading name finder models from {} using prefix {} ",
        new Object[] { modelDirectory, modelPrefix } );

    File[] models = new File(modelDirectory).listFiles(new FilenameFilter() {
      public boolean accept(File file, String name) {
        if (name.startsWith(modelPrefix)) {
          return true;
        }
        return false;
      }
    });
   
    if (models == null || models.length < 1) {
      throw new RuntimeException("Configuration Error: No models in " + modelDirectory);
    }
    return models;
  }

  /** Load name finder models based upon models for the specified language
   *  in the specified model directory.
   *
   * @param language
   * @param modelDirectory
   *      can be null to use the value of the system property model.dir
   * @throws IOException
   */
  protected void loadNameFinders(String language, String modelDirectory) throws IOException {
    //<start id="maxent.examples.namefinder.setup"/>
    File modelFile;

    File[] models //<co id="nfe.findmodels"/>
      = findNameFinderModels(language, modelDirectory);
    modelNames = new String[models.length];
    finders = new NameFinderME[models.length];

    for (int fi = 0; fi < models.length; fi++) {
      modelFile = models[fi];
      modelNames[fi] = modelNameFromFile(language, modelFile); //<co id="nfe.modelname"/>
     
      log.info("Loading model {}", modelFile);
      InputStream modelStream = new FileInputStream(modelFile);
      TokenNameFinderModel model = //<co id="nfe.modelreader"/>
          new PooledTokenNameFinderModel(modelStream);
      finders[fi] = new NameFinderME(model);
     
    }

    /*<calloutlist>
      <callout arearefs="nfe.findmodels">Find Models</callout>
      <callout arearefs="nfe.modelname">Determine Model Name</callout>
      <callout arearefs="nfe.modelreader">Read Model</callout>
    </calloutlist>*/
    //<end id="maxent.examples.namefinder.setup"/>
  }

  /** Extract the model name from the model file, this is used to display
   *  the type of named entity found
   * @param language
   * @param modelFile
   * @return
   */
  protected String modelNameFromFile(String language, File modelFile) {
    String modelName = modelFile.getName();
    return modelName.replace(language + "-ner-", "").replace(".bin", "");
  }

  /** Obtain a reference to the array of NameFinderME's loaded by the engine.
   * @return
   */
  public NameFinderME[] getNameFinders() {
    return finders;
  }

  /** Returns the names of each of the models loaded by the engine, an array
   *  parallel with the array returned by {@link #getFinders()}
   * @return
   */
  public String[] getModelNames() {
    return modelNames;
  }

}
TOP

Related Classes of com.tamingtext.util.NameFinderFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.