Package edu.stanford.nlp.ie.ner.webapp

Source Code of edu.stanford.nlp.ie.ner.webapp.NERServlet

package edu.stanford.nlp.ie.ner.webapp;

import java.awt.Color;
import java.io.*;
import java.util.*;
import java.util.zip.*;

import javax.servlet.*;
import javax.servlet.http.*;

import org.apache.commons.lang3.StringEscapeUtils;

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ie.crf.NERGUI;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.ling.CoreAnnotations;

/**
*  This is a servlet interface to the CRFClassifier.
*
@author Dat Hoang 2011
@author John Bauer
*
**/


public class NERServlet extends HttpServlet
{
  private String format;
  private boolean spacing;
  private String defaultClassifier;
  private List<String> classifiers = new ArrayList<String>();
  private Map<String, CRFClassifier> ners;

  private static final int MAXIMUM_QUERY_LENGTH = 3000;
 
  public void init()
    throws ServletException
  {
    format = getServletConfig().getInitParameter("outputFormat");
    if (format == null || format.trim().equals(""))
      throw new ServletException("Invalid outputFormat setting.");
   
    String spacingStr = getServletConfig().getInitParameter("preserveSpacing");
    if (spacingStr == null || spacingStr.trim().equals(""))
      throw new ServletException("Invalid preserveSpacing setting.");
    //spacing = Boolean.valueOf(spacingStr).booleanValue();
    spacingStr = spacingStr.trim().toLowerCase();
    spacing = "true".equals(spacingStr);
   
    String path = getServletContext().getRealPath("/WEB-INF/data/models");
    for (String classifier : new File(path).list()) {
      classifiers.add(classifier);
    }
    // TODO: get this from somewhere more interesting?
    defaultClassifier = classifiers.get(0);

    for (String classifier : classifiers) {
      log(classifier);
    }

    ners = Generics.newHashMap();
    for (String classifier : classifiers) {
      CRFClassifier model = null;
      String filename = "/WEB-INF/data/models/" + classifier;
      InputStream is = getServletConfig().getServletContext().getResourceAsStream(filename);
     
      if (is == null)
        throw new ServletException("File not found. Filename = " + filename);
      try {
        if (filename.endsWith(".gz")) {
          is = new BufferedInputStream(new GZIPInputStream(is));
        } else {
          is = new BufferedInputStream(is);
        }
        model = CRFClassifier.getClassifier(is);
      } catch (IOException e) {
        throw new ServletException("IO problem reading classifier.");
      } catch (ClassCastException e) {
        throw new ServletException("Classifier class casting problem.");
      } catch (ClassNotFoundException e) {
        throw new ServletException("Classifier class not found problem.");
      } finally {
        try {
          is.close();
        } catch (IOException e) {
          //do nothing
        }
      }
      ners.put(classifier, model);
    }
  }
 
  public void doGet(HttpServletRequest request, HttpServletResponse response)
    throws ServletException, IOException
  {
    if (request.getCharacterEncoding() == null) {
      request.setCharacterEncoding("utf-8");
    }
    response.setContentType("text/html; charset=UTF-8");

    this.getServletContext().getRequestDispatcher("/header.jsp").
      include(request, response);
    request.setAttribute("classifiers", classifiers);
    this.getServletContext().getRequestDispatcher("/ner.jsp").
      include(request, response);
    addResults(request, response);
    this.getServletContext().getRequestDispatcher("/footer.jsp").
      include(request, response);
  }
 
  public void doPost(HttpServletRequest request, HttpServletResponse response)
    throws ServletException, IOException
  {
    doGet(request, response);
  }

  public void addResults(HttpServletRequest request,
                         HttpServletResponse response)
    throws IOException
  {
    String input = request.getParameter("input");
    if (input == null) {
      return;
    }
    input = input.trim();
    if (input.equals("")) {
      return;
    }

    PrintWriter out = response.getWriter();
    if (input.length() > MAXIMUM_QUERY_LENGTH) {
      out.print("This query is too long.  If you want to run very long queries, please download and use our <a href=\"http://nlp.stanford.edu/software/CRF-NER.shtml\">publicly released distribution</a>.");
      return;
    }
   
    String outputFormat = request.getParameter("outputFormat");
    if (outputFormat == null || outputFormat.trim().equals("")) {
      outputFormat = this.format;
    }
   
    boolean preserveSpacing;
    String preserveSpacingStr = request.getParameter("preserveSpacing");
    if (preserveSpacingStr == null || preserveSpacingStr.trim().equals("")) {
      preserveSpacing = this.spacing;
    } else {
      preserveSpacingStr = preserveSpacingStr.trim();
      preserveSpacing = Boolean.valueOf(preserveSpacingStr);
    }
   
    String classifier = request.getParameter("classifier");
    if (classifier == null || classifier.trim().equals("")) {
      classifier = this.defaultClassifier;
    }

    response.addHeader("classifier", classifier);
    response.addHeader("outputFormat", outputFormat);
    response.addHeader("preserveSpacing", String.valueOf(preserveSpacing));

    if (outputFormat.equals("highlighted")) {
      outputHighlighting(out, ners.get(classifier), input);
    } else {
      out.print(StringEscapeUtils.escapeHtml4(ners.get(classifier).classifyToString(input, outputFormat, preserveSpacing)));
    }
  }

  public void outputHighlighting(PrintWriter out,
                                 CRFClassifier classifier,
                                 String input) {
    Set<String> labels = classifier.labels();
    String background = classifier.backgroundSymbol();
    List<List<CoreMap>> sentences = classifier.classify(input);
    Map<String, Color> tagToColorMap =
      NERGUI.makeTagToColorMap(labels, background);

    StringBuilder result = new StringBuilder();
    int lastEndOffset = 0;
    for (List<CoreMap> sentence : sentences) {
      for (CoreMap word : sentence) {
        int beginOffset = word.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int endOffset = word.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        String answer = word.get(CoreAnnotations.AnswerAnnotation.class);

        if (beginOffset > lastEndOffset) {
          result.append(StringEscapeUtils.escapeHtml4(input.substring(lastEndOffset, beginOffset)));
        }
        // Add a color bar for any tagged words
        if (!background.equals(answer)) {
          Color color = tagToColorMap.get(answer);
          result.append("<span style=\"color:#ffffff;background:" +
                        NERGUI.colorToHTML(color) + "\">");
        }

        result.append(StringEscapeUtils.escapeHtml4(input.substring(beginOffset, endOffset)));
        // Turn off the color bar
        if (!background.equals(answer)) {
          result.append("</span>");
        }

        lastEndOffset = endOffset;
      }
    }
    if (lastEndOffset < input.length()) {
      result.append(StringEscapeUtils.escapeHtml4(input.substring(lastEndOffset)));     
    }
    result.append("<br><br>");
    result.append("Potential tags:");
    for (String label : tagToColorMap.keySet()) {
      result.append("<br>&nbsp;&nbsp;");
      Color color = tagToColorMap.get(label);
      result.append("<span style=\"color:#ffffff;background:" +
                    NERGUI.colorToHTML(color) + "\">");
      result.append(StringEscapeUtils.escapeHtml4(label));
      result.append("</span>");
    }
    out.print(result.toString());
  }
}
TOP

Related Classes of edu.stanford.nlp.ie.ner.webapp.NERServlet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.