Package edu.stanford.nlp.pipeline

Source Code of edu.stanford.nlp.pipeline.JSONOutputter$JSONWriter

package edu.stanford.nlp.pipeline;


import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.time.TimeAnnotations;
import edu.stanford.nlp.time.Timex;
import edu.stanford.nlp.trees.TreeCoreAnnotations;

import java.io.*;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Output an Annotation to human readable JSON.
* This is not a lossless operation; for more strict serialization,
* see {@link edu.stanford.nlp.pipeline.AnnotationSerializer}; e.g.,
* {@link edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer}.
*
* @author Gabor Angeli
*/
public class JSONOutputter extends AnnotationOutputter {

  protected static final String INDENT_CHAR = "  ";


  /** {@inheritDoc} */
  @SuppressWarnings("RedundantCast"// It's lying; we need the "redundant" casts (as of 2014-09-08)
  @Override
  public void print(Annotation doc, OutputStream target, Options options) throws IOException {
    JSONWriter l0 = new JSONWriter(new PrintWriter(target));

    l0.object(l1 -> {

      // Add annotations attached to a Document
      l1.set("docId", doc.get(CoreAnnotations.DocIDAnnotation.class));
      l1.set("docDate", doc.get(CoreAnnotations.DocDateAnnotation.class));
      l1.set("docSourceType", doc.get(CoreAnnotations.DocSourceTypeAnnotation.class));
      l1.set("docType", doc.get(CoreAnnotations.DocTypeAnnotation.class));
      l1.set("author", doc.get(CoreAnnotations.AuthorAnnotation.class));
      l1.set("location", doc.get(CoreAnnotations.LocationAnnotation.class));
      if (options.includeText) {
        l1.set("text", doc.get(CoreAnnotations.TextAnnotation.class));
      }

      // Add sentences
      if (doc.get(CoreAnnotations.SentencesAnnotation.class) != null) {
        l1.set("sentences", doc.get(CoreAnnotations.SentencesAnnotation.class).stream().map(sentence -> (Consumer<Writer>) (Writer l2) -> {
          // Add a single sentence
          // (metadata)
          l2.set("id", sentence.get(CoreAnnotations.SentenceIDAnnotation.class));
          l2.set("index", sentence.get(CoreAnnotations.SentenceIndexAnnotation.class));
          l2.set("line", sentence.get(CoreAnnotations.LineNumberAnnotation.class));
          // (constituency tree)
          StringWriter treeStrWriter = new StringWriter();
          options.constituentTreePrinter.printTree(sentence.get(TreeCoreAnnotations.TreeAnnotation.class), new PrintWriter(treeStrWriter, true));
          l2.set("parse", treeStrWriter.toString());
          // (dependency trees)
          l2.set("basic-dependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)));
          l2.set("collapsed-dependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class)));
          l2.set("collapsed-ccprocessed-dependencies", buildDependencyTree(sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class)));

          // (add tokens)
          if (sentence.get(CoreAnnotations.TokensAnnotation.class) != null) {
            l2.set("tokens", doc.get(CoreAnnotations.TokensAnnotation.class).stream().map(token -> (Consumer<Writer>) (Writer l3) -> {
              // Add a single token
              l3.set("index", token.index());
              l3.set("word", token.word());
              l3.set("lemma", token.lemma());
              l3.set("characterOffsetBegin", token.beginPosition());
              l3.set("characterOffsetEnd", token.endPosition());
              l3.set("pos", token.tag());
              l3.set("ner", token.ner());
              l3.set("normalizedNER", token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
              l3.set("speaker", token.get(CoreAnnotations.SpeakerAnnotation.class));
              l3.set("truecase", token.get(CoreAnnotations.TrueCaseAnnotation.class));
              l3.set("truecaseText", token.get(CoreAnnotations.TrueCaseTextAnnotation.class));
              // Timex
              Timex time = token.get(TimeAnnotations.TimexAnnotation.class);
              if (time != null) {
                l3.set("timex", (Consumer<Writer>) l4 -> {
                  l4.set("tid", time.tid());
                  l4.set("type", time.timexType());
                  l4.set("value", time.value());
                  l4.set("altValue", time.altVal());
                });
              }
            }));
          }
        }));
      }
    });

    l0.writer.flush()// flush
  }

  /**
   * Convert a dependency graph to a format expected as input to {@link Writer#set(String, Object)}.
   */
  @SuppressWarnings("RedundantCast"// It's lying; we need the "redundant" casts (as of 2014-09-08)
  private static Object buildDependencyTree(SemanticGraph graph) {
    if(graph != null) {
      return Stream.concat(
          // Roots
          graph.getRoots().stream().map( (IndexedWord root) -> (Consumer<Writer>) dep -> {
            dep.set("dep", "ROOT");
            dep.set("governor", "0");
            dep.set("governorGloss", "ROOT");
            dep.set("dependent", Integer.toString(root.index()));
            dep.set("dependentGloss", root.word());
          }),
          // Regular edges
          graph.edgeListSorted().stream().map( (SemanticGraphEdge edge) -> (Consumer<Writer>) (Writer dep) -> {
            dep.set("dep", edge.getRelation().toString());
            dep.set("governor", Integer.toString(edge.getGovernor().index()));
            dep.set("governorGloss", edge.getGovernor().word());
            dep.set("dependent", Integer.toString(edge.getDependent().index()));
            dep.set("dependentGloss", edge.getDependent().word());
          })
      );
    } else {
      return null;
    }
  }

  public static void jsonPrint(Annotation annotation, OutputStream os) throws IOException {
    new JSONOutputter().print(annotation, os);
  }

  public static void jsonPrint(Annotation annotation, OutputStream os, StanfordCoreNLP pipeline) throws IOException {
    new JSONOutputter().print(annotation, os, pipeline);
  }

  public static void jsonPrint(Annotation annotation, OutputStream os, Options options) throws IOException {
    new JSONOutputter().print(annotation, os, options);
  }


  /**
   * <p>Our very own little JSON writing class.
   * For usage, see the test cases in JSONOutputterTest.</p>
   *
   * <p>For the love of all that is holy, don't try to write JSON multithreaded.
   * It should go without saying that this is not threadsafe.</p>
   */
  protected static class JSONWriter {
    private final PrintWriter writer;
    private JSONWriter(PrintWriter writer) {
      this.writer = writer;
    }

    protected static String cleanJSON(String s) {
      return s
          .replace("\\", "\\\\")
          .replace("\b", "\\b")
          .replace("\f", "\\f")
          .replace("\n", "\\n")
          .replace("\r", "\\r")
          .replace("\t", "\\t")
          .replace("'", "\\'")
          .replace("\"", "\\\"");
    }

    @SuppressWarnings("unchecked")
    private void routeObject(int indent, Object value) {
      if (value instanceof String) {
        // Case: simple string (this is easy!)
        writer.write("\"");
        writer.write(cleanJSON(value.toString()));
        writer.write("\"");
      } else if (value instanceof Collection) {
        // Case: collection
        writer.write("[\n");
        Iterator<Object> elems = ((Collection<Object>) value).iterator();
        while (elems.hasNext()) {
          indent(indent + 1);
          routeObject(indent + 1, elems.next());
          if (elems.hasNext()) {
            writer.write(",");
          }
          writer.write("\n");
        }
        indent(indent);
        writer.write("]");
      } else if (value instanceof Consumer) {
        object(indent, (Consumer<Writer>) value);
      } else if (value instanceof Stream) {
        routeObject(indent, ((Stream) value).collect(Collectors.toList()));
      } else if (value.getClass().isArray()) {
        routeObject(indent, Arrays.asList((Object[]) value));
      } else if (value instanceof Integer) {
        routeObject(indent, Integer.toString((Integer) value));
      } else if (value instanceof Double) {
        routeObject(indent, Double.toString((Double) value));
      } else {
        throw new RuntimeException("Unknown object to serialize: " + value);
      }
    }

    private void indent(int num) {
      for (int i = 0; i < num; ++i) {
        writer.write(INDENT_CHAR);
      }
    }

    public void object(int indent, Consumer<Writer> callback) {
      writer.write("{");
      final boolean[] firstCall = new boolean[]{ true }; // Array is a poor man's pointer
      callback.accept((key, value) -> {
        if (key != null && value != null) {
          // First call overhead
          if (!firstCall[0]) {
            writer.write(",");
          }
          firstCall[0] = false;
          // Write the key
          writer.write("\n");
          indent(indent + 1);
          writer.write("\"");
          writer.write(cleanJSON(key));
          writer.write("\": ");
          // Write the value
          routeObject(indent + 1, value);
        }
      });
      writer.write("\n"); indent(indent); writer.write("}");
    }

    public void object(Consumer<Writer> callback) {
      object(0, callback);
    }

    public static String objectToJSON(Consumer<Writer> callback) {
      OutputStream os = new ByteArrayOutputStream();
      PrintWriter out = new PrintWriter(os);
      new JSONWriter(out).object(callback);
      out.close();
      return os.toString();
    }
  }

  /**
   * A tiny little functional interface for writing a (key, value) pair.
   * The key should always be a String, the value can be either a String,
   * a Collection of valid values, or a Callback taking a Writer (this is how
   * we represent objects while creating JSON).
   */
  @FunctionalInterface
  protected interface Writer {
    /**
     * Set a (key, value) pair in a JSON object.
     * Note that if either the key or the value is null, nothing will be set.
     * @param key The key of the object.
     * @param value The value of the object.
     */
    public void set(String key, Object value);
  }

}
TOP

Related Classes of edu.stanford.nlp.pipeline.JSONOutputter$JSONWriter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.