// Source Code of com.sun.xmlsearch.servlet.DocumentServer

/*************************************************************************
 *
 *  $RCSfile: DocumentServer.java,v $
 *
 *  $Revision: 1.1 $
 *
 *  last change: $Author: abi $ $Date: 2000/11/30 18:03:19 $
 *
 *  The Contents of this file are made available subject to the terms of
 *  either of the following licenses
 *
 *         - GNU Lesser General Public License Version 2.1
 *         - Sun Industry Standards Source License Version 1.1
 *
 *  Sun Microsystems Inc., October, 2000
 *
 *  GNU Lesser General Public License Version 2.1
 *  =============================================
 *  Copyright 2000 by Sun Microsystems, Inc.
 *  901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *
 *  Sun Industry Standards Source License Version 1.1
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.1 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://www.openoffice.org/license.html.
 *
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 *
 *  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 *
 *  Copyright: 2000 by Sun Microsystems, Inc.
 *
 *  All Rights Reserved.
 *
 *  Contributor(s): _______________________________________
 *
 *
 ************************************************************************/
package com.sun.xmlsearch.servlet;


import java.io.*;
import java.util.*;
import java.net.URL;
import java.net.MalformedURLException;
import java.text.MessageFormat;


import com.jclark.xsl.om.*;
import com.jclark.xsl.tr.Result;
import com.jclark.xsl.tr.OutputMethod;


import com.jclark.xsl.dom.Transform;
import com.jclark.xsl.dom.TransformEngine;
import com.jclark.xsl.dom.TransformException;
import com.jclark.xsl.dom.XSLTransformEngine;


import com.sun.xmlsearch.util.*;
import com.sun.xmlsearch.tree.*;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import com.sun.xml.tree.XmlDocument;
import javax.swing.tree.TreeNode;


import com.sun.xmlsearch.xml.qe.*;


final class DocumentServer implements Names {
  // Filesystem prefix under which findDocUrl probes for document files.
  private String Dir  = "/files8/jacek/docs/";
  // URL prefix used for the default stylesheet, the model files, identity.xsl
  // and the documents resolved by findDocUrl.
  private String Http = "http://bigblock.east:8084/";
  
  // Parses a single path step of the form  name[n]  (used by xPathStep).
  private static final MessageFormat StepFormat =
  new MessageFormat("{0}[{1,number,integer}]");


  // Tokenizer applied to the text of nodes that contain highlighted tokens
  // (see highlightTree).
  private Tokenizer _tokenizer = new SimpleTokenizer();
  // Both created in initXmlProcessor from the transform engine's name table.
  // _treeBuilder parses documents and creates/reverts substitute nodes;
  // _treeResult is not referenced anywhere else in this file.
  private TreeBuilder _treeBuilder;
  private TreeResult  _treeResult;


  // Caches keyed by URL: parsed document trees, TOC trees and compiled
  // stylesheets.
  // NOTE(review): the constructor argument is presumably an entry lifetime in
  // milliseconds -- confirm against Cache.
  private Cache _docTreeCache         = new Cache(60000);
  private Cache _tocTreeCache         = new Cache(60000);
  private Cache _transformCache       = new Cache(60000 * 5);
  // Maps a document Node to the TocTree.TocNode whose XPath resolves to it
  // (filled by establishCorrespondence, read by getTocNodeForDocNode).
  private Hashtable _docTocCorrespondence = new Hashtable();


  // Set in initXmlProcessor; _defaultTransform is built from default.xsl and
  // is not referenced anywhere else in this file.
  private XSLTransformEngine _transformEngine;
  private Transform _defaultTransform;


  // state of the modified tree
  // _tokenNumbers: text Node -> IntegerArray of token numbers to highlight.
  // _substituted:  substitute elements installed by highlightTree, undone by
  //                revertTree.
  private Hashtable _tokenNumbers = new Hashtable();
  private Vector _substituted     = new Vector();


  // Element names and the shared marker nodes used when building highlighted
  // text (all initialised in initXmlProcessor).
  private Name _StartHighlight_Name;
  private Name _EndHighlight_Name;
  
  private Name _HighlightedText_Name;
  private Node _StartHighlight_Node;
  private Node _EndHighlight_Node;


  // Lazily created singleton (see instance()).
  private static DocumentServer _instance = null;
  // Transform result sink used by getDocumentFragment to collect the HTML
  // bytes and the highlight count.
  private final HtmlAdapter _html;


  public static synchronized DocumentServer instance() throws Exception {
    if (_instance == null)
      _instance = new DocumentServer();
    return _instance;
  }


  private DocumentServer() throws Exception {
    initXmlProcessor();
    _html = new HtmlAdapter(_treeBuilder);
    // !!! self-cleaning hack for now
    
    (new Thread() {
      public void run() {
  while (true) {
    try {
      Thread.sleep(60000);
      clearCaches();
      System.out.println("caches cleared");
    }
    catch (InterruptedException e) {}
  }}}).start();
  }


  private synchronized void clearCaches() {
    _docTreeCache.clear();
    _tocTreeCache.clear();
    _transformCache.clear();
    _docTocCorrespondence.clear();
  }


  private final void initXmlProcessor() throws Exception {
    _transformEngine = new XSLTransformEngine();
    
    URL stylesheetUrl = new URL(Http + "default.xsl");
    InputStream stylesheetStream = stylesheetUrl.openStream();
    XmlDocument sheet = XmlDocument.createXmlDocument(stylesheetStream, false);
    _defaultTransform = _transformEngine.createTransform(sheet);
    _treeBuilder = new TreeBuilder(_transformEngine.getNameTable());
    _treeResult = new TreeResult(_transformEngine.getNameTable());


    _StartHighlight_Name = getElementName("StartHighlight");
    _EndHighlight_Name = getElementName("EndHighlight");


    _HighlightedText_Name = getElementName("HighlightedText");
    _StartHighlight_Node = _treeBuilder.makeEmptyElement(_StartHighlight_Name);
    _EndHighlight_Node = _treeBuilder.makeEmptyElement(_EndHighlight_Name);
  }


  private Node followXPath(Node current, String xPath) throws Exception {
    StringTokenizer steps = new StringTokenizer(xPath, "/");
    while (steps.hasMoreTokens())
      current = xPathStep(current, steps.nextToken());
    return current;
  }


  public final Name getElementName(String elementName) throws XSLException {
    return _treeBuilder.getElementName(elementName);
  }


  public final Name getAttributeName(String elementName) throws XSLException {
    return _treeBuilder.getAttributeName(elementName);
  }


  private Node[] expandXPath(Node current, String xPath) throws Exception {
    StringTokenizer steps = new StringTokenizer(xPath, "/");
    Node[] result = new Node[steps.countTokens() + 1]; int index;
    result[index = 0] = current;
    while (steps.hasMoreTokens()) {
      current = xPathStep(current, steps.nextToken());
      result[++index] = current;
    }
    return result;
  }


  private Node xPathStep(Node start, String step) throws Exception {
    if (step.equals("doc")) { // find root
      while (start.getParent() != null)
  start = start.getParent();
      return start;
    }
    else { // form:  type[n-of-type]
      Object[] parts = StepFormat.parse(step);
      final String name = (String)parts[0];
      int n = ((Long)parts[1]).intValue();
      SafeNodeIterator children = start.getChildren();
      Node child;


      if (name.equals("text()")) {
  // find 'n'th text node child
  while ((child = children.next()) != null)
    if (child.getType() == Node.TEXT && (--n == 0))
      return child;  // no more steps expected
      }
      else {
  // get the interned 'Name'
  NamespacePrefixMap npm = start.getNamespacePrefixMap();
  Name elementName = npm.expandElementTypeName(name, start);
  // find 'n'th child with tagName 'name'
  while ((child = children.next()) != null)
    if (child.getType() == Node.ELEMENT &&
        child.getName() == elementName && (--n == 0))
      return child;
      }
    }
    // if not returned above
    throw new Exception("mis-step in xPath: " + step + " " + start.getName());
  }


  private Node parseTargetDocument(URL docUrl) throws Exception {
    return _treeBuilder.getRoot(docUrl);
  }


  private Node parseInputSource(InputSource in) throws Exception {
    return _treeBuilder.getRoot(in);
  }


  private Node getDocumentTree(URL docUrl) throws Exception {
    Object value = _docTreeCache.get(docUrl);
    if (value == null)
      _docTreeCache.put(docUrl, value = parseTargetDocument(docUrl));
    return (Node)value;
  }


  private synchronized TocTreePath getTocTreePath(QueryHitData queryHit)
    throws Exception {
      URL docUrl = new URL(queryHit.getDocument());
      // root of TOC
      TocTree tocTree = getTocTree(docUrl);
      TocTree.TocNode tocNode = tocTree.getRoot();
      // root of document tree
      Node docNode = getDocumentTree(docUrl);
      IntegerArray array = new IntegerArray();
      array.add(tocNode.index());
      StringTokenizer steps = new StringTokenizer(queryHit.getCommonPath(), "/");
      while (steps.hasMoreTokens()) {
  docNode = xPathStep(docNode, steps.nextToken());
  tocNode = getTocNodeForDocNode(docNode);
  if (tocNode != null)
    array.add(tocNode.index());
      }
      return new TocTreePath(tocTree, array.toIntArray());
  }


  private TocTree.TocNode getTocNodeForDocNode(Node docNode) {
    return (TocTree.TocNode)_docTocCorrespondence.get(docNode);
  }


  // little function, lots of functionality
  private synchronized TocTree getTocTree(URL docUrl) throws Exception {
    Object value = _tocTreeCache.get(docUrl);
    if (value == null) {
      Node docRoot = getDocumentTree(docUrl);
      Transform tocTransform = getTocTransformForDoc(docUrl);
      TocTree tree = TocTree.makeTocTree(docRoot, tocTransform, this);
      TocTree.TocNode tocRoot = tree.getRoot();
      _tocTreeCache.put(docUrl, tree);
      establishCorrespondence(docRoot, tocRoot);
      return tree;
    }
    else
      return (TocTree)value;
  }


  private void establishCorrespondence(final Node docRoot, TocTree.TocNode tocNode)
    throws Exception {
      int nChildren = tocNode.getChildCount();
      _docTocCorrespondence.put(followXPath(docRoot,
              tocNode.getXPath()),
        tocNode);
      for (int i = 0; i < nChildren; i++)
  establishCorrespondence(docRoot, (TocTree.TocNode)tocNode.getChildAt(i));
  }


  /*
    private void dumpTree(Node root) {
    dumpTree(System.out, root, "");
    }


    private void dumpTree(PrintStream out, Node node, String indent) {
    out.print(indent);
    out.println(node.toString());
    SafeNodeIterator children = node.getChildren(); Node child;
    while ((child = children.next()) != null)
    dumpTree(out, child, indent + "    ");
    }
    */


  private Transform getTransformForDoc(URL docUrl, String fileName)
    throws Exception {
      System.out.println(docUrl);
      String docFname = docUrl.getFile();
      System.out.println("fname: " + docFname);
      int slash = docFname.indexOf('/', 1);
      String transFname = docFname.substring(0, slash + 1) + fileName;
      URL stylesheetUrl = new URL(docUrl.getProtocol(),
          docUrl.getHost(),
          docUrl.getPort(),
          transFname);
      return getTransform(stylesheetUrl);
  }


  private synchronized Transform getTransform(URL stylesheetUrl) throws Exception {
    if (stylesheetUrl != null) {
      Object value = _transformCache.get(stylesheetUrl);
      if (value != null)
  return (Transform)value;
      else {
  System.out.println("creating transform: " + stylesheetUrl);
  InputStream stylesheetStream = stylesheetUrl.openStream();
  XmlDocument sheet = XmlDocument.createXmlDocument(stylesheetStream, false);
  Transform transform = _transformEngine.createTransform(sheet);
  _transformCache.put(stylesheetUrl, transform);
  return transform;
      }
    }
    else
      return null;
  }


  private Transform getTocTransformForDoc(URL docUrl) throws Exception {
    return getTransformForDoc(docUrl, "TOC.xsl");
  }
  
  private Transform getHtmlTransformForDoc(URL docUrl) throws Exception {
    return getTransformForDoc(docUrl, "toHtml.xsl");
  }


  private void addNumber(Object key, int n) {
    //    System.out.println("addNumber " + key + " " + n);
    IntegerArray array = (IntegerArray)_tokenNumbers.get(key);
    if (array == null)
      _tokenNumbers.put(key, array = new IntegerArray(8));
    array.addNew(n);
  }
  
  private void revertTree() {
    for (int i = 0; i < _substituted.size(); i++)
      _treeBuilder.revertToOriginal((Node)_substituted.elementAt(i));
  }


  private Node makeTextNode(String text) {
    return _treeBuilder.makeTextNode(text);
  }


  private Node makeSubstituteElement(Node textNode, Node2[] childrenArray) {
    return _treeBuilder.makeSubstituteElement(_HighlightedText_Name,
                childrenArray,
                textNode);
  }


  /**
   * Replaces every text node registered in _tokenNumbers by a substitute
   * "HighlightedText" element. The substitute's children are the original
   * text cut into pieces, with each registered token wrapped between the
   * shared StartHighlight and EndHighlight marker nodes. Each substitute is
   * put into the parent's child slot of the text node and remembered in
   * _substituted (revertTree walks that list).
   */
  private void highlightTree() {
    Enumeration keys = _tokenNumbers.keys();
    while (keys.hasMoreElements()) {
      Vector children = new Vector();
      Node textNode = (Node)keys.nextElement();
      IntegerArray numbers = (IntegerArray)_tokenNumbers.get(textNode);
      numbers.sort();    // word numbers in ascending order
      String text = textNode.getData();
      // tokenNumber counts tokens consumed so far; lastOffset is the end of
      // the text already copied into 'children'
      int tokenNumber = 0;
      int lastOffset = 0;
      // select tokenizers
      _tokenizer.setText(text);
      for (int i = 0; i < numbers.cardinality(); i++) {
  int n = numbers.at(i);
  Token token;
  // advance until the n-th token (counting from 1) has been read; the
  // do/while always consumes at least one token per requested number
  // NOTE(review): if nextToken() signals exhaustion by returning null, the
  // token.getStart() call below would fail -- confirm against Tokenizer
  do {
    token = _tokenizer.nextToken();
  }
  while (++tokenNumber < n);
  int start = token.getStart();
  int end   = token.getEnd();
  // plain text between the previous highlight and this token
  if (lastOffset < start)
    children.addElement(makeTextNode(text.substring(lastOffset, start)));
  // the same two marker node instances are reused for every highlight
  children.addElement(_StartHighlight_Node);
  children.addElement(makeTextNode(token.getData()));
  children.addElement(_EndHighlight_Node);
  lastOffset = end;
      }
      // plain text after the last highlighted token
      if (lastOffset < text.length())
  children.addElement(makeTextNode(text.substring(lastOffset)));
      Node2[] childrenArray = new Node2[children.size()];
      children.toArray(childrenArray);
      Node highlighted = makeSubstituteElement(textNode, childrenArray);
      Node2 parent = (Node2)textNode.getParent();
      // re-parent all pieces (including the shared marker nodes, whose parent
      // is therefore overwritten on each use)
      for (int i = 0; i < childrenArray.length; i++)
  ((Node2)childrenArray[i]).setParent(highlighted);
      // actually substitute
      int index = parent.getChildIndex(textNode);
      parent.setChild(index, highlighted);
      ((Node2)highlighted).setParent(parent);
      _substituted.addElement(highlighted);
    }
  }


  private synchronized void transform(Transform tr, Node[] nodes, ResultAdapter res)
    throws Exception {
      if (tr != null) {
  res.init();
  for (int i = 0; i < nodes.length; i++)
    tr.transform(nodes[i], res);
  res.finish();
      }
  }


  // Directory names (relative to both Dir and Http) that findDocUrl probes,
  // in this order, when resolving a "JAVACLASS:" document name; the first
  // directory containing the class file wins.
  private static String[] JavaDirs = {
    "Java2JDK1.3",
    "Jini1.1",
    "xerces-1_0_1",
    "xalan_0_19_2",
    "Cocoon-1.6.1"
  };
  
  public URL findDocUrl(String docName) throws MalformedURLException {
    final String javaClass = "JAVACLASS:";
    final String manual    = "MAN:";


    if (docName.startsWith(javaClass)) {
      String className = docName.substring(javaClass.length());
      System.out.println(className);
      String classFile = "/" + className + ".xml";
      
      for (int i = 0; i < JavaDirs.length; i++)
  if ((new File(Dir + JavaDirs[i] + classFile)).exists())
    return new URL(Http + JavaDirs[i] + classFile);
    }
    else if (docName.startsWith(manual)) {
      String pageName = docName.substring(manual.length());
      int dash = pageName.lastIndexOf('-');
      String title = pageName.substring(0,dash).toLowerCase().replace('-','_');
      String ext = pageName.substring(dash + 1).toLowerCase();
      String filename = Dir+"xman/sman"+ext+'/'+title+'.'+ext+".xml";
      System.out.println("filename: " + filename);
      if ((new File(filename)).exists())
  docName = Http+"xman/sman"+ext+'/'+title+'.'+ext+".xml";
    }
      
    System.out.println("\""+docName+"\"");
    return new URL(docName);
  }
  
  public synchronized DocumentFragment getDocumentFragment(DocumentRequest request)
    throws Exception {
      long start = System.currentTimeMillis();
    
      // !!! for testing
      /*
  _transformCache.clear();
  _docTocCorrespondence.clear();
  _docTreeCache.clear();
  _tocTreeCache.clear();
  */
      
      try {
  _tokenNumbers.clear();
  _substituted.setSize(0);
      
  String docName = request.getDocument();
  URL docUrl = findDocUrl(docName);
  final Node docRoot = getDocumentTree(docUrl);
  Node docNode = docRoot;
  // root of TOC
  TocTree tocTree = getTocTree(docUrl);
  TocTree.TocNode tocNode = tocTree.getRoot();
  DocumentFragment result = new DocumentFragment();
  if (request.isTocRequested())
    result.setTOC(tocTree);
  
  IntegerArray array = new IntegerArray();
  array.add(tocNode.index());
  String focusPath = request.getFocus().getCommonPath();
  if (focusPath.indexOf('/') == 0)
    focusPath = focusPath.substring(1);
  //      System.out.println("focusPath = " + focusPath);
  StringTokenizer steps = new StringTokenizer(focusPath, "/");
  while (steps.hasMoreTokens()) {
    docNode = xPathStep(docNode, steps.nextToken());
    tocNode = getTocNodeForDocNode(docNode);
    if (tocNode != null)
      array.add(tocNode.index());
  }
  result.setPath(array.toIntArray());


  // docNode is selected for transformation
  // find highlighted nodes that are its descendants
  // ... by xPath matching


  for (int i = 0; i < request.size(); i++) {
    MultiTokenLocator loc = request.getLocator(i);
    String locPath = loc.getCommonPath();
    //  System.out.println("locPath = " + locPath);
    if (locPath.startsWith(focusPath) || focusPath.equals("doc")) {
      //    System.out.println("matches");
      int nTerms = loc.getNumberOfTerms();
      Node locNode = followXPath(docRoot, locPath);
      for (int j = 0; j < nTerms; j++)
        if (loc.getTerm(j) != null)
    addNumber(followXPath(locNode, loc.getPath(j)),
        loc.getTokenNumber(j));
    }
  }


  
  Node subtree = docNode;
  if (subtree.getType() == Node.TEXT) {
    subtree = subtree.getParent();
    while (getTocNodeForDocNode(subtree) == null)
      subtree = subtree.getParent();
  }
  //      System.out.println("subtree " + subtree.getName().getLocalPart());


  ByteArrayOutputStream out = new ByteArrayOutputStream(1024*16);
  _html.setOutputStream(out);
  highlightTree();
  transform(getHtmlTransformForDoc(docUrl), new Node[] { subtree }, _html);
  byte[] bytes = out.toByteArray();
  System.out.println(bytes.length + " HTML bytes");
  result.setHTML(bytes);
  result.setNumberOfHighlights(_html.getNumberOfHighlights());
  return result;
      }
      catch (Exception e) {
  e.printStackTrace();
  throw e;
      }
      finally {
  revertTree();
  System.out.println((System.currentTimeMillis() - start)
         +" msec getDocumentFragment");
      }
  }
  
  public synchronized CollectionModel getCollectionModel(String classification)
    throws Exception {
      try {
  String modelFile = "model";


  if (classification.indexOf("java") != -1)
    modelFile = "javaModel";
  else if (classification.indexOf("man") != -1)
    modelFile = "manModel";
  else if (classification.indexOf("unix") != -1)
    modelFile = "unixModel";
  else if (classification.indexOf("shakespeare") != -1)
    modelFile = "playModel";
      
  URL modelUrl = new URL(Http + modelFile + ".xml");
  Node modelRoot = getDocumentTree(modelUrl);
  URL identityTransfUrl = new URL(Http + "identity.xsl");
  Transform tocTransform = getTransform(identityTransfUrl);
  TocTree tree = TocTree.makeTocTree(modelRoot, tocTransform, this);
  CollectionModel model = new CollectionModel();
  model.setTree(tree);
  return model;
      }
      catch (Exception e) {
  System.err.println("getCollectionModel ");
  e.printStackTrace();
  throw e;
      }
  }
}
// Source Code of com.sun.xmlsearch.servlet.DocumentServer

// Related Classes of com.sun.xmlsearch.servlet.DocumentServer