/*************************************************************************
*
* $RCSfile: DocumentServer.java,v $
*
* $Revision: 1.1 $
*
* last change: $Author: abi $ $Date: 2000/11/30 18:03:19 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
*
* - GNU Lesser General Public License Version 2.1
* - Sun Industry Standards Source License Version 1.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*
* Sun Industry Standards Source License Version 1.1
* =================================================
* The contents of this file are subject to the Sun Industry Standards
* Source License Version 1.1 (the "License"); You may not use this file
* except in compliance with the License. You may obtain a copy of the
* License at http://www.openoffice.org/license.html.
*
* Software provided under this License is provided on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING,
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
* See the License for the specific provisions governing your rights and
* obligations concerning the Software.
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
package com.sun.xmlsearch.servlet;
import java.io.*;
import java.util.*;
import java.net.URL;
import java.net.MalformedURLException;
import java.text.MessageFormat;
import com.jclark.xsl.om.*;
import com.jclark.xsl.tr.Result;
import com.jclark.xsl.tr.OutputMethod;
import com.jclark.xsl.dom.Transform;
import com.jclark.xsl.dom.TransformEngine;
import com.jclark.xsl.dom.TransformException;
import com.jclark.xsl.dom.XSLTransformEngine;
import com.sun.xmlsearch.util.*;
import com.sun.xmlsearch.tree.*;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import com.sun.xml.tree.XmlDocument;
import javax.swing.tree.TreeNode;
import com.sun.xmlsearch.xml.qe.*;
final class DocumentServer implements Names {
// Local filesystem prefix; findDocUrl() prepends it to candidate file names
// to test whether a document exists on disk.
private String Dir = "/files8/jacek/docs/";
// Base URL prepended to stylesheet, model and document names when building URLs.
private String Http = "http://bigblock.east:8084/";
// Parses one xPath step of the form name[n] into { name, n } (see xPathStep()).
private static final MessageFormat StepFormat =
new MessageFormat("{0}[{1,number,integer}]");
// Tokenizer that highlightTree() runs over the text of each text node.
private Tokenizer _tokenizer = new SimpleTokenizer();
// Created in initXmlProcessor(); parses documents and creates names and nodes.
private TreeBuilder _treeBuilder;
// Created in initXmlProcessor(); not otherwise referenced in this class.
private TreeResult _treeResult;
// Caches keyed by URL: parsed document roots, TocTrees and Transforms.
// NOTE(review): the constructor argument looks like a lifetime in
// milliseconds -- confirm against Cache.
private Cache _docTreeCache = new Cache(60000);
private Cache _tocTreeCache = new Cache(60000);
private Cache _transformCache = new Cache(60000 * 5);
// Maps a document Node to its TocTree.TocNode (filled by establishCorrespondence()).
private Hashtable _docTocCorrespondence = new Hashtable();
// XSL engine and the transform built from default.xsl; both set in initXmlProcessor().
private XSLTransformEngine _transformEngine;
private Transform _defaultTransform;
// state of the modified tree:
// _tokenNumbers maps a text Node to the IntegerArray of token numbers to
// highlight in it; _substituted lists the substitute elements that
// highlightTree() put into the tree so that revertTree() can undo them.
private Hashtable _tokenNumbers = new Hashtable();
private Vector _substituted = new Vector();
// Element names and the two empty marker elements used by highlightTree();
// all five are initialised in initXmlProcessor().
private Name _StartHighlight_Name;
private Name _EndHighlight_Name;
private Name _HighlightedText_Name;
private Node _StartHighlight_Node;
private Node _EndHighlight_Node;
// The single instance handed out by instance().
private static DocumentServer _instance = null;
// Result adapter that getDocumentFragment() transforms into.
private final HtmlAdapter _html;
/**
 * Returns the shared DocumentServer, constructing it on the first call.
 *
 * @return the single DocumentServer instance
 * @throws Exception if constructing the instance fails; nothing is stored
 *         in that case, so a later call tries again
 */
public static synchronized DocumentServer instance() throws Exception {
    if (_instance != null) {
        return _instance;
    }
    _instance = new DocumentServer();
    return _instance;
}
/**
 * Initialises the XML/XSL machinery and the HTML adapter, then starts a
 * background thread that empties all caches once a minute.
 *
 * @throws Exception if initXmlProcessor() fails
 */
private DocumentServer() throws Exception {
    initXmlProcessor();
    _html = new HtmlAdapter(_treeBuilder);
    // !!! self-cleaning hack for now
    Thread cleaner = new Thread() {
        public void run() {
            while (true) {
                try {
                    Thread.sleep(60000);
                }
                catch (InterruptedException e) {
                    // an interrupt is the only way to ask this thread to
                    // stop, so honour it instead of looping forever
                    return;
                }
                clearCaches();
                System.out.println("caches cleared");
            }
        }
    };
    // housekeeping only: must not keep the JVM alive on shutdown
    cleaner.setDaemon(true);
    cleaner.start();
}
/**
 * Empties the document/TOC correspondence table and the three URL-keyed
 * caches. Holds the instance lock while doing so.
 */
private synchronized void clearCaches() {
    // correspondence entries refer to nodes of the cached trees, so all
    // four containers are emptied together
    _docTocCorrespondence.clear();
    _transformCache.clear();
    _tocTreeCache.clear();
    _docTreeCache.clear();
}
/**
 * Creates the XSL transform engine, loads the default stylesheet from
 * Http + "default.xsl", and sets up the tree builder, tree result and the
 * names/marker nodes used for highlighting.
 *
 * @throws Exception if the stylesheet cannot be read or parsed, or if a
 *         name or node cannot be created
 */
private final void initXmlProcessor() throws Exception {
    _transformEngine = new XSLTransformEngine();
    URL stylesheetUrl = new URL(Http + "default.xsl");
    XmlDocument sheet;
    InputStream stylesheetStream = stylesheetUrl.openStream();
    try {
        sheet = XmlDocument.createXmlDocument(stylesheetStream, false);
    }
    finally {
        // release the connection whether or not parsing succeeded
        stylesheetStream.close();
    }
    _defaultTransform = _transformEngine.createTransform(sheet);
    _treeBuilder = new TreeBuilder(_transformEngine.getNameTable());
    _treeResult = new TreeResult(_transformEngine.getNameTable());
    _StartHighlight_Name = getElementName("StartHighlight");
    _EndHighlight_Name = getElementName("EndHighlight");
    _HighlightedText_Name = getElementName("HighlightedText");
    _StartHighlight_Node = _treeBuilder.makeEmptyElement(_StartHighlight_Name);
    _EndHighlight_Node = _treeBuilder.makeEmptyElement(_EndHighlight_Name);
}
/**
 * Walks a '/'-separated path from the given node, applying xPathStep()
 * once per step.
 *
 * @param current node to start from
 * @param xPath   steps separated by '/'; empty steps are skipped
 * @return the node reached after the last step (the start node if the
 *         path has no steps)
 * @throws Exception if any step cannot be taken
 */
private Node followXPath(Node current, String xPath) throws Exception {
    Node position = current;
    for (StringTokenizer steps = new StringTokenizer(xPath, "/");
         steps.hasMoreTokens(); ) {
        position = xPathStep(position, steps.nextToken());
    }
    return position;
}
/**
 * Asks the tree builder for the element Name matching the given string.
 *
 * @param elementName textual element name
 * @return the Name supplied by the tree builder
 * @throws XSLException if the tree builder rejects the name
 */
public final Name getElementName(String elementName) throws XSLException {
    final Name name = _treeBuilder.getElementName(elementName);
    return name;
}
/**
 * Asks the tree builder for the attribute Name matching the given string.
 *
 * @param elementName textual attribute name
 * @return the Name supplied by the tree builder
 * @throws XSLException if the tree builder rejects the name
 */
public final Name getAttributeName(String elementName) throws XSLException {
    final Name name = _treeBuilder.getAttributeName(elementName);
    return name;
}
/**
 * Like followXPath(), but records every node visited along the way.
 *
 * @param current node to start from
 * @param xPath   steps separated by '/'
 * @return an array whose first element is the start node, followed by the
 *         node reached after each step, in order
 * @throws Exception if any step cannot be taken
 */
private Node[] expandXPath(Node current, String xPath) throws Exception {
    final StringTokenizer steps = new StringTokenizer(xPath, "/");
    final Node[] visited = new Node[steps.countTokens() + 1];
    visited[0] = current;
    Node position = current;
    for (int slot = 1; steps.hasMoreTokens(); slot++) {
        position = xPathStep(position, steps.nextToken());
        visited[slot] = position;
    }
    return visited;
}
/**
 * Takes a single path step from the given node.
 *
 * The step "doc" climbs to the root of the tree. Any other step must have
 * the form type[n] and selects the n-th child of that type: the n-th text
 * child when type is "text()", otherwise the n-th element child whose
 * expanded name is identical to the expanded form of type.
 *
 * @param start node the step is taken from
 * @param step  "doc" or a step of the form type[n]
 * @return the node the step leads to
 * @throws Exception if the step cannot be parsed or no such child exists
 */
private Node xPathStep(Node start, String step) throws Exception {
    if (step.equals("doc")) {
        // climb until there is no parent left
        Node root = start;
        while (root.getParent() != null)
            root = root.getParent();
        return root;
    }
    // form: type[n-of-type]
    final Object[] parsed = StepFormat.parse(step);
    final String typeName = (String)parsed[0];
    int remaining = ((Long)parsed[1]).intValue();
    final SafeNodeIterator kids = start.getChildren();
    if (typeName.equals("text()")) {
        // count down over text children only
        for (Node kid = kids.next(); kid != null; kid = kids.next()) {
            if (kid.getType() != Node.TEXT)
                continue;
            if (--remaining == 0)
                return kid; // no more steps expected
        }
    }
    else {
        // names are compared by identity, so expand the type to its Name first
        final NamespacePrefixMap prefixes = start.getNamespacePrefixMap();
        final Name wanted = prefixes.expandElementTypeName(typeName, start);
        for (Node kid = kids.next(); kid != null; kid = kids.next()) {
            if (kid.getType() != Node.ELEMENT || kid.getName() != wanted)
                continue;
            if (--remaining == 0)
                return kid;
        }
    }
    // ran out of children before reaching the requested one
    throw new Exception("mis-step in xPath: " + step + " " + start.getName());
}
/**
 * Has the tree builder produce the root node for the document at the URL.
 *
 * @param docUrl location of the document
 * @return the root node returned by the tree builder
 * @throws Exception if the tree builder fails
 */
private Node parseTargetDocument(URL docUrl) throws Exception {
    final Node root = _treeBuilder.getRoot(docUrl);
    return root;
}
/**
 * Has the tree builder produce the root node for the given input source.
 *
 * @param in source to read the document from
 * @return the root node returned by the tree builder
 * @throws Exception if the tree builder fails
 */
private Node parseInputSource(InputSource in) throws Exception {
    final Node root = _treeBuilder.getRoot(in);
    return root;
}
/**
 * Returns the root node of the document at the URL, parsing it and storing
 * the result in the document cache when the cache has no entry for it.
 *
 * @param docUrl location of the document
 * @return the cached or freshly parsed root node
 * @throws Exception if parsing fails
 */
private Node getDocumentTree(URL docUrl) throws Exception {
    Object cached = _docTreeCache.get(docUrl);
    if (cached != null) {
        return (Node)cached;
    }
    cached = parseTargetDocument(docUrl);
    _docTreeCache.put(docUrl, cached);
    return (Node)cached;
}
/**
 * Builds the path through the TOC that corresponds to a query hit.
 *
 * Starting at the document root, the hit's common path is followed step by
 * step; the index of the TOC root and of every TOC node that corresponds to
 * a visited document node is collected.
 *
 * @param queryHit hit naming the document and the common path inside it
 * @return the TOC tree of the document together with the collected indices
 * @throws Exception if the document URL is malformed, the document or its
 *         TOC cannot be obtained, or a path step cannot be taken
 */
private synchronized TocTreePath getTocTreePath(QueryHitData queryHit)
    throws Exception {
    final URL docUrl = new URL(queryHit.getDocument());
    // root of TOC
    final TocTree tocTree = getTocTree(docUrl);
    final TocTree.TocNode tocRoot = tocTree.getRoot();
    // root of document tree
    Node cursor = getDocumentTree(docUrl);
    final IntegerArray indices = new IntegerArray();
    indices.add(tocRoot.index());
    for (StringTokenizer steps = new StringTokenizer(queryHit.getCommonPath(), "/");
         steps.hasMoreTokens(); ) {
        cursor = xPathStep(cursor, steps.nextToken());
        final TocTree.TocNode match = getTocNodeForDocNode(cursor);
        if (match != null) {
            indices.add(match.index());
        }
    }
    return new TocTreePath(tocTree, indices.toIntArray());
}
/**
 * Looks up the TOC node recorded for a document node.
 *
 * @param docNode document node to look up
 * @return the corresponding TOC node, or null if none was recorded
 */
private TocTree.TocNode getTocNodeForDocNode(Node docNode) {
    final Object recorded = _docTocCorrespondence.get(docNode);
    return (TocTree.TocNode)recorded;
}
// little function, lots of functionality
/**
 * Returns the TOC tree for a document, building it on a cache miss.
 *
 * On a miss the document is parsed (or taken from the document cache), its
 * TOC stylesheet is applied to create the tree, and the correspondence
 * between document nodes and TOC nodes is recorded. The tree is cached only
 * after the correspondence has been recorded completely, so a failure while
 * recording does not leave a cached tree without correspondence entries.
 *
 * @param docUrl location of the document
 * @return the cached or freshly built TOC tree
 * @throws Exception if the document, the transform or the tree cannot be
 *         obtained, or a TOC node's xPath cannot be followed
 */
private synchronized TocTree getTocTree(URL docUrl) throws Exception {
    Object value = _tocTreeCache.get(docUrl);
    if (value != null)
        return (TocTree)value;
    Node docRoot = getDocumentTree(docUrl);
    Transform tocTransform = getTocTransformForDoc(docUrl);
    TocTree tree = TocTree.makeTocTree(docRoot, tocTransform, this);
    TocTree.TocNode tocRoot = tree.getRoot();
    // may throw; publish the tree to the cache only once this succeeded
    establishCorrespondence(docRoot, tocRoot);
    _tocTreeCache.put(docUrl, tree);
    return tree;
}
/**
 * Records, for a TOC node and all of its descendants, which document node
 * each one corresponds to. The document node is found by following the TOC
 * node's xPath from the document root.
 *
 * @param docRoot root of the document tree
 * @param tocNode TOC node to start from
 * @throws Exception if an xPath cannot be followed
 */
private void establishCorrespondence(final Node docRoot, TocTree.TocNode tocNode)
    throws Exception {
    final int childCount = tocNode.getChildCount();
    final Node docNode = followXPath(docRoot, tocNode.getXPath());
    _docTocCorrespondence.put(docNode, tocNode);
    // depth-first over the TOC subtree
    for (int child = 0; child < childCount; child++) {
        final TocTree.TocNode next = (TocTree.TocNode)tocNode.getChildAt(child);
        establishCorrespondence(docRoot, next);
    }
}
/*
private void dumpTree(Node root) {
dumpTree(System.out, root, "");
}
private void dumpTree(PrintStream out, Node node, String indent) {
out.print(indent);
out.println(node.toString());
SafeNodeIterator children = node.getChildren(); Node child;
while ((child = children.next()) != null)
dumpTree(out, child, indent + " ");
}
*/
/**
 * Returns the transform stored next to a document's collection.
 *
 * The stylesheet is looked up on the same protocol, host and port as the
 * document, in the first directory of the document's path (everything up
 * to and including the second '/'). When the path has no such directory
 * the stylesheet is looked up directly under the server root, so that the
 * resulting URL always has an absolute path.
 *
 * @param docUrl   location of the document
 * @param fileName stylesheet file name, e.g. "TOC.xsl"
 * @return the transform for that stylesheet
 * @throws Exception if the stylesheet URL cannot be built or the transform
 *         cannot be created
 */
private Transform getTransformForDoc(URL docUrl, String fileName)
    throws Exception {
    System.out.println(docUrl);
    String docFname = docUrl.getFile();
    System.out.println("fname: " + docFname);
    int slash = docFname.indexOf('/', 1);
    // no second '/': fall back to the root directory instead of producing
    // a path without a leading slash
    String directory = (slash < 0) ? "/" : docFname.substring(0, slash + 1);
    String transFname = directory + fileName;
    URL stylesheetUrl = new URL(docUrl.getProtocol(),
                                docUrl.getHost(),
                                docUrl.getPort(),
                                transFname);
    return getTransform(stylesheetUrl);
}
/**
 * Returns the transform for a stylesheet URL, creating and caching it when
 * the transform cache has no entry for that URL.
 *
 * @param stylesheetUrl location of the stylesheet; may be null
 * @return the cached or newly created transform, or null if the URL is null
 * @throws Exception if the stylesheet cannot be read or parsed, or the
 *         transform cannot be created
 */
private synchronized Transform getTransform(URL stylesheetUrl) throws Exception {
    if (stylesheetUrl == null)
        return null;
    Object value = _transformCache.get(stylesheetUrl);
    if (value != null)
        return (Transform)value;
    System.out.println("creating transform: " + stylesheetUrl);
    XmlDocument sheet;
    InputStream stylesheetStream = stylesheetUrl.openStream();
    try {
        sheet = XmlDocument.createXmlDocument(stylesheetStream, false);
    }
    finally {
        // release the connection whether or not parsing succeeded
        stylesheetStream.close();
    }
    Transform transform = _transformEngine.createTransform(sheet);
    _transformCache.put(stylesheetUrl, transform);
    return transform;
}
/**
 * Returns the "TOC.xsl" transform that belongs to the given document.
 *
 * @param docUrl location of the document
 * @return the transform obtained through getTransformForDoc()
 * @throws Exception if the transform cannot be obtained
 */
private Transform getTocTransformForDoc(URL docUrl) throws Exception {
    final String stylesheetName = "TOC.xsl";
    return getTransformForDoc(docUrl, stylesheetName);
}
/**
 * Returns the "toHtml.xsl" transform that belongs to the given document.
 *
 * @param docUrl location of the document
 * @return the transform obtained through getTransformForDoc()
 * @throws Exception if the transform cannot be obtained
 */
private Transform getHtmlTransformForDoc(URL docUrl) throws Exception {
    final String stylesheetName = "toHtml.xsl";
    return getTransformForDoc(docUrl, stylesheetName);
}
/**
 * Adds a token number to the set recorded for a key in _tokenNumbers,
 * creating the set on first use.
 *
 * @param key key the number is recorded under (a text node when called
 *            from getDocumentFragment())
 * @param n   token number to record
 */
private void addNumber(Object key, int n) {
    IntegerArray recorded = (IntegerArray)_tokenNumbers.get(key);
    if (recorded == null) {
        recorded = new IntegerArray(8);
        _tokenNumbers.put(key, recorded);
    }
    recorded.addNew(n);
}
/**
 * Asks the tree builder to revert every node listed in _substituted to its
 * original, in the order the nodes were added.
 */
private void revertTree() {
    for (Enumeration pending = _substituted.elements();
         pending.hasMoreElements(); ) {
        final Node substitute = (Node)pending.nextElement();
        _treeBuilder.revertToOriginal(substitute);
    }
}
/**
 * Has the tree builder create a text node.
 *
 * @param text character data for the node
 * @return the node created by the tree builder
 */
private Node makeTextNode(String text) {
    final Node created = _treeBuilder.makeTextNode(text);
    return created;
}
/**
 * Has the tree builder create a HighlightedText element that stands in for
 * a text node.
 *
 * @param textNode      the text node being substituted
 * @param childrenArray children of the new element
 * @return the element created by the tree builder
 */
private Node makeSubstituteElement(Node textNode, Node2[] childrenArray) {
    final Name elementName = _HighlightedText_Name;
    return _treeBuilder.makeSubstituteElement(elementName, childrenArray, textNode);
}
/**
 * Replaces every text node recorded in _tokenNumbers by a HighlightedText
 * element in which the selected tokens are bracketed by the StartHighlight
 * and EndHighlight marker nodes.
 *
 * For each text node the recorded token numbers are sorted, the node's text
 * is tokenized, and the text is cut into plain stretches and highlighted
 * tokens. The resulting element takes the text node's place under its
 * parent and is remembered in _substituted so that revertTree() can undo
 * the change.
 */
private void highlightTree() {
    for (Enumeration textNodes = _tokenNumbers.keys();
         textNodes.hasMoreElements(); ) {
        final Vector pieces = new Vector();
        final Node original = (Node)textNodes.nextElement();
        final IntegerArray wanted = (IntegerArray)_tokenNumbers.get(original);
        wanted.sort(); // word numbers in ascending order
        final String text = original.getData();
        int tokensRead = 0; // tokens pulled from the tokenizer so far
        int consumed = 0;   // offset in text up to which pieces were emitted
        _tokenizer.setText(text);
        for (int k = 0; k < wanted.cardinality(); k++) {
            final int target = wanted.at(k);
            Token token;
            // always read at least one token, then keep reading until the
            // target token number is reached
            do {
                token = _tokenizer.nextToken();
                tokensRead++;
            }
            while (tokensRead < target);
            final int from = token.getStart();
            final int to = token.getEnd();
            if (from > consumed) {
                // plain text between the previous token and this one
                pieces.addElement(makeTextNode(text.substring(consumed, from)));
            }
            pieces.addElement(_StartHighlight_Node);
            pieces.addElement(makeTextNode(token.getData()));
            pieces.addElement(_EndHighlight_Node);
            consumed = to;
        }
        if (text.length() > consumed) {
            // plain text after the last highlighted token
            pieces.addElement(makeTextNode(text.substring(consumed)));
        }
        final Node2[] pieceArray = new Node2[pieces.size()];
        pieces.toArray(pieceArray);
        final Node substitute = makeSubstituteElement(original, pieceArray);
        final Node2 parent = (Node2)original.getParent();
        for (int k = 0; k < pieceArray.length; k++) {
            pieceArray[k].setParent(substitute);
        }
        // actually substitute
        final int slot = parent.getChildIndex(original);
        parent.setChild(slot, substitute);
        ((Node2)substitute).setParent(parent);
        _substituted.addElement(substitute);
    }
}
/**
 * Applies a transform to each node in turn, sending the output to the
 * result adapter, which is initialised before and finished after the run.
 * Does nothing at all when the transform is null.
 *
 * @param tr    transform to apply; may be null
 * @param nodes nodes to transform, in order
 * @param res   adapter receiving the output
 * @throws Exception if the adapter or the transform fails
 */
private synchronized void transform(Transform tr, Node[] nodes, ResultAdapter res)
    throws Exception {
    if (tr == null) {
        return;
    }
    res.init();
    int next = 0;
    while (next < nodes.length) {
        tr.transform(nodes[next], res);
        next++;
    }
    res.finish();
}
// Directory names that findDocUrl() tries, in this order, under both Dir
// and Http when resolving a "JAVACLASS:" document name.
private static String[] JavaDirs = {
"Java2JDK1.3",
"Jini1.1",
"xerces-1_0_1",
"xalan_0_19_2",
"Cocoon-1.6.1"
};
/**
 * Resolves a document name to a URL.
 *
 * <ul>
 * <li>"JAVACLASS:name" is looked up as name.xml in each JavaDirs directory
 *     under Dir; the first one that exists on disk yields a URL under
 *     Http.</li>
 * <li>"MAN:title-ext" is mapped to xman/sman&lt;ext&gt;/&lt;title&gt;.&lt;ext&gt;.xml
 *     (lower-cased, remaining '-' in the title replaced by '_'); if that
 *     file exists under Dir the name is replaced by the URL under Http.</li>
 * <li>Anything else, and any name that could not be resolved above, is
 *     parsed as a URL as it stands.</li>
 * </ul>
 *
 * A "MAN:" name without a '-' cannot be split into title and extension; it
 * is treated as unresolved and reported through MalformedURLException like
 * any other unresolved name.
 *
 * @param docName document name as described above
 * @return the URL of the document
 * @throws MalformedURLException if the (possibly rewritten) name is not a
 *         valid URL
 */
public URL findDocUrl(String docName) throws MalformedURLException {
    final String javaClass = "JAVACLASS:";
    final String manual = "MAN:";
    // NOTE(review): the name is concatenated into a file path unchecked;
    // if it can come from an untrusted client, "../" sequences should be
    // rejected before this point.
    if (docName.startsWith(javaClass)) {
        String className = docName.substring(javaClass.length());
        System.out.println(className);
        String classFile = "/" + className + ".xml";
        for (int i = 0; i < JavaDirs.length; i++)
            if ((new File(Dir + JavaDirs[i] + classFile)).exists())
                return new URL(Http + JavaDirs[i] + classFile);
    }
    else if (docName.startsWith(manual)) {
        String pageName = docName.substring(manual.length());
        int dash = pageName.lastIndexOf('-');
        if (dash >= 0) {
            // fixed locale: file names must not depend on the default locale
            String title = pageName.substring(0,dash)
                .toLowerCase(Locale.ENGLISH).replace('-','_');
            String ext = pageName.substring(dash + 1).toLowerCase(Locale.ENGLISH);
            String filename = Dir+"xman/sman"+ext+'/'+title+'.'+ext+".xml";
            System.out.println("filename: " + filename);
            if ((new File(filename)).exists())
                docName = Http+"xman/sman"+ext+'/'+title+'.'+ext+".xml";
        }
    }
    System.out.println("\""+docName+"\"");
    return new URL(docName);
}
/**
 * Produces the HTML fragment, TOC path and highlight count for a request.
 *
 * The request's focus path is followed from the document root to select the
 * node to render; the indices of the TOC root and of every TOC node met on
 * the way form the result's path. Locators whose common path starts with
 * the focus path (or all locators when the focus is "doc") contribute the
 * tokens to highlight. The tree is temporarily modified to carry the
 * highlights, transformed with the document's "toHtml.xsl", and always
 * reverted afterwards. When the focus is a text node, the nearest ancestor
 * that has a TOC node is rendered instead; if no ancestor has one, the
 * climb stops at the root rather than running past it.
 *
 * @param request document name, focus, locators and whether the TOC is wanted
 * @return the fragment holding HTML bytes, path, highlight count and,
 *         if requested, the TOC tree
 * @throws Exception any failure while resolving, parsing, highlighting or
 *         transforming; the stack trace is printed before rethrowing
 */
public synchronized DocumentFragment getDocumentFragment(DocumentRequest request)
    throws Exception {
    long start = System.currentTimeMillis();
    try {
        // reset the per-request highlighting state
        _tokenNumbers.clear();
        _substituted.setSize(0);
        String docName = request.getDocument();
        URL docUrl = findDocUrl(docName);
        final Node docRoot = getDocumentTree(docUrl);
        Node docNode = docRoot;
        // root of TOC
        TocTree tocTree = getTocTree(docUrl);
        TocTree.TocNode tocNode = tocTree.getRoot();
        DocumentFragment result = new DocumentFragment();
        if (request.isTocRequested())
            result.setTOC(tocTree);
        IntegerArray array = new IntegerArray();
        array.add(tocNode.index());
        String focusPath = request.getFocus().getCommonPath();
        // drop a single leading '/' so prefix matching below lines up
        if (focusPath.indexOf('/') == 0)
            focusPath = focusPath.substring(1);
        StringTokenizer steps = new StringTokenizer(focusPath, "/");
        while (steps.hasMoreTokens()) {
            docNode = xPathStep(docNode, steps.nextToken());
            tocNode = getTocNodeForDocNode(docNode);
            if (tocNode != null)
                array.add(tocNode.index());
        }
        result.setPath(array.toIntArray());
        // docNode is selected for transformation
        // find highlighted nodes that are its descendants
        // ... by xPath matching
        for (int i = 0; i < request.size(); i++) {
            MultiTokenLocator loc = request.getLocator(i);
            String locPath = loc.getCommonPath();
            if (locPath.startsWith(focusPath) || focusPath.equals("doc")) {
                int nTerms = loc.getNumberOfTerms();
                Node locNode = followXPath(docRoot, locPath);
                for (int j = 0; j < nTerms; j++)
                    if (loc.getTerm(j) != null)
                        addNumber(followXPath(locNode, loc.getPath(j)),
                                  loc.getTokenNumber(j));
            }
        }
        Node subtree = docNode;
        if (subtree.getType() == Node.TEXT) {
            subtree = subtree.getParent();
            // climb to the nearest ancestor known to the TOC, but never
            // past the root: a null node would make the lookup throw
            while (getTocNodeForDocNode(subtree) == null
                   && subtree.getParent() != null)
                subtree = subtree.getParent();
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream(1024*16);
        _html.setOutputStream(out);
        highlightTree();
        transform(getHtmlTransformForDoc(docUrl), new Node[] { subtree }, _html);
        byte[] bytes = out.toByteArray();
        System.out.println(bytes.length + " HTML bytes");
        result.setHTML(bytes);
        result.setNumberOfHighlights(_html.getNumberOfHighlights());
        return result;
    }
    catch (Exception e) {
        e.printStackTrace();
        throw e;
    }
    finally {
        // undo the highlight substitutions whether or not rendering succeeded
        revertTree();
        System.out.println((System.currentTimeMillis() - start)
                           +" msec getDocumentFragment");
    }
}
/**
 * Builds the collection model for a classification.
 *
 * The model file is chosen by the first keyword found in the
 * classification, tested in the order "java", "man", "unix",
 * "shakespeare"; "model" is used when none matches. The file is loaded
 * from Http + name + ".xml" and turned into a TocTree with the
 * "identity.xsl" transform.
 *
 * @param classification text searched for the keywords above
 * @return a new CollectionModel holding the tree
 * @throws Exception any failure; a marker line and the stack trace are
 *         written to System.err before rethrowing
 */
public synchronized CollectionModel getCollectionModel(String classification)
    throws Exception {
    try {
        // keyword -> model file, first match wins
        final String[][] modelsByKeyword = {
            { "java", "javaModel" },
            { "man", "manModel" },
            { "unix", "unixModel" },
            { "shakespeare", "playModel" }
        };
        String modelFile = "model";
        for (int k = 0; k < modelsByKeyword.length; k++) {
            if (classification.indexOf(modelsByKeyword[k][0]) >= 0) {
                modelFile = modelsByKeyword[k][1];
                break;
            }
        }
        final URL modelUrl = new URL(Http + modelFile + ".xml");
        final Node modelRoot = getDocumentTree(modelUrl);
        final URL identityTransfUrl = new URL(Http + "identity.xsl");
        final Transform identity = getTransform(identityTransfUrl);
        final CollectionModel model = new CollectionModel();
        model.setTree(TocTree.makeTocTree(modelRoot, identity, this));
        return model;
    }
    catch (Exception e) {
        System.err.println("getCollectionModel ");
        e.printStackTrace();
        throw e;
    }
}
}