Package org.htmlcleaner

Examples of org.htmlcleaner.DomSerializer


    @Override
    public String select(String text) {
        try {
            HtmlCleaner htmlCleaner = new HtmlCleaner();
            TagNode tagNode = htmlCleaner.clean(text);
            Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
            Object result;
            try {
                result = xPathExpression.evaluate(document, XPathConstants.NODESET);
            } catch (XPathExpressionException e) {
                result = xPathExpression.evaluate(document, XPathConstants.STRING);
View Full Code Here


    public List<String> selectList(String text) {
        List<String> results = new ArrayList<String>();
        try {
            HtmlCleaner htmlCleaner = new HtmlCleaner();
            TagNode tagNode = htmlCleaner.clean(text);
            Document document = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
            Object result;
            try {
                result = xPathExpression.evaluate(document, XPathConstants.NODESET);
            } catch (XPathExpressionException e) {
                result = xPathExpression.evaluate(document, XPathConstants.STRING);
View Full Code Here

 
  public static List<TOCReference> parseHhc(InputStream hhcFile, Resources resources) throws IOException, ParserConfigurationException,  XPathExpressionException {
    HtmlCleaner htmlCleaner = new HtmlCleaner();
    CleanerProperties props = htmlCleaner.getProperties();
    TagNode node = htmlCleaner.clean(hhcFile);
    Document hhcDocument = new DomSerializer(props).createDOM(node);
    XPath xpath = XPathFactory.newInstance().newXPath();
    Node ulNode = (Node) xpath.evaluate("body/ul", hhcDocument
        .getDocumentElement(), XPathConstants.NODE);
    List<TOCReference> sections = processUlNode(ulNode, resources);
    return sections;
View Full Code Here

         
          TagNode node = cleaner.clean(new ByteArrayInputStream(text.getBytes()));
         
          //NewCode : Only use html cleaner for cleansing
          //use JAXP for full Xpath lib
          Document doc = new DomSerializer(new CleanerProperties()).createDOM(node);
         
 
          String extraRegex = extractRegexFromXpath(xpath);
 
          if (extraRegex != null)
View Full Code Here

      try {
        createHtmlCleanerIfNeeded();

        TagNode node = cleaner.clean(new ByteArrayInputStream(field.getBytes()));

        Document doc = new DomSerializer(new CleanerProperties()).createDOM(node);
        XPath xpa = XPathFactory.newInstance().newXPath();       
       
        NodeList res = (NodeList)xpa.evaluate(script, doc, XPathConstants.NODESET);

        if (0 == res.getLength()) { // No match, just return "", unlike regex we don't want anything if we don't match...
View Full Code Here

    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);
    props.setNamespacesAware(false);
   
    // Initialize DomSerializer
    domSerializer = new DomSerializer(props);
   
    // Initialize xml parser   
    try {
      DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
      documentBuilder = documentBuilderFactory.newDocumentBuilder();
View Full Code Here

    {
        CleanerProperties props = new CleanerProperties();
        props.setNamespacesAware(false);
       
        HtmlCleaner cleaner = new HtmlCleaner(props);
        return new DomSerializer(props, true).createDOM(cleaner.clean(document(tester)));
    }
View Full Code Here

TOP

Related Classes of org.htmlcleaner.DomSerializer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.