Package org.w3c.tidy

Examples of org.w3c.tidy.Tidy.parseDOM()


            StringWriter stringWriter = new StringWriter();
            PrintWriter errorWriter = new PrintWriter(stringWriter);
            tidy.setErrout(errorWriter);

            // Extract the document using JTidy and stream it.
            org.w3c.dom.Document doc = tidy.parseDOM(new BufferedInputStream(this.inputSource.getInputStream()), null);

            // FIXME: JTidy neither warns about nor strips duplicate attributes
            // within the same tag, so strip them here.
            XMLUtils.stripDuplicateAttributes(doc, null);
View Full Code Here


  public String getContent(InputStream is)
      throws Exception {
    Tidy tidy = new Tidy();
    tidy.setQuiet(true);
    tidy.setShowWarnings(false);
    org.w3c.dom.Document root = tidy.parseDOM(is, null);
    Element rawDoc = root.getDocumentElement();
    String title = getTitle(rawDoc);
    String body = getBody(rawDoc);

    if ((body != null) && (!body.equals(""))) {
View Full Code Here

            // Extract the document using JTidy and stream it.
            ByteArrayInputStream bais =
                new ByteArrayInputStream(text.getBytes());
            org.w3c.dom.Document doc =
                tidy.parseDOM(new BufferedInputStream(bais), null);

            // FIXME: JTidy neither warns about nor strips duplicate attributes
            // within the same tag, so strip them here.
            XMLUtils.stripDuplicateAttributes(doc, null);
View Full Code Here

      try {
        in = new ByteArrayInputStream(string.getBytes("utf-8"));
      } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
      }
      Document root = tidy.parseDOM(in, null);// System.out);

      NodeList nodeList = root.getElementsByTagName("body");
      Transformer transformer = TransformerFactory.newInstance()
          .newTransformer();
      transformer.setOutputProperty("omit-xml-declaration", "yes");
View Full Code Here

        tidy.setMakeClean(true);
        tidy.setXmlTags(true);
        // tidy.setQuiet(true);
        String data = "<b>hello</b>";
        InputStream in = connection.getInputStream();
        Document root = tidy.parseDOM(in, null);// System.out);
        NodeList nodeList = root.getElementsByTagName("link");
        Map typeURLMap = new HashMap();
        for (int i = 0; i < nodeList.getLength(); i++) {
            Element current = (Element) nodeList.item(i);
            if (current.getAttribute("rel").equals("alternate")) {
View Full Code Here

            // convert HTML to X(HT)ML
            final Tidy tidy = new Tidy(); // obtain a new Tidy instance
            tidy.setXHTML(true); // set desired config options using tidy setters

            try (InputStream inputStream = IOUtils.toInputStream(html)) {
                return tidy.parseDOM(inputStream, System.out);
            }

        } catch (IOException e) {
            throw new ServiceException("Error converting HTML to XHTML document", e);
        }
View Full Code Here

      tidy.setShowWarnings(false);
      tidy.setIndentContent(false);
      tidy.setSmartIndent(false);
      tidy.setIndentAttributes(false);
      tidy.setWraplen(0);
      Document doc = tidy.parseDOM(in, null);
      tidy.pprint(doc, out);

      String tidied = new String(out.toByteArray());
      return tidied.substring(tidied.indexOf("<body>")+"<body>".length(), tidied.indexOf("</body>")).trim();
    } else {
View Full Code Here

    // borrowed from lai-xin-chu: http://stackoverflow.com/questions/12576119
    private String extractTextFromHtml(String html) {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        org.w3c.dom.Document root = tidy.parseDOM(new StringReader(html), null);
        return getText(root.getDocumentElement());
    }

    // borrowed from lai-xin-chu: http://stackoverflow.com/questions/12576119
    private String getText(Node node) {
View Full Code Here

  public void buildDocument(InputStream is, Document doc)
      throws DocumentHandlerException {
    Tidy tidy = new Tidy();
    tidy.setQuiet(true);
    tidy.setShowWarnings(false);
    org.w3c.dom.Document root = tidy.parseDOM(is, null);
    Element rawDoc = root.getDocumentElement();
    String title = getTitle(rawDoc);
    String body = getBody(rawDoc);
    if ((title != null) && (!title.equals(""))) {
      doc.addField("title", title, StoreOption.YES, IndexOption.TOKENIZED);
View Full Code Here

            log.info( resultsFileName + " could not be downloaded. Using the template to create anew");
            resultsFile = new File(project.getBasedir(), "src/main/resources/" + resultsFileName);
        }

        FileInputStream is = new FileInputStream( resultsFile );
        Document document = tidy.parseDOM(is, null);
        is.close();

        File reportsDir = new File(targetDirectory, "surefire-reports");
        if ( !reportsDir.exists() ) {
            log.warn("No surefire-reports directory here");
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.