Examples of visitAllNodesWith()


Examples of org.htmlparser.Parser.visitAllNodesWith()

            }
            dataList.add(data);
          }
        }
      };
      parser.visitAllNodesWith(visitor);
    } catch (ParserException e) {
      e.printStackTrace();
    }
    return dataList;
  }
View Full Code Here

Examples of org.htmlparser.Parser.visitAllNodesWith()

    {
      String htmlContents = IOUtils2.readString(htmlFile);
      Lexer lexer = new Lexer(new Page(htmlContents));
      Parser parser = new Parser(lexer, new DefaultParserFeedback(DefaultParserFeedback.QUIET));
      TagFindingVisitor visitor = new TagFindingVisitor(new String[]{tagName});
      parser.visitAllNodesWith(visitor);
      tags = visitor.getTags(0);
    }
    // convert into List<TagNode>
    List<TagNode> tagNodes = Lists.newArrayList();
    CollectionUtils.addAll(tagNodes, tags);
View Full Code Here

Examples of org.htmlparser.Parser.visitAllNodesWith()

          // Extract plain text from HTML

          StringBean oStrBn = new StringBean();

          try {
            oPrsr.visitAllNodesWith (oStrBn);
          } catch (ParserException pe) {
          throw new MessagingException(pe.getMessage(), pe);
          }

          sTextBody = oStrBn.getStrings();
View Full Code Here

Examples of org.htmlparser.Parser.visitAllNodesWith()

         // read links or not
         // sb.setLinks(true); //TODO make this configurable

         // extract text
         parser.visitAllNodesWith(sb);

         String text = sb.getStrings();
         refined_text = (text != null) ? text : ""; // delete(text);

      }
View Full Code Here

Examples of org.htmlparser.Parser.visitAllNodesWith()

        Parser parser =
            new Parser(url);
        TagFindingVisitor tagFinder =
            new TagFindingVisitor(tagsBeingChecked, true);
        parser.visitAllNodesWith(tagFinder);
        for (int i=0;i<tagsBeingChecked.length;i++) {
            System.out.println(
                "Number of "+tagsBeingChecked[i]+" begin tags = "+
            tagFinder.getTagCount(i));
            System.out.println(
View Full Code Here

Examples of org.htmlparser.Parser.visitAllNodesWith()

  public static String trimHtml(String html) {
    Parser parser = Parser.createParser(html, "GBK");
    if (parser != null) {
      StringBean sb = new StringBean();
      try {
        parser.visitAllNodesWith(sb);
        html = sb.getStrings();
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
View Full Code Here

Examples of org.htmlparser.Parser.visitAllNodesWith()

  }

  public void testUrlModificationWithVisitor() throws Exception {
    Parser parser = Parser.createParser(HTML_WITH_LINK);
    UrlModifyingVisitor visitor = new UrlModifyingVisitor(parser, "localhost://");
    parser.visitAllNodesWith(visitor);
    assertStringEquals("Expected HTML", MODIFIED_HTML, visitor.getModifiedResult());
  }
}
View Full Code Here

Examples of org.htmlparser.Parser.visitAllNodesWith()

  private void identify(String string) throws Exception {
    String[] tagsBeingChecked = { "TABLE", "DIV", "SPAN" };

    Parser parser = new Parser("http://www.amazon.com");
    TagFindingVisitor tagFinder = new TagFindingVisitor(tagsBeingChecked, true);
    parser.visitAllNodesWith(tagFinder);
    for (int i = 0; i < tagsBeingChecked.length; i++) {
      System.out.println("Number of " + tagsBeingChecked[i] + " begin tags = " + tagFinder.getTagCount(i));
      System.out.println("Number of " + tagsBeingChecked[i] + " end tags = " + tagFinder.getEndTagCount(i));
    }
View Full Code Here

Examples of org.htmlparser.Parser.visitAllNodesWith()

    URL[] ret;

    parser = new Parser(url);
    parser.registerScanners();
    ObjectFindingVisitor visitor = new ObjectFindingVisitor(LinkTag.class);
    parser.visitAllNodesWith(visitor);
    Node[] nodes = visitor.getTags();
    vector = new Vector();
    for (int i = 0; i < nodes.length; i++)
      try {
        link = (LinkTag) nodes[i];
View Full Code Here

Examples of org.htmlparser.Parser.visitAllNodesWith()

    public void testUrlModificationWithVisitor() throws Exception {
        Parser parser = Parser.createParser(HTML_WITH_LINK, null);
        UrlModifyingVisitor visitor =
            new UrlModifyingVisitor("localhost://");
        parser.visitAllNodesWith(visitor);
        String result = visitor.getModifiedResult();
        assertStringEquals("Expected HTML",
            MODIFIED_HTML,
            result);
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.