Package org.htmlparser.util

Examples of org.htmlparser.util.NodeList.elements()


                for (NodeIterator ni = forms.elements(); ni.hasMoreNodes(); ) {
                  FormTag form = (FormTag) ni.nextNode();
                  String fragment = "action:"+form.getAttribute("action")+" method:"+form.getAttribute("method");
                  _model.addFragment(url, id, FragmentsModel.KEY_FORMS,fragment );
                }
                for (NodeIterator ni = inputs.elements(); ni.hasMoreNodes(); ) {
                  InputTag tag = (InputTag) ni.nextNode();
                  String type = tag.getAttribute("type");
                  if( "hidden".equals(type))
                  {
                    String fragment = tag.toHtml();
View Full Code Here


        filter = new OrFilter(filter, new HasAttributeFilter("src"));
        filter = new OrFilter(filter, new HasAttributeFilter("onclick"));
        filter = new OrFilter(filter, new HasAttributeFilter("onblur"));
        try {
            NodeList links = nodelist.extractAllNodesThatMatch(filter);
            for (NodeIterator ni = links.elements(); ni.hasMoreNodes(); ) {
                Node node = ni.nextNode();
                if (node instanceof Tag) {
                    boolean got = false;
                    Tag tag = (Tag) node;
                    String src = tag.getAttribute("src");
View Full Code Here

        NodeClassFilter filter = new NodeClassFilter (LinkTag.class);
        for (NodeIterator e = parser.elements();e.hasMoreNodes();)
            e.nextNode().collectInto(collectionList,filter);
        assertEquals("Size of collection vector should be 11",11,collectionList.size());
        // All items in collection vector should be links
        for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) {
            Node node = e.nextNode();
            assertTrue("Only links should have been parsed",node instanceof LinkTag);
        }
    }
    public void testImageCollection() throws ParserException {
View Full Code Here

        TagNameFilter filter = new TagNameFilter ("IMG");
        for (NodeIterator e = parser.elements();e.hasMoreNodes();)
            e.nextNode().collectInto(collectionList,filter);
        assertEquals("Size of collection vector should be 5",5,collectionList.size());
        // All items in collection vector should be images
        for (SimpleNodeIterator e = collectionList.elements();e.hasMoreNodes();) {
            Node node = e.nextNode();
            assertTrue("Only images should have been parsed",node instanceof ImageTag);
        }
    }
View Full Code Here

      Node node = e.nextNode();
      node.collectInto(collectionList, LinkTag.class);
    }
    assertEquals("Size of collection vector should be 11", 11, collectionList.size());
    // All items in collection vector should be links
    for (SimpleNodeIterator e = collectionList.elements(); e.hasMoreNodes();) {
      Node node = e.nextNode();
      assertTrue("Only links should have been parsed", node instanceof LinkTag);
    }
  }
View Full Code Here

      Node node = e.nextNode();
      node.collectInto(collectionList, ImageTag.IMAGE_TAG_FILTER);
    }
    assertEquals("Size of collection vector should be 5", 5, collectionList.size());
    // All items in collection vector should be images
    for (SimpleNodeIterator e = collectionList.elements(); e.hasMoreNodes();) {
      Node node = e.nextNode();
      assertTrue("Only images should have been parsed", node instanceof ImageTag);
    }
  }
View Full Code Here

      return null;

    final HashSet<String> set = new HashSet<>();
    Parser parser = new Parser(html);
    NodeList matches = parser.extractAllNodesThatMatch(LINK_FILTER);
    SimpleNodeIterator it = matches.elements();
    while (it.hasMoreNodes()) {
      LinkTag node = (LinkTag) it.nextNode();
      String link = node.getLink().trim();
      // remove the anchor if present
      if (link.contains("#")) {
View Full Code Here

   */
  public static String extractTitle(String html) throws ParserException {
    String title = "";
    Parser parser = new Parser(html);
    NodeList matches = parser.extractAllNodesThatMatch(TITLE_FILTER);
    SimpleNodeIterator it = matches.elements();
    while (it.hasMoreNodes()) {
      TitleTag node = (TitleTag) it.nextNode();
      title = node.getTitle().trim();
    }
    return title;
View Full Code Here

  public String process(String text) {
    Parser htmlParser = new Parser();
    try {
      htmlParser.setInputHTML(text);
      NodeList nodeList = htmlParser.parse(null);
            SimpleNodeIterator itChildren = nodeList.elements();
            Node current;
            while (itChildren.hasMoreNodes()) {
              current = itChildren.nextNode();
              processNode(current);
             }
View Full Code Here

        }
    }
    else if (node instanceof Tag && !forbiddenParents.contains(((Tag)node).getTagName().toLowerCase())){
      NodeList childList = node.getChildren();
      if (childList != null) {
        SimpleNodeIterator children =  childList.elements();
        Node child;
        while (children.hasMoreNodes()) {
          child = children.nextNode();
          processNode(child);
        }
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.