Package org.htmlparser

Examples of org.htmlparser.NodeFilter


            }
     
            String url = MessageFormat.format(SEARCH_POPULAR_URL, URLEncoder.encode(query, "UTF-8"), start);
      parser.setURL(url);

      NodeFilter discFilter = new AndFilter(
          new NodeClassFilter(TableRow.class),
          new HasChildFilter(new HasChildFilter(new CssClassFilter("searchDisplay")))
      );

      NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
View Full Code Here


      if (log.isDebugEnabled()) {
        log.debug("Adding movie at url " + url);
      }
      parser.setURL(url);

      NodeFilter discFilter = new AndFilter(
        new CssClassFilter("disc"),
        new HasChildFilter(new HasChildFilter(
            new HasAttributeFilter("href", "/catalog/movieDetails/" + item.getMovieId())))
      );
     
      NodeFilter removeFilter = new AndFilter(
          new NodeClassFilter(LinkTag.class),
          new CssClassFilter("bvr-qremove")
      );

      NodeList discNodes = parser.extractAllNodesThatMatch(discFilter);
View Full Code Here

            Node infoNode = infoCollector.getNode(i++);
           
            //info
            NodeList list = new NodeList();
            NodeFilter groupsFilter = new CssClassFilter("movieInfo");
            infoNode.collectInto(list, groupsFilter);
            setListValues(item, list);
           
            //summary
            list.removeAll();
            NodeFilter summaryFilter = new CssClassFilter("summary");
            infoNode.collectInto(list, summaryFilter);
            item.setSummary(NodeUtils.getTextData(list));
           
            item.addDetailFlag(MovieItem.DETAIL_MEDIUM);
           
View Full Code Here

        }
        return item;
    }
 
  private void parseMovieQueue(Node queue, List<MovieItem> items) {
    NodeFilter discFilter = new CssClassFilter("disc");

        NodeCollector infoCollector = new NodeCollector(new NodeFilter[] {
                new CssClassFilter("bvr-qremove"),
                new AndFilter(new NodeClassFilter(LinkTag.class),
                        new HasParentFilter(new CssClassFilter("title"))),
View Full Code Here

    }
      if (categories == null) {
      try {
        parser.setURL(CATEGORY_LIST_URL);
 
        NodeFilter categoryFilter = new AndFilter(
            new HasAncestorFilter(
                                new OrFilter(new CssClassFilter("column33"),
                                        new CssClassFilter("column34"))),
            new CssClassFilter("listMain")
        );
View Full Code Here

     
      return categories;
    }
   
    private void parseCategory(Node categoryNode, List<Category> categories) throws ParserException {
      NodeFilter linkFilter = new NodeClassFilter(LinkTag.class);
     
      NodeList links = new NodeList();
      categoryNode.collectInto(links, linkFilter);
     
      if (links.size() == 0) {
View Full Code Here

     */
    public void parseHtml(URI uri, InputStream content)
    {
        ByteArrayOutputStream output = null//####TODO: Add in MIME type detection to this stream
        Parser                parser = null;
        NodeFilter            filter = null;
        NodeList              list   = null;
       
       
        if (uriFilter.isURIInternal(uri) && content != null)
        {
            log.debug("Parsing HTML from URI " + uri.toString());
       
            try
            {
                log.debug("Copying content.");
       
                output = new ByteArrayOutputStream();
                IOUtils.copy(content, output);
               
                log.debug("Creating filter.");
       
                //####TODO: Dependency inject this crap
                filter = new AndFilter(new NodeClassFilter(LinkTag.class),
                                       new NodeFilter()
                                       {
                                           public boolean accept(Node node)
                                           {
                                               return(!((LinkTag)node).isMailLink());
                                           }
View Full Code Here

     * @return A list of all links on the page as URLs.
     * @exception ParserException If the parse fails.
     */
    protected URL[] extractLinks () throws ParserException
    {
        NodeFilter filter;
        NodeList list;
        Vector vector;
        LinkTag link;
        URL[] ret;

View Full Code Here

        return (false);
    }

    private NodeFilter parse ()
    {
        NodeFilter ret;
       
        ret = null;
        do
        {
            switch (tokentype)
View Full Code Here

    }

    private NodeFilter parseSimple ()
    {
        boolean done = false;
        NodeFilter ret = null;

        if (token != null)
            do
            {
                switch (tokentype)
View Full Code Here

TOP

Related Classes of org.htmlparser.NodeFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.