Package org.vietspider.html.util

Examples of org.vietspider.html.util.HTMLText.searchText()


    HTMLText htmlText = new HTMLText();
    HTMLText.EmptyVerify verify = new HTMLText.EmptyVerify();

    List<HTMLNode> contents = new ArrayList<HTMLNode>();
    htmlText.searchText(contents, node, verify);
    return contents;
  }


  public void autoSelect(HTMLDocument document, String url) throws Exception {
View Full Code Here


    HTMLText htmlText = new HTMLText();
    HTMLText.EmptyVerify verify = new HTMLText.EmptyVerify();

    List<HTMLNode> contents = new ArrayList<HTMLNode>();
    htmlText.searchText(contents, node, verify);

    return contents;
  }

  public String searchMaxSequence(String text, char c) {
View Full Code Here

    List<HTMLNode> images = nodeUtil.search(root, Name.IMG);
    for(int i = 0; i < images.size(); i++) {
      HTMLNode image = images.get(i);
      HTMLNode parent  = searchUpper(image, Name.TABLE);
      if(parent != null) {
        textUtils.searchText(values, handleTable(parent, image));
//        addValues(handleTable(parent, image), values);
        continue;       
      }
     
      parent  = searchUpper(image, Name.DIV, Name.CENTER);
View Full Code Here

      }
     
      parent  = searchUpper(image, Name.DIV, Name.CENTER);
      if(parent != null) {
        if(isValidText(parent, 3)) {
          textUtils.searchText(values, parent);
//          addValues(parent, values);
        }
      }
    }
   
View Full Code Here

   
    short selectType = PathConfirmDialog.YES;
    boolean traverse = false;
    for(HTMLNode ele : commons) {
      contents.clear();
      htmlText.searchText(contents, ele);
      int count = countText(characterUtil, contents);

      if(count > maxCountContent) {
        maxCountContent = count;
        maxNodeContent = ele;
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.