Package org.vietspider.html

Examples of org.vietspider.html.HTMLNode


      char [] chars = node.getValue();
      if(!isEmpty(chars)) {
        for(int k = 0; k < chars.length; k++) {
          builder.append(chars[k] == '\n' ? ' ' : chars[k]);
        }
        HTMLNode parent = node.getParent();
        if(parent != null && parent.isNode(Name.SPAN)) builder.append(' ');
      }
      break;
    case IMG:
    case OBJECT:
    case H1:
View Full Code Here


 
  @SuppressWarnings("unused")
  boolean check(CheckModel model) {
//    System.out.println("\n\n================================================");
//    System.out.println(node.getTextValue());
    HTMLNode node = model.getNode();
    if(model.hasRawData()) return true;
   
    if(model.getTextBlockStatus() == CheckModel.UNCHECK) {
      boolean isTextBlock = contentChecker.isTextBlock(node, true, 50, 5);
      model.setTextBlockStatus( isTextBlock ? CheckModel.RIGHT : CheckModel.NOT);
View Full Code Here

  /**
   * Creates a link-block checker that keeps a reference to the given content checker.
   *
   * @param contentChecker the checker stored in this instance's {@code contentChecker} field
   */
  public LinkBlockChecker(ContentChecker contentChecker) {
    this.contentChecker = contentChecker;
  }
 
  boolean isLink(CheckModel model) {
    HTMLNode node = model.getNode();
    List<HTMLNode> links = new ArrayList<HTMLNode>();
    NodeIterator iterator = node.iterator();
    while(iterator.hasNext()) {
      HTMLNode n = iterator.next();
      if(n.isNode(Name.A)) links.add(n);
      else if(n.isNode(Name.DIV)
          || n.isNode(Name.SPAN)) {
        if(hasOnclick(n)) {
          links.add(n);
        }
      }
    }
View Full Code Here

 
  private boolean isBlockLink(HTMLNode node) {
    NodeIterator iterator = node.iterator();
    int counter = 0;
    while(iterator.hasNext()) {
      HTMLNode n = iterator.next();
      if(n.isNode(Name.A)) counter++;
    }
    return counter > 3;
  }
View Full Code Here

    RefsDecoder decoder = new RefsDecoder();
    NodePathParser pathParser = new NodePathParser();
    TextHandler textHandler = new TextHandler();
    if(start == null || start.trim().length() == 0) return pathParser.toPath(node);
    start = textHandler.trim(start);
    HTMLNode startNode = textHandler.findByText(node, start, decoder);
    if(end == null || end.trim().length() == 0) {
      return startNode != null ?  pathParser.toPath(startNode) : pathParser.toPath(node);
    }
    end = textHandler.trim(end);
   
    HTMLNode endNode = textHandler.findByText(node, end, decoder);   
    if(endNode == null)
      return startNode != null ? pathParser.toPath(startNode) : pathParser.toPath(node);
    if(startNode  == null
      return endNode != null  ?  pathParser.toPath(endNode) : pathParser.toPath(node);
    HTMLNodeUtil nodeUtil = new HTMLNodeUtil()
View Full Code Here

  }
 
  public void createTreeItem(Tree tree, HTMLDocument doc){
    if(doc == null) return ;
    selectedItems.clear();
    HTMLNode node = doc.getRoot();
    createItem(tree, node);
  }
View Full Code Here

 
  private void createItem(Object parent, HTMLNode node){
    List<HTMLNode> children = node.getChildren();
    if(children == null) return;
    for(int i=0; i< children.size(); i++){
      HTMLNode child  = children.get( i);     
      TreeItem item;
      if(parent instanceof Tree) {
        item = treeFactory.get((Tree)parent, child.getName(), getIndex(children, child) , getStyle(child));
        item.setData(new String(child.getValue()));
      } else {
        item = treeFactory.get((TreeItem)parent, child.getName(), getIndex(children, child) , getStyle(child));
        item.setData(new String(child.getValue()));
      }
      createItem(item, child);
    }  
  }
View Full Code Here

    List<HTMLNode> children = node.getChildren();
    if(children == null
        || children.size() < 1) return new String[0];
    String [] values = new String[children.size()];
    for(int i = 0; i < children.size(); i++) {
      HTMLNode child = children.get(i);
      StringBuilder builder = new StringBuilder();
      builder.append(child.getName().toString());
      builder.append('[').append(getIndex(children, child)).append(']');
      values[i] = builder.toString();
    }
    return values;
  }
View Full Code Here

   
    HTMLText textUtils = new HTMLText();
   
    List<HTMLNode> images = nodeUtil.search(root, Name.IMG);
    for(int i = 0; i < images.size(); i++) {
      HTMLNode image = images.get(i);
      HTMLNode parent  = searchUpper(image, Name.TABLE);
      if(parent != null) {
        textUtils.searchText(values, handleTable(parent, image));
//        addValues(handleTable(parent, image), values);
        continue;       
      }
View Full Code Here

    }catch (Exception e) {
      ClientLog.getInstance().setException(tree.getShell(), e);
    }
   
    if(bodyPath == null || document == null) return;
    HTMLNode body = new HTMLExtractor().lookNode(document.getRoot(), bodyPath);
    List<HTMLNode> list = new ArrayList<HTMLNode>();
    List<HTMLNode> commons = new ArrayList<HTMLNode>();
    if(HTMLExplorer.SECTION == type) {
      searchSectionCSS(commons, list, body);

      short selectType = PathConfirmDialog.YES;
      for(HTMLNode ele : commons) {
        try {
          NodePath path = pathParser.toPath(ele);  
          if(path == null) continue;
          selectType = handler.traverseTree(tree, path, TreeHandler.MARK, selectType);     
        } catch(Exception exp){
          ClientLog.getInstance().setMessage(tree.getShell(), exp);
       
      }

      return;
    }

    searchContentCSS(commons, list, body);

    int maxCountContent = 0;
    HTMLNode maxNodeContent = null;

    List<HTMLNode> contents = new ArrayList<HTMLNode>();
    CharacterUtil characterUtil = new CharacterUtil();
   
    HTMLText htmlText = new HTMLText();
View Full Code Here

TOP

Related Classes of org.vietspider.html.HTMLNode

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.