Package org.vietspider.ui.text

Source Code of org.vietspider.ui.text.TextHandler

/***************************************************************************
* Copyright 2003-2006 by VietSpider - All rights reserved.  *
*    *
**************************************************************************/
package org.vietspider.ui.text;

import java.util.List;

import org.vietspider.chars.refs.RefsDecoder;
import org.vietspider.html.HTMLNode;
import org.vietspider.html.Name;
import org.vietspider.html.NodeIterator;

/**
* Author : Thuannd
*         nhudinhthuan@yahoo.com
* Apr 21, 2006
*/
public class TextHandler {
 
   public HTMLNode findByText(HTMLNode node, String txt, RefsDecoder decoder){
    String value = getTextContent(node).toString();
    if(decoder != null) value = new String(decoder.decode(value.toCharArray()));
    value = trim(value);
//    System.out.println(value);
//    System.out.println("==================");
//    System.out.println(txt);
//    System.out.println("==================");
//    System.out.println(value.indexOf(txt));
    if(value.indexOf(txt) < 0return null;
    List<HTMLNode> children  = node.getChildrenNode();
    for(HTMLNode ele : children){
      HTMLNode newNode = findByText(ele, txt, decoder);
      if(newNode != null) return newNode;
    }
    return node;
  }

  public StringBuilder getTextContent(HTMLNode node){
    StringBuilder value = new StringBuilder();
    NodeIterator iterator = node.iterator();
    while(iterator.hasNext()) {
      HTMLNode n = iterator.next();
      if(n.isNode(Name.CONTENT)) {
        value.append(n.getValue());   
      }   
    }
    /*if(node.getConfig().name() == Name.CONTENT){
      value.append(node.getValue());   
    }   
    List<HTMLNode> children = node.getChildrenNode();
    for(HTMLNode ele : children){
      if(node.getConfig().name() == Name.CONTENT)
        value.append(ele.getValue());
      else
        value.append(getTextContent(ele));
    }
*/
    return value;
  }

  public String trim(String value){
    value = value.replace('\t', ' ');
    value = value.replace('\n', ' ');
    value = value.replace('\b', ' ');
    value = value.replace('\f', ' ');
    value = value.replace('\r', ' ');
    value = value.replaceAll(" ", "");
    return value;
  }

  public String findStartText(HTMLNode node) {
    List<HTMLNode> children = node.getChildren();
    for(HTMLNode ele : children){
      if(node.getConfig().name() == Name.CONTENT){ 
        String value = new String(ele.getValue()).trim();
        if(value.length() > 0) return value;
      }else {
        String value = findStartText(ele);
        if(value != null) return value;
      }
    }
    return null;
 

  public String findEndText(HTMLNode node) {
    List<HTMLNode> children = node.getChildren();
   
    for(int i = children.size() - 1; i>-1; i--){
      if(node.getConfig().name() == Name.CONTENT){ 
        String value = new String(children.get(i).getValue()).trim();
        if(value.length() > 0) return value;
      }else{
        String value = findEndText(children.get(i));
        if(value != null) return value;
      }
    }
    return null;
  }  
}
TOP

Related Classes of org.vietspider.ui.text.TextHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.