Package org.vietspider.document.util

Source Code of org.vietspider.document.util.ImageDescRemover

/***************************************************************************
* Copyright 2001-2008 The VietSpider         All rights reserved.       *
**************************************************************************/
package org.vietspider.document.util;

import java.util.ArrayList;
import java.util.List;

import org.vietspider.html.HTMLNode;
import org.vietspider.html.Name;
import org.vietspider.html.util.HTMLText;

/**
* Author : Nhu Dinh Thuan
*          nhudinhthuan@yahoo.com
* Oct 28, 2008 
*/
public class ImageDescRemover extends NodeRemover {
 
  public List<HTMLNode> removeDesc(HTMLNode root) {
    List<HTMLNode> values = new ArrayList<HTMLNode>();
   
    HTMLText textUtils = new HTMLText();
   
    List<HTMLNode> images = nodeUtil.search(root, Name.IMG);
    for(int i = 0; i < images.size(); i++) {
      HTMLNode image = images.get(i);
      HTMLNode parent  = searchUpper(image, Name.TABLE);
      if(parent != null) {
        textUtils.searchText(values, handleTable(parent, image));
//        addValues(handleTable(parent, image), values);
        continue;       
      }
     
      parent  = searchUpper(image, Name.DIV, Name.CENTER);
      if(parent != null) {
        if(isValidText(parent, 3)) {
          textUtils.searchText(values, parent);
//          addValues(parent, values);
        }
      }
    }
   
    return values;
  }
 
/* private void addValues(HTMLNode node, List<HTMLNode> values) {
    if(node == null) return;
    if(node.isNode(Name.CONTENT)) {
      values.add(node);
      return;
    }
    List<HTMLNode> children  = node.getChildren();
    if(children == null) return;
    for(int i = 0; i < children.size(); i++) {
      addValues(children.get(i), values);
    }
  }*/

  public HTMLNode handleTable(HTMLNode tableNode, HTMLNode imgNode) {
    List<HTMLNode> trNodes =  nodeUtil.search(tableNode, Name.TR);
    int trIndex = 0;
    for(; trIndex < trNodes.size()-1; trIndex++) {
      if(isSuper(trNodes.get(trIndex), imgNode)) break;
    }
    if(trIndex >= trNodes.size()-1) return null;

    List<HTMLNode> tdNodes =  nodeUtil.search(trNodes.get(trIndex), Name.TD);
    int tdIndex = 0;
    for(; tdIndex < tdNodes.size()-1; tdIndex++) {
      if(isSuper(tdNodes.get(tdIndex), imgNode)) break;
    }

    if(tdIndex >= trNodes.get(trIndex+1).totalOfChildren()) return null;
   
    HTMLNode nodeValue = trNodes.get(trIndex+1).getChild(tdIndex);
    return isValidText(nodeValue, 3) ? nodeValue : null;
   
//    return isValidText(trNodes.get(trIndex+1).getChildren().get(tdIndex), 3);
  }


}
TOP

Related Classes of org.vietspider.document.util.ImageDescRemover

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.