Package org.htmlcleaner

Examples of org.htmlcleaner.HtmlCleaner.clean()


 
  public static Collection<String> findAllLinkHref(String html, String hostUrl) throws Exception{
    Collection<String> urls = new ArrayList<String>();
   
    HtmlCleaner cleaner = new HtmlCleaner();
    TagNode node = cleaner.clean(html);
    Object[] ns = node.evaluateXPath("//a[@href]");
    for (Object object : ns) {
      TagNode node2=(TagNode) object;
      String href = node2.getAttributeByName("href");
      if (href == null || href.trim().length() == 0)
View Full Code Here


 
  private List<Map<String, Object>> parseHtml(Page page) throws Exception{
    HtmlCleaner cleaner = new HtmlCleaner();
    cleaner.getProperties().setTreatUnknownTagsAsContent(true);
    String html = page.getContent();
    TagNode rootNode = cleaner.clean(html);
    fel.getContext().set("$page_content", html);
        final List<Field> fields = target.getModel().getField();
    String isModelArray = target.getModel().getIsArray();
    String modelXpath = target.getModel().getXpath();
    List<Map<String, Object>> list = new ArrayList<Map<String, Object>>();
View Full Code Here

 
  public static Object evalXpath(String html, String xpath, String attribute){
    List<Object> result = new ArrayList<Object>();
    HtmlCleaner cleaner = new HtmlCleaner();
    try {
      TagNode tagNode = cleaner.clean(html);
      Object[] nodeVals = tagNode.evaluateXPath(xpath);
      for (Object tag : nodeVals){
        TagNode _tag = (TagNode)tag;
        Object val = null;
        if (attribute != null)
View Full Code Here

      for( int i=0 ; i<lElements.size() ; i++ )
      {
        try
        {
          HtmlCleaner cleaner = new HtmlCleaner();
          TagNode rootNode = cleaner.clean(new URL(url));
          TagNode tagElements[] = rootNode.getElementsByName( lElements.get(i).getTag(), true );
          for( int j=0 ; j<tagElements.length ; j++ )
            if( lElements.get(i).getClas().equalsIgnoreCase("*") || tagElements[j].getAttributeByName("class").equals(lElements.get(i).getClas()) )
              lElements.get(i).add( tagElements[j].getText().toString() );
        }
View Full Code Here

      HtmlCleaner cleaner = new HtmlCleaner();
     
      CleanerProperties props = cleaner.getProperties();
      props.setUseEmptyElementTags(false);
     
      TagNode node = cleaner.clean(sb.toString());
      Document myJDom = new JDomSerializer(props, true).createJDom(node);
      XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
      sbResult.append(outputter.outputString(myJDom));
    }
    catch (IOException e) {logger.error(e);}
View Full Code Here

    // input source or reader. Result is root node of created
    // tree-like structure. Single cleaner instance may be safely used
    // multiple times.
//    TagNode node = cleaner.clean( new File("tests/accept.html"));

    TagNode node = cleaner.clean( new File("tests/rallypoint.htm"));


    Object[] myNodes;
       
View Full Code Here

    // input source or reader. Result is root node of created
    // tree-like structure. Single cleaner instance may be safely used
    // multiple times.
//    TagNode node = cleaner.clean( new File("tests/accept.html"));

    TagNode node = cleaner.clean( new File("tests/materias.html"));

    Village village = new Village();
        IOVillage.updateVillageOverview( village);
        System.out.println( village);
View Full Code Here

       
        HtmlCleaner cleaner = new HtmlCleaner();
        CleanerProperties props = cleaner.getProperties();
        props.setRecognizeUnicodeChars( true);
//        TagNode node = cleaner.clean( new File("tests/ally.html"));
        TagNode node = cleaner.clean( page);

        updatePlayers( ally, node);
      
    }
View Full Code Here

  public static void updateVillageOverview( Village village) throws Exception
  {
    HtmlCleaner cleaner = new HtmlCleaner();
//    CleanerProperties props = cleaner.getProperties();
//    props.setRecognizeUnicodeChars( true);
    TagNode node = cleaner.clean( new File("tests/overview3.html"));

    updateTerrains(village, node);
    updateStorage( village, node);
    updateProductionRate( village, node);
    updateServerTimestamp( village, node);
View Full Code Here

        HtmlCleaner cleaner = new HtmlCleaner();
        CleanerProperties props = cleaner.getProperties();
        props.setRecognizeUnicodeChars( true);
//        TagNode node = cleaner.clean( new File("tests/perfil.html"));
        TagNode node = cleaner.clean( page);

        updateMapVillages2( player, node);

    }
    public static void updateMapVillages2( Player player, TagNode node) throws Exception
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.