Package barrysoft.web.tests

Source Code of barrysoft.web.tests.ParserTest

package barrysoft.web.tests;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.NoSuchElementException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.junit.Test;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import barrysoft.web.Parser;
import barrysoft.web.ParserRule;
import barrysoft.web.ParserRuleParam;
import barrysoft.web.WebDownloader;

public class ParserTest {
 
  public final static String UA = "Mozilla/5.0 (Windows; Windows NT 5.1; en-US; rv:1.9.2a1pre) "+
                    "Gecko/20090402 Firefox/3.6a1pre";

  public final static String[] regexs = {
    "(?i)(?m).*?<tr id=\"[series]\">\\s*.*?<a href=\"([^<>]+?)\"> ([A-Za-z ]+?)</a></a></h3></td>.*",
    "(?i)func=fileinfo&id=(\\d+?)\">(.*?[subname].*?)</a>",
    "(?i)(?m).*?<tr id=\"Stagione [season]\">\\s*<td><h3><img.*?><a name=\".*?\"><a href=\"(.+?)\"> (.+?)</a></a></h3></td>.*",
    "(?i)(?m).*?<a href=\"([^<>]+?)\">[subname]</a></dd>.*",
    "(?i)(?m).*?<td><h3><img.*?><a name=\"\\d*?\"><a href=\"([^<>]+?)\">\\s*([^<>]+?)</a></a></h3></td>.*",
    "<td><h3><img.*?><a name=\"\\d*?\"><a href=\"([^<>]+?)\">\\s*[type]</a></a></h3></td>",
    "adasdasdas",
    "(?i)(?m)asdasdasdasd",
    "Rome is the capital of (.*)"
  };
 
  public final static int[] regexsResults = {2,2,2,1,2,1,0,0,1};
 
  public void testGroupCount() {
   
    for (int i=0; i < regexs.length; i++) {
      int groups = Parser.getGroupsCount(regexs[i]);
      assertEquals("Testing RegEx "+i, regexsResults[i], groups);
    }
   
  }
 
  public final static String plainData = "Rome, Italy\n"+
                      "Rome is also known as Roma\n"+
                      "Rome is the capital of Italy\n"+
                      "The native name of Italy is Italia";
 
  public final static String parseRule = "(?i)(?s)^[city], [country]\\s"+
                      "[city] is also known as (.+?)\\s"+
                      "[city] is the capital of [country]\\s"+
                      "The native name of [country] is (.+?)$";
  @Test
  public void testParse() {
   
    Parser p = new Parser("Test Parser");
    /*p.addRule(parseRule);
    p.addRuleParam(parseRule, p.new ParserRuleParam("city", "Rome"));
    p.addRuleParam(parseRule, p.new ParserRuleParam("country", "Italy"));*/
   
    ParserRule pr = new ParserRule(parseRule);
    pr.addParam(new ParserRuleParam("city", "Rome"));
    pr.addParam(new ParserRuleParam("country", "Italy"));
   
    p.addRule(pr);
     
    p.parseData(plainData);
   
    String[][] results = p.getRule(0).getResults();
    checkResults(results, 1, new int[] {2});
   
    assertEquals("Wrong group data.", "Roma", results[0][0]);
    assertEquals("Wrong group data.", "Italia", results[0][1]);
   
    assertTrue(true);
   
  }
 
  public final static String timeURL = "http://www.worldtimeserver.com/current_time_in_IT.aspx";
  public final static String timeRule = "(?i)(?s).*The current time.*?(\\d+[.:]+\\d\\d *[AP]*M*).*";
 
  @Test
  public void testParseURL() {
   
    Parser p = new Parser("Time Parser");
    p.addRule(new ParserRule(timeRule));
   
    try {
     
      p.parseData(new WebDownloader(timeURL));
     
      String[][] results = p.getRule(0).getResults();
     
      checkResults(results, 1, new int[] {1});

      System.out.println(results[0][0]);
     
      assertTrue(true);
    } catch (MalformedURLException e) {
      fail(e.getMessage());
    } catch (IOException e) {
      fail(e.getMessage());
    }
   
  }
 
  public final static String itsaRule = "(?i)(?s).*?<a href=\"([^<>]+)\">\\s[series]</a>.*?";
 
  public final static String[] series = new String[] {
    "Dexter", "How I Met Your Mother", "Lost", "Glee"
  };
 
  @Test
  public void testParserITSA() {
   
    Parser p = new Parser("ITSA Parser");
   
    ParserRuleParam prp = new ParserRuleParam("series");
   
    for (String s : series)
      prp.addValue(s);
   
    ParserRule pr = new ParserRule(itsaRule, prp);
    pr.setQuickRule(" [series]<");
   
    p.addRule(pr);
   
    try {
     
      WebDownloader wd = new WebDownloader("http://www.italiansubs.net/index.php");
      wd.addFormElement("option", "com_remository");
     
      p.parseData(wd);

      String[][] results = p.getRule(0).getResults();
     
      int[] expected = new int[series.length];
      Arrays.fill(expected, 1);
     
      checkResults(results, series.length, expected);
     
      for (String[] s : results)
        System.out.println(s[0]);
     
      assertTrue(true);
     
    } catch (IOException e) {
      fail(e.getMessage());
    }
   
  }
 
  public final static String[] multipleRules = new String[] {
    "(?i)(?s).*<div id='remositoryfileinfo'>.*?<center>.*?</a>(.+?)</center>.*",
    "(?i)(?s).*<div id='remositoryfileinfo'>.*?<dd>.*?Inviato il (.+?)\r\n.*?</dd>.*",
    "(?i)(?s).*<div id='remositoryfileinfo'>.*?<dd>.*?Dimensioni del file: (.+?)\r\n.*?</dd>.*"
  };
 
  @Test
  public void testParserMultipleRules() {
   
    Parser p = new Parser("ITSA Parser Multi");
   
    for (String rule : multipleRules)
      p.addRule(new ParserRule(rule));
   
    try {
     
      WebDownloader wd = new WebDownloader("http://www.italiansubs.net/index.php");
      wd.addFormElement("option", "com_remository");
      wd.addFormElement("func", "fileinfo");
      wd.addFormElement("id", "2126");
     
      p.parseData(wd);

      for (int i=0; i < multipleRules.length; i++) {
        String[][] results = p.getRule(i).getResults();
       
        checkResults(results, 1, new int[] {1});
       
        System.out.println(results[0][0].trim());
      }
     
      assertTrue(true);
     
    } catch (IOException e) {
      fail(e.getMessage());
    }
   
  }
 
  public final static String[] multipleParamsRules = new String[] {
    "(?i)(?s).*<div id='remositoryfileinfo'>.*?<center>.*?</a>(.+?)</center>.*",
    "(?i)(?s).*<div id='remositoryfileinfo'>.*?<dd>.*?[tag](.+?)\r\n.*?</dd>.*"
  };
 
  @Test
  public void testParserMultipleParamsRules() {
   
    Parser p = new Parser("ITSA Parser Multi");
   
    for (String rule : multipleParamsRules)
      p.addRule(new ParserRule(rule));
   
    ParserRule pr = p.getRule(multipleParamsRules.length-1);
    pr.addParam(new ParserRuleParam("tag", "Inviato il "));
   
    try {
     
      pr.getParam("tag").addValue("Dimensioni del file: ");
     
      WebDownloader wd = new WebDownloader("http://www.italiansubs.net/index.php");
      wd.addFormElement("option", "com_remository");
      wd.addFormElement("func", "fileinfo");
      wd.addFormElement("id", "2126");
     
      p.parseData(wd);
     
      String[][] results = p.getRule(0).getResults();
      checkResults(results, 1, new int[] {1});
      System.out.println(results[0][0]);
     
      results = p.getRule(1).getResults();
      checkResults(results, 2, new int[] {1, 1});
     
      for(String[] s : results)
        System.out.println(s[0].trim());
     
    } catch (IOException e) {
      fail(e.getMessage());
    } catch (IndexOutOfBoundsException e) {
      fail(e.getMessage());
    } catch (IllegalArgumentException e) {
      fail(e.getMessage());
    }
   
  }
 
  @Test
  public void testSave() {
   
    Parser p = new Parser("Save Parser");
   
    for (String rule : regexs) {
      ParserRule pr = new ParserRule(rule);
     
      for (int i=0; i < 3; i++) {
       
        ParserRuleParam prp = new ParserRuleParam("param"+(i+1));
       
        for (int j=0; j < 5; j++)
          prp.addValue("value"+(j+1));
       
        pr.addParam(prp);
       
      }
     
      pr.setGroupName("group1", 0);
      pr.setGroupName("group2", 1);
     
      p.addRule(pr);
    }
   
    String xmlSource = p.getXML(0);
   
    System.out.println(xmlSource);
   
    InputSource is = new InputSource(new StringReader(xmlSource));
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    DocumentBuilder db = null;
   
    try {
      db = dbf.newDocumentBuilder();
    } catch (ParserConfigurationException e1) {
      fail(e1.getMessage());
    }
   
    Document doc = null;
    try {
      doc = db.parse(is);
    } catch (SAXException e) {
      fail(e.getMessage());
    } catch (IOException e) {
      fail(e.getMessage());
    }
   
    Parser p2 = new Parser();
    p2.loadFromXML(doc.getFirstChild());
   
    assertEquals(p2.getRulesCount(), regexs.length);
   
    for (int i=0; i < p2.getRulesCount(); i++) {
     
      ParserRule pr = p2.getRule(i);
     
      assertEquals(pr.getRule(), regexs[i]);
      assertEquals(3, pr.getParamsCount());
     
      for (int j=0; j < pr.getParamsCount(); j++) {
        try {
          ParserRuleParam prp = pr.getParam("param"+(j+1));
         
          assertEquals(5, prp.getValues().length);
         
          for (int k=0; k < prp.getValues().length; k++)
            assertEquals("value"+(k+1), prp.getValues()[k]);
         
        } catch (IllegalArgumentException e) {
          fail(e.getMessage());
        }
      }
     
      try {
        assertEquals(0, pr.getGroupNumber("group1"));
        assertEquals(1, pr.getGroupNumber("group2"));
      } catch (NoSuchElementException e) {
        fail(e.getMessage());
      }
     
    }
   
  }
 
  public static final String tvstRegex = "<span style=\"[^\"]*\"><b>(\\w+)\\s*subtitles</b></span></div>\\s*<a href=\"([^\"]*)\">";
 
  public void testMultipleResults() {
   
    Parser p = new Parser("TVSubtitles.net Parser");
    ParserRule rule = new ParserRule(tvstRegex);
    rule.setGroupName("Language", 0);
    rule.setGroupName("Link", 1);
   
    p.addRule(rule);
   
    WebDownloader dl = new WebDownloader();
    try {
      dl.setUrl("http://www.tvsubtitles.net/episode-3079.html");
    } catch (MalformedURLException e) {
      fail(e.getMessage());
    }
   
    try {
      p.parseData(dl);
    } catch (IllegalStateException e) {
      fail(e.getMessage());
    } catch (IOException e) {
      fail(e.getMessage());
    }

    try {
      String[] langs = rule.getResults("Language");
      String[] links = rule.getResults("Link");
     
      if (langs == null || langs.length == 0)
        fail("Can't get langs.");
     
      if (links == null || links.length == 0)
        fail("Can't get links.");
     
      assertEquals(langs.length, links.length);
     
      for (int i=0; i < langs.length; i++)
        System.out.println(String.format("%s (%s)", langs[i], links[i]));
     
    } catch (IndexOutOfBoundsException e) {
      fail(e.getMessage());
    } catch (NoSuchElementException e) {
      fail(e.getMessage());
    }
   
   
  }
 
  public void checkResults(String[][] results, int[] expected) {
    checkResults(results, -1, expected);
  }
 
  public void checkResults(String[][] results, int totExpected, int[] expected) {
   
    if (results == null || results.length == 0)
      fail("No groups data.");
   
    if (totExpected >= 0)
      assertEquals("Wrong number of results",totExpected,results.length);
   
    if (expected == null)
      return;
   
    assertEquals("Internal error: Wrong number of expected values",results.length, expected.length);
   
    for (int i=0; i < results.length; i++) {
      assertNotNull("Result number "+i+" couldn't be parsed",results[i]);
      assertEquals("Wrong number of groups data",expected[i],results[i].length);
    }
   
  }
 
}
TOP

Related Classes of barrysoft.web.tests.ParserTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.