Package unify.fileio

Source Code of unify.fileio.RSSParser

package unify.fileio;

/**
* Some code from http://www.javacoffeebreak.com/books/extracts/javanotesv3/c10/s4.html
*/

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import unify.data.Feed;
import unify.data.Season;
import unify.data.Show;


public class RSSParser {
  private Feed myFeed;
  private ArrayList<Show> rssShows;
  private final static Logger LOGGER = Logger.getLogger(RSSParser.class.getName());

  public RSSParser(Feed myFeed) {
    this.myFeed = myFeed;
  }

  public ArrayList<Show> parseFeed() {
    this.rssShows = new ArrayList<Show>();
    try {
      Object content = myFeed.getLink().getContent();
      if (content instanceof InputStream) {
        LOGGER.fine("Checking feed " + myFeed.getLabel() + " @ " + myFeed.getLink());
        Scanner in = new Scanner(new InputStreamReader((InputStream) content ));
        boolean isTitle = false;
        String newTitle = "";
        String link = "";
        String nextString = "";
        while(in.hasNext()) {
          nextString = nextString + in.next() + " ";
          if(nextString.contains("<title>") && nextString.contains("</title>")) {
            isTitle = true;
            newTitle = nextString.split("<title>")[1].split("</title>")[0].trim().replaceAll("&amp;", "&");
            if(newTitle.split(".*(&#)(\\d+)(;)").length>1) {
              int num = Integer.parseInt(newTitle.replaceAll(".*(&#)(\\d+)", "$2").split(";")[0]);
              char c = (char) num;
              newTitle = newTitle.split("(&#)(\\d+)(;)")[0] + c + newTitle.split("(&#)(\\d+)(;)")[1];
            }
            System.out.println("New title: " + newTitle);
            nextString = "";
          }
          else if(nextString.contains("<link>") && nextString.contains("</link>")) {
            link = nextString.split("<link>")[1].split("</link>")[0].trim();
            if(isTitle) {
              parseEp(newTitle, link);
              isTitle = false;
            }
            nextString = "";
          }
        }
        in.close();
      }
      else {
        LOGGER.warning("Error: url.getContent() returning type other than InputStream.");
      }
    }
    catch (MalformedURLException e) {
      LOGGER.warning("Malformed URL.");
    }
    catch (SecurityException e) {
      LOGGER.warning("Security error" + e.toString());
    }
    catch (IOException e) {
      LOGGER.warning("IO error: " + e.toString());
    }
    return this.rssShows;
  }

  private void parseEp(String newTitle, String link) {
    if(newTitle.split("[sS]\\d\\d[eE]\\d\\d").length>1) {
      parseMethod1(newTitle, link);
    }
    else if(newTitle.split("-\\ss\\d+\\s|\\se\\d+").length>1) {
      parseMethod2(newTitle, link);
    }
    else if(newTitle.split("\\d+x\\d+").length>1) {
      parseMethod3(newTitle, link);
    }
    else if(newTitle.contains(" Part")) {
      parseMethod4(newTitle, link);
    }
  }

  //Parses shows with format s00e00, also handles multiple episode tags, like S01E01-E03
  public void parseMethod1(String newTitle, String link) {
    String myTitle = newTitle.split("[sS]\\d\\d[eE]\\d\\d")[0];
    String title = myTitle.trim();
    try {
      int seasonInt = Integer.parseInt(newTitle.substring(myTitle.length()+1, myTitle.length()+3));
      int episodeInt = Integer.parseInt(newTitle.substring(myTitle.length()+4, myTitle.length()+6));
      //doneParsing(title, seasonInt, episodeInt, link);
      int lastEpisodeInt = episodeInt;
      if(newTitle.split("[sS]\\d\\d[eE]\\d\\d-[eE]\\d\\d").length>1) {
        lastEpisodeInt = Integer.parseInt(newTitle.substring(myTitle.length()+8, myTitle.length()+10));
      }
      for(int i=episodeInt;i<=lastEpisodeInt;i++) {
        doneParsing(title, seasonInt, i, link);
      }
     
    } catch (NumberFormatException e) {
      LOGGER.info("Invalid number format for " + newTitle.substring(myTitle.length()+2, myTitle.length()+4) + " or " + newTitle.substring(myTitle.length()+5, myTitle.length()+7) + " in " + newTitle);
    }
  }

  //Parses shows with format s# | e# (Specifically seen on hulu)
  private void parseMethod2(String newTitle, String link) {
    String title = newTitle.split("-\\ss\\d+\\s|\\se\\d+")[0].trim();
    String matched = newTitle.substring(title.length()+4, newTitle.length()-newTitle.split("-\\ss\\d+\\s\\|\\se\\d+")[1].length());
    String season = matched.split("\\s\\|")[0];
    String episode = matched.split("\\s\\|\\se")[1];
    try {
      int seasonInt = Integer.parseInt(season);
      int episodeInt = Integer.parseInt(episode);
      doneParsing(title, seasonInt, episodeInt, link);
    } catch (NumberFormatException e) {
      LOGGER.info("Invalid number format for " + season + " or " + episode + " in " + newTitle);
    }
  }

  //Parses shows with format #x#, used by TVRage and most British shows
  //Has a bug with "Something something [1280x720 resolution]"
  private void parseMethod3(String newTitle, String link) {
    String title = newTitle.split("\\d+x\\d+")[0].trim();
    String season = newTitle.split("x\\d+")[0].split(" ")[newTitle.split(" ").length-1].replaceAll("\\(", "");
    String episode = newTitle.split("\\d+x")[1].split(" ")[0].replaceAll("\\)", "");
    try {
      int seasonInt = Integer.parseInt(season);
      int episodeInt = Integer.parseInt(episode);
      //Special formatting seen at tvrage.com
      if(title.startsWith("- ")) {
        title = title.substring(1, title.length()-1).trim();
      }
      doneParsing(title, seasonInt, episodeInt, link);
    } catch (NumberFormatException e) {
      LOGGER.info("Invalid number format for " + season + " or " + episode + " in " + newTitle);
    }
  }

  //Parses mini series using format "part one" or "part01"
  private void parseMethod4(String newTitle, String link) {
    int episodeInt;

    String title = newTitle.split(" Part")[0].trim();
    String episode = newTitle.split(" Part")[1].substring(0, 2);
    try {
      episodeInt = Integer.parseInt(episode);
    } catch (NumberFormatException e) {
      episode = newTitle.split(" Part ")[1].split("\\s")[0];
      if(episode.equalsIgnoreCase("one")) { episodeInt = 1; }
      else if(episode.equalsIgnoreCase("two")) { episodeInt = 2; }
      else if(episode.equalsIgnoreCase("three")) { episodeInt = 3; }
      else if(episode.equalsIgnoreCase("four")) { episodeInt = 4; }
      else if(episode.equalsIgnoreCase("five")) { episodeInt = 5; }
      else if(episode.equalsIgnoreCase("six")) { episodeInt = 6; }
      else if(episode.equalsIgnoreCase("seven")) { episodeInt = 7; }
      else if(episode.equalsIgnoreCase("eight")) { episodeInt = 8; }
      else if(episode.equalsIgnoreCase("nine")) { episodeInt = 9; }
      else if(episode.equalsIgnoreCase("ten")) { episodeInt = 10; }
      else if(episode.equalsIgnoreCase("eleven")) { episodeInt = 11; }
      else if(episode.equalsIgnoreCase("twelve")) { episodeInt = 12; }
      else if(episode.equalsIgnoreCase("thirteen")) { episodeInt = 13; }
      else if(episode.equalsIgnoreCase("fourteen")) { episodeInt = 14; }
      else if(episode.equalsIgnoreCase("fifteen")) { episodeInt = 15; }
      else if(episode.equalsIgnoreCase("sixteen")) { episodeInt = 16; }
      else if(episode.equalsIgnoreCase("seventeen")) { episodeInt = 17; }
      else if(episode.equalsIgnoreCase("eighteen")) { episodeInt = 18; }
      else if(episode.equalsIgnoreCase("nineteen")) { episodeInt = 19; }
      else if(episode.equalsIgnoreCase("twenty")) { episodeInt = 20; }
      else {
        LOGGER.info("Invalid number format for " + episode + " (or higher than twenty) in " + newTitle);
        return;
      }
    }
    doneParsing(title, 1, episodeInt, link);
  }

  private void doneParsing(String title, int seasonInt, int episodeInt, String link) {
    LOGGER.finest(myFeed.getLabel() + " has " + title + " season " + seasonInt + " episode " + episodeInt);
    Show show = null;
    for(int i=0;i<this.rssShows.size();i++) {
      if(rssShows.get(i).getTitle().equals(title)) {
        show = rssShows.get(i);
      }
    }
    if(show==null) {
      show = new Show(title);
      rssShows.add(show);
    }
    Season season = show.findSeason(seasonInt);
    if(season==null) {
      season = show.addSeason(seasonInt);
    }
    season.addEpisode(episodeInt, link, myFeed.getLabel());
  }
}
TOP

Related Classes of unify.fileio.RSSParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.