Package eu.planets_project.ifr.core.storage.impl.web

Source Code of eu.planets_project.ifr.core.storage.impl.web.BlueMarbleDigitalObjectManagerImpl

package eu.planets_project.ifr.core.storage.impl.web;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;

import eu.planets_project.ifr.core.storage.api.DigitalObjectManager;
import eu.planets_project.ifr.core.storage.api.query.Query;
import eu.planets_project.ifr.core.storage.api.query.QueryValidationException;
import eu.planets_project.ifr.core.storage.impl.util.PDURI;
import eu.planets_project.services.datatypes.Content;
import eu.planets_project.services.datatypes.DigitalObject;

/**
* Implements the DigitalObjectManager interface for the NASA Blue Marble
* image collection. This DigitalObjectManager is read-only and not
* queryable.
*
* A word of warning: the Blue Marble collection does not provide a well-defined
* interface. It is only available through a Web portal and various Web mirrors.
* This interface class therefore scrapes the HTML code from the mirror site at
* the Arctic Region Supercomputing Center (http://mirrors.arsc.edu/nasa/). Any change
* in the HTML at the mirror site is likely to cause this interface class to break!
*
* The directory structure of the Blue Marble collection is as follows:
*
* - root - world_2km
*        - world_500m
*        - world_8km
*
* This directory structure (including the contents of the world_XXXm folders)
* is retrieved once and then buffered in memory to minimize HTTP accesses.
*
* This implementation does not perform lazy loading.
*
* @author SimonR
*
*/
public class BlueMarbleDigitalObjectManagerImpl implements DigitalObjectManager {
 
    /**
     * Logger.
     */
    private static Logger log = Logger.getLogger(BlueMarbleDigitalObjectManagerImpl.class.getName());
 
    /**
     * HttpClient timeout in ms.
     */
    private static final int TIMEOUT = 10000;

    /**
     * Mirror site base URL.
     */  
    private static final String MIRROR_BASE_URL = "http://mirrors.arsc.edu/nasa/";
   
    /**
     * Mirror site base URI.
     */
    private URI rootURI = null;
   
    /**
     * The HTTP client.
     */
    private HttpClient httpClient = new HttpClient();
   
    /**
     * Local buffer for the remote directory structure to increase responsiveness of the GUI.
     */
    private HashMap<URI, List<URI>> directoryCache = new HashMap<URI, List<URI>>();
 
    /**
     * Create a blue marble digital object manager.
     */
    public BlueMarbleDigitalObjectManagerImpl() {
      try {
        this.rootURI = new URI(MIRROR_BASE_URL);
      } catch (URISyntaxException e) {
        log.severe("This should never happen: " + e.getMessage());
      }
     
      // Set up HTTP client
      String host = System.getProperty("http.proxyHost");
        String port = System.getProperty("http.proxyPort");
        if( host != null && port != null ) {
            httpClient.getHostConfiguration().setProxy(host, Integer.parseInt(port));
        }
    httpClient.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(1, false));
    httpClient.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, new Integer(TIMEOUT));
    httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(TIMEOUT);
    httpClient.getHttpConnectionManager().getParams().setSoTimeout(TIMEOUT);
    }
 
  /**
   * {@inheritDoc}
   * @see eu.planets_project.ifr.core.storage.api.DigitalObjectManager#storeAsNew(java.net.URI, eu.planets_project.services.datatypes.DigitalObject)
   */
  public URI storeAsNew(URI pdURI, DigitalObject digitalObject) throws DigitalObjectNotStoredException {
    throw new DigitalObjectNotStoredException("Storing not supported by this implementation.");
  }

    public URI storeAsNew(DigitalObject digitalObject) throws eu.planets_project.ifr.core.storage.api.DigitalObjectManager.DigitalObjectNotStoredException {
        return null//To change body of implemented methods use File | Settings | File Templates.
    }

    public URI updateExisting(URI pdURI, DigitalObject digitalObject) throws eu.planets_project.ifr.core.storage.api.DigitalObjectManager.DigitalObjectNotStoredException, eu.planets_project.ifr.core.storage.api.DigitalObjectManager.DigitalObjectNotFoundException {
        return null//To change body of implemented methods use File | Settings | File Templates.
    }

    /**
     * {@inheritDoc}
     * @see eu.planets_project.ifr.core.storage.api.DigitalObjectManager#isWritable(java.net.URI)
     */
    public boolean isWritable( URI pdURI ) {
      return false;
    }
   
    /**
     * {@inheritDoc}
     * @see eu.planets_project.ifr.core.storage.api.DigitalObjectManager#list(java.net.URI)
     */
    public List<URI> list(URI pdURI) {     
      // 'null' - return mirror base URL
      if (pdURI == null) {
          ArrayList<URI> children = new ArrayList<URI>();
        children.add(rootURI);
      return children;
      }
     
      // Otherwise: Try the dir cache before downloading from remote mirror
      List<URI> bufferedList = directoryCache.get(pdURI);
   
      if (bufferedList == null) {
        // Not in cache - remote download (and cache result if any)
        if (pdURI.equals(rootURI)) {
               try {
               // Top level directory
               GetMethod dirRequest = new GetMethod(MIRROR_BASE_URL);
            dirRequest.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, new Integer(TIMEOUT));
               httpClient.executeMethod(dirRequest);
              
               // Scrape directory names
               List<URI> topLvlDirs = scrapeTopLvlDirs(dirRequest.getResponseBodyAsString());
               directoryCache.put(pdURI, topLvlDirs);
               return topLvlDirs;
               } catch (IOException e) {
                 log.severe("Error scraping top-level directory from Blue Marble mirror");
                 return new ArrayList<URI>();
               }
           } else if (!(pdURI.toString().endsWith("png") || pdURI.toString().endsWith("jpg"))) {
             try {
                  // Sub-directory           
            GetMethod dirRequest = new GetMethod(pdURI.toString());
            dirRequest.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, new Integer(TIMEOUT));
            httpClient.executeMethod(dirRequest);
           
            // Scrape file names
            List<URI> subDirs = scrapeSubDir(dirRequest.getResponseBodyAsString(), pdURI.toString());
            directoryCache.put(pdURI, subDirs);
            return subDirs;
             } catch (IOException e) {
               log.severe("Error scraping subdirectory '" + pdURI + "' from Blue Marble mirror");
               return new ArrayList<URI>();
             }
           } else {
             // Leaf node
             return null;
           }
      } else {
        return bufferedList;
      }
    }

  /**
   * {@inheritDoc}
   * @see eu.planets_project.ifr.core.storage.api.DigitalObjectManager#retrieve(java.net.URI)
   */
  public DigitalObject retrieve(URI pdURI) throws DigitalObjectNotFoundException {
    try {
      // Will simply attempt to download the object at the provided URI,
      // no matter whether located at the NASA mirror or not
            DigitalObject.Builder dob = new DigitalObject.Builder(Content.byReference(pdURI.toURL()));
            dob.title( PDURI.extractLeafname( pdURI.getPath() ) );
            return dob.build();
    } catch (Exception e) {
      throw new DigitalObjectNotFoundException("Error retrieving object from " + pdURI.toString() + " (" + e.getMessage() + ")");
    }
  }

  /**
   * {@inheritDoc}
   * @see eu.planets_project.ifr.core.storage.api.DigitalObjectManager#getQueryTypes()
   */
  public List<Class<? extends Query>> getQueryTypes() {
    return null;
  }

    /**
     * {@inheritDoc}
     * @see eu.planets_project.ifr.core.storage.api.DigitalObjectManager#list(java.net.URI, eu.planets_project.ifr.core.storage.api.query.Query)
     */
    public List<URI> list(URI pdURI, Query q) throws QueryValidationException {
        throw new QueryValidationException("This implementation does not support queries.");
    }
 
  private List<URI> scrapeTopLvlDirs(String html) {
    Pattern worldDirPattern = Pattern.compile("<a href=\"world_((.|\n)*?)</a>");
    Matcher m = worldDirPattern.matcher(html);
   
    ArrayList<URI> dirs = new ArrayList<URI>();
    String match;
    while (m.find()) {
      m.find(); // Skip every odd match
      match = m.group(1);
      try {
        if (match.indexOf("_big") < 0) {
          dirs.add(new URI(MIRROR_BASE_URL + match.substring(match.indexOf('>') + 1)));
        }
      } catch (URISyntaxException e) {
        log.severe("Error parsing Blue Marble dir: " + match);
      }
    }
    return dirs;
  }
 
  private List<URI> scrapeSubDir(String html, String baseURL) {
    Pattern worldDirPattern = Pattern.compile(" <a href=\"world((.|\n)*?)\">");
    Matcher m = worldDirPattern.matcher(html);
    ArrayList<URI> images = new ArrayList<URI>();

    String match;
    while (m.find()) {
      match = "world" + m.group(1);
      if (match.endsWith("jpg") || match.endsWith("png") ) {
        try {
          images.add(new URI(baseURL + match));
        } catch (URISyntaxException e) {
          log.severe("Error parsing Blue Marble image dir: " + match);
        }
      }
    }
    return images;
  }

}
TOP

Related Classes of eu.planets_project.ifr.core.storage.impl.web.BlueMarbleDigitalObjectManagerImpl

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.