Package com.ikanow.infinit.e.api.knowledge

Source Code of com.ikanow.infinit.e.api.knowledge.DocumentHandler$DocumentFileInterface

/*******************************************************************************
* Copyright 2012, The Infinit.e Open Source Project.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package com.ikanow.infinit.e.api.knowledge;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;

import org.apache.log4j.Logger;
import org.bson.types.ObjectId;

import com.ikanow.infinit.e.api.utils.MimeUtils;
import com.ikanow.infinit.e.api.utils.SocialUtils;
import com.ikanow.infinit.e.data_model.api.ApiManager;
import com.ikanow.infinit.e.data_model.api.ResponsePojo;
import com.ikanow.infinit.e.data_model.api.ResponsePojo.ResponseObject;
import com.ikanow.infinit.e.data_model.api.knowledge.DocumentPojoApiMap;
import com.ikanow.infinit.e.data_model.store.DbManager;
import com.ikanow.infinit.e.data_model.store.MongoDbManager;
import com.ikanow.infinit.e.data_model.store.config.source.SourcePojo;
import com.ikanow.infinit.e.data_model.store.document.CompressedFullTextPojo;
import com.ikanow.infinit.e.data_model.store.document.DocumentPojo;
import com.ikanow.infinit.e.harvest.extraction.document.file.FileHarvester;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;


/**
* This class is for all operations related to the retrieval of documents
* (their metadata, full text and raw file contents) within the system
*
* @author cmorgan
*
*/
public class DocumentHandler
{
  private static final Logger logger = Logger.getLogger(DocumentHandler.class);
 
  /**
   * Utility class used to pass binary/text info between doc handler and interface
   * (but it's changed into a doc object before being sent out the API).
   *
   * Instances are created by the handler methods of the enclosing class and attached
   * to the response as its data payload.
   */
  public static class DocumentFileInterface
  {
    /** Raw content to return: either the bytes of a harvested file or a serialized JSON document. */
    public byte[] bytes;
    /** MIME type describing {@link #bytes}, e.g. "application/json". */
    public String mediaType;
  }
 
  /**
   * Get information function that returns the user information in the form of a JSON String.
   * @param isAdmin
   *
   * @param  key  the key definition of the user ( example email@email.com )
   * @return      a JSON string representation of the person information on success
   */
  public ResponsePojo getInfo(String userIdStr, String sourceKey, String idStrOrUrl, boolean bReturnFullText, boolean returnRawData, boolean isAdmin)
  {
    ResponsePojo rp = new ResponsePojo();
   
    try
    {
      // Set up the query
      BasicDBObject query = new BasicDBObject();
      ObjectId id = null;
      if (null == sourceKey) {
        id = new ObjectId(idStrOrUrl);
        query.put(DocumentPojo._id_, id);       
      }
      else {
        query.put(DocumentPojo.sourceKey_, sourceKey);
        query.put(DocumentPojo.url_, idStrOrUrl);
      }
     
      if ( !isAdmin )
        query.put(DocumentPojo.communityId_, new BasicDBObject(MongoDbManager.in_, SocialUtils.getUserCommunities(userIdStr)));
        // (use DBObject here because DocumentPojo is pretty big and this call could conceivably have perf implications)
     
      BasicDBObject fieldsQ = new BasicDBObject();
      if (!bReturnFullText) {
        fieldsQ.put(DocumentPojo.fullText_, 0); // (XML/JSON have fullText as part of pojo)
      }
     
      BasicDBObject dbo = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query, fieldsQ);

      if ((null == dbo) ||
          ((null != dbo.get(DocumentPojo.url_)) &&  dbo.getString(DocumentPojo.url_).startsWith("?DEL?")))
      {
        if (null != id) { // this might be the update id...         
          query = new BasicDBObject(DocumentPojo.updateId_, id);
          dbo = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query, fieldsQ);
        }
      }
      //TESTED (update case, normal case, and intermediate case where both update and original still exist)
     
      if (null == dbo) {
        rp.setResponse(new ResponseObject("Doc Info",true,"Document not found"));
        return rp;
      }
      DocumentPojo dp = DocumentPojo.fromDb(dbo, DocumentPojo.class);
      if (bReturnFullText)
      {
        if (null == dp.getFullText()) { // (Some things like database records might have this stored already)
          byte[] storageArray = new byte[200000];
          DBCollection contentDB = DbManager.getDocument().getContent();
          BasicDBObject contentQ = new BasicDBObject(CompressedFullTextPojo.url_, dp.getUrl());
          contentQ.put(CompressedFullTextPojo.sourceKey_, new BasicDBObject(MongoDbManager.in_, Arrays.asList(null, dp.getSourceKey())));
          BasicDBObject fields = new BasicDBObject(CompressedFullTextPojo.gzip_content_, 1);
          BasicDBObject dboContent = (BasicDBObject) contentDB.findOne(contentQ, fields);
          if (null != dboContent) {
            byte[] compressedData = ((byte[])dboContent.get(CompressedFullTextPojo.gzip_content_));       
            ByteArrayInputStream in = new ByteArrayInputStream(compressedData);
            GZIPInputStream gzip = new GZIPInputStream(in);       
            int nRead = 0;
            StringBuffer output = new StringBuffer();
            while (nRead >= 0) {
              nRead = gzip.read(storageArray, 0, 200000);
              if (nRead > 0) {
                String s = new String(storageArray, 0, nRead, "UTF-8");
                output.append(s);
              }
            }
            dp.setFullText(output.toString());
            dp.makeFullTextNonTransient();
          }
        }       
      }
      else if (!returnRawData) {
        dp.setFullText(null); // (obviously will normally contain full text anyway)
      }
      else // if ( returnRawData )
      {
        //check if the harvest type is file, return the file instead
        //if file is db return the json
        //get source
        SourcePojo source = getSourceFromKey(dp.getSourceKey());
        if ( source.getExtractType().equals( "File" ))
        {
          //get file from harvester
          String fileURL = dp.getUrl();
          if ( dp.getSourceUrl() != null )
            fileURL = dp.getSourceUrl();
          byte[] bytes = FileHarvester.getFile(fileURL, source);
          if ( bytes == null )
          {
            // Try returning JSON instead
            String json = ApiManager.mapToApi(dp, new DocumentPojoApiMap());
            DocumentFileInterface dfp = new DocumentFileInterface();
           
            dfp.bytes = json.getBytes();
            dfp.mediaType = "application/json";
           
            rp.setResponse(new ResponseObject("Doc Info",true,"Document bytes returned successfully"));
            rp.setData(dfp, null);
            return rp;
          }
          else
          {           
            DocumentFileInterface dfp = new DocumentFileInterface();
            dfp.bytes = bytes;
            dfp.mediaType = getMediaType(fileURL);
            rp.setResponse(new ResponseObject("Doc Info",true,"Document bytes returned successfully"));
            rp.setData(dfp, null);
            return rp;
          }
        }
        else
        {       
          String json = ApiManager.mapToApi(dp, new DocumentPojoApiMap());
          DocumentFileInterface dfp = new DocumentFileInterface();
         
          dfp.bytes = json.getBytes();
          dfp.mediaType = "application/json";
         
          rp.setResponse(new ResponseObject("Doc Info",true,"Document bytes returned successfully"));
          rp.setData(dfp, null);
          return rp;
        }       
      }
      rp.setData(dp, new DocumentPojoApiMap());
      rp.setResponse(new ResponseObject("Doc Info",true,"Feed info returned successfully"));
    }//(end full text vs raw data)
    catch (Exception e)
    {
      // If an exception occurs log the error
      logger.error("Exception Message: " + e.getMessage(), e);
      rp.setResponse(new ResponseObject("Doc Info",false,"error returning feed: " + e.getMessage()));
    }
    // Return Json String representing the user
    return rp;
  }
 
  public ResponsePojo getFileContents(String userIdStr, String sourceKey, String relativePath, boolean isAdmin)
  {
    ResponsePojo rp = new ResponsePojo();
   
    try  {
      BasicDBObject query = new BasicDBObject(SourcePojo.key_, sourceKey);
      if ( !isAdmin )
        query.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, SocialUtils.getUserCommunities(userIdStr)));
      BasicDBObject fields = new BasicDBObject(SourcePojo.url_, 1);
      fields.put(SourcePojo.extractType_, 1);
      fields.put(SourcePojo.file_, 1);
      fields.put(SourcePojo.isApproved_, 1);
      SourcePojo source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query, fields), SourcePojo.class);

      // TEST for security shenanigans
      String baseRelativePath = new File(".").getCanonicalPath();
      String actualRelativePath = new File(relativePath).getCanonicalPath();
      if (!actualRelativePath.startsWith(baseRelativePath)) {
        throw new RuntimeException("Access denied: " + relativePath);
      }     
      //(end security shenanigans)
     
      if (null == source) {
        throw new RuntimeException("Document source not found: " + sourceKey);
      }
      if ((null != source.getExtractType()) && !source.getExtractType().equals("File")) {
        throw new RuntimeException("Document source not a file: " + sourceKey + ", " + source.getExtractType());       
      }
      if (!source.isApproved()) {
        throw new RuntimeException("Document source not approved, access denied: " + sourceKey);       
      }
      String fileURL = source.getUrl() + relativePath;
      byte[] bytes = FileHarvester.getFile(fileURL, source);
      if ( bytes == null )
      {
        //fail
        rp.setResponse(new ResponseObject("Doc Info",false,"Could not find document: " + relativePath));
        return rp;
      }
      else
      {           
        DocumentFileInterface dfp = new DocumentFileInterface();
        dfp.bytes = bytes;
        dfp.mediaType = getMediaType(fileURL);
        rp.setResponse(new ResponseObject("Doc Info",true,"Document bytes returned successfully"));
        rp.setData(dfp, null);
        return rp;
      }     
    }
    catch (Exception e)
    {
      // If an exception occurs log the error
      logger.error("Exception Message: " + e.getMessage(), e);
      rp.setResponse(new ResponseObject("Doc Info",false,"error returning feed: " + e.getMessage()));
    }
    // Return Json String representing the user
    return rp;
  }//TESTED
 
  private SourcePojo getSourceFromKey(String sourceKey)
  {
    SourcePojo source = null;
    try
    {
      BasicDBObject query = new BasicDBObject();
      query.put(SourcePojo.key_, sourceKey);
      source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query), SourcePojo.class);
    }
    catch (Exception e)
    {

    }
    return source;
  }
 
  private String getMediaType(String url)
  {   
    String mediaType = null;

    int end = url.lastIndexOf("?");
    if (end >= 0) {
      url = url.substring(0, end);
    }
    int mid = url.lastIndexOf(".");
    String extension = url.substring(mid+1, url.length());
    mediaType = MimeUtils.lookupMimeType(extension);
    if (null == mediaType) {
      mediaType = "text/plain";
    }
    return mediaType;
  }//TESTED
}
TOP

Related Classes of com.ikanow.infinit.e.api.knowledge.DocumentHandler$DocumentFileInterface

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.