/*******************************************************************************
* Copyright 2012, The Infinit.e Open Source Project.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package com.ikanow.infinit.e.api.knowledge;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;
import org.apache.log4j.Logger;
import org.bson.types.ObjectId;
import com.ikanow.infinit.e.api.utils.MimeUtils;
import com.ikanow.infinit.e.api.utils.SocialUtils;
import com.ikanow.infinit.e.data_model.api.ApiManager;
import com.ikanow.infinit.e.data_model.api.ResponsePojo;
import com.ikanow.infinit.e.data_model.api.ResponsePojo.ResponseObject;
import com.ikanow.infinit.e.data_model.api.knowledge.DocumentPojoApiMap;
import com.ikanow.infinit.e.data_model.store.DbManager;
import com.ikanow.infinit.e.data_model.store.MongoDbManager;
import com.ikanow.infinit.e.data_model.store.config.source.SourcePojo;
import com.ikanow.infinit.e.data_model.store.document.CompressedFullTextPojo;
import com.ikanow.infinit.e.data_model.store.document.DocumentPojo;
import com.ikanow.infinit.e.harvest.extraction.document.file.FileHarvester;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
/**
 * Handles retrieval of documents (metadata, optional full text, or raw bytes)
 * and of raw file contents addressed relative to a "File" source.
 *
 * @author cmorgan
 *
 */
public class DocumentHandler
{
    private static final Logger logger = Logger.getLogger(DocumentHandler.class);

    // Action name reported in every ResponseObject built by this class
    private static final String ACTION_DOC_INFO = "Doc Info";
    // Extract type of sources whose raw bytes are fetched through FileHarvester
    private static final String EXTRACT_TYPE_FILE = "File";
    // URL prefix that marks a document record as deleted
    private static final String DELETED_URL_PREFIX = "?DEL?";
    private static final String JSON_MEDIA_TYPE = "application/json";
    private static final String DEFAULT_MEDIA_TYPE = "text/plain";
    private static final String UTF8 = "UTF-8";
    private static final int READ_BUFFER_SIZE = 8192;

    // Utility class used to pass binary/text info between doc handler and interface
    // (but it's changed into a doc object before being sent out the API)
    public static class DocumentFileInterface
    {
        /** Raw payload (file bytes, or the UTF-8 encoded JSON of the document). */
        public byte[] bytes;
        /** MIME type describing {@link #bytes}. */
        public String mediaType;
    }

    /**
     * Looks up a single document and returns it in one of three forms:
     * <ul>
     * <li>{@code bReturnFullText}: the document including its full text (decompressed from the
     *     content collection when it is not stored on the metadata record);</li>
     * <li>otherwise, {@code returnRawData}: a {@link DocumentFileInterface} holding the original file
     *     bytes for "File" sources, or the document's JSON when no file bytes are available;</li>
     * <li>otherwise: the document without full text.</li>
     * </ul>
     * Note that {@code returnRawData} is only consulted when {@code bReturnFullText} is false.
     *
     * @param userIdStr the requesting user; used to restrict the query to that user's communities
     * @param sourceKey if null, {@code idStrOrUrl} is treated as an ObjectId; otherwise as a URL within this source
     * @param idStrOrUrl the document id (or update id), or the document URL when {@code sourceKey} is set
     * @param bReturnFullText whether to include the full text in the returned document
     * @param returnRawData whether to return raw bytes instead of the document object
     * @param isAdmin if true, the community restriction is not applied
     * @return the response; failures are reported through the response object rather than thrown
     */
    public ResponsePojo getInfo(String userIdStr, String sourceKey, String idStrOrUrl, boolean bReturnFullText, boolean returnRawData, boolean isAdmin)
    {
        ResponsePojo rp = new ResponsePojo();
        try
        {
            // Set up the query
            BasicDBObject query = new BasicDBObject();
            ObjectId id = null;
            if (null == sourceKey) {
                id = new ObjectId(idStrOrUrl);
                query.put(DocumentPojo._id_, id);
            }
            else {
                query.put(DocumentPojo.sourceKey_, sourceKey);
                query.put(DocumentPojo.url_, idStrOrUrl);
            }
            // Non-admins may only see documents in their own communities; the filter is kept in a
            // local so that the update-id fallback below applies exactly the same restriction
            BasicDBObject communityFilter = null;
            if (!isAdmin) {
                communityFilter = new BasicDBObject(MongoDbManager.in_, SocialUtils.getUserCommunities(userIdStr));
                query.put(DocumentPojo.communityId_, communityFilter);
            }

            // (use DBObject here because DocumentPojo is pretty big and this call could conceivably have perf implications)
            BasicDBObject fieldsQ = new BasicDBObject();
            if (!bReturnFullText) {
                fieldsQ.put(DocumentPojo.fullText_, 0); // (XML/JSON have fullText as part of pojo)
            }
            BasicDBObject dbo = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query, fieldsQ);

            boolean missingOrDeleted = (null == dbo)
                    || ((null != dbo.get(DocumentPojo.url_)) && dbo.getString(DocumentPojo.url_).startsWith(DELETED_URL_PREFIX));
            if (missingOrDeleted && (null != id)) { // this might be the update id...
                query = new BasicDBObject(DocumentPojo.updateId_, id);
                if (null != communityFilter) {
                    // (the fallback must not bypass the community restriction)
                    query.put(DocumentPojo.communityId_, communityFilter);
                }
                dbo = (BasicDBObject) DbManager.getDocument().getMetadata().findOne(query, fieldsQ);
            }
            //TESTED (update case, normal case, and intermediate case where both update and original still exist)

            if (null == dbo) {
                // NOTE(review): "not found" is reported with success=true; kept as-is since callers may rely on it
                rp.setResponse(new ResponseObject(ACTION_DOC_INFO,true,"Document not found"));
                return rp;
            }
            DocumentPojo dp = DocumentPojo.fromDb(dbo, DocumentPojo.class);

            if (bReturnFullText)
            {
                if (null == dp.getFullText()) { // (Some things like database records might have this stored already)
                    String fullText = loadCompressedFullText(dp);
                    if (null != fullText) {
                        dp.setFullText(fullText);
                        dp.makeFullTextNonTransient();
                    }
                }
            }
            else if (!returnRawData) {
                dp.setFullText(null); // (obviously will normally contain full text anyway)
            }
            else // if ( returnRawData )
            {
                // For "File" sources return the file itself; for anything else (or if the source or
                // the file cannot be obtained) return the document's JSON instead
                byte[] bytes = null;
                String mediaType = JSON_MEDIA_TYPE;
                SourcePojo source = getSourceFromKey(dp.getSourceKey());
                if ((null != source) && EXTRACT_TYPE_FILE.equals(source.getExtractType()))
                {
                    String fileURL = (null != dp.getSourceUrl()) ? dp.getSourceUrl() : dp.getUrl();
                    bytes = FileHarvester.getFile(fileURL, source);
                    if (null != bytes) {
                        mediaType = getMediaType(fileURL);
                    }
                }
                if (null == bytes) {
                    String json = ApiManager.mapToApi(dp, new DocumentPojoApiMap());
                    bytes = json.getBytes(UTF8); // (explicit charset: don't depend on the platform default)
                }
                rp.setResponse(new ResponseObject(ACTION_DOC_INFO,true,"Document bytes returned successfully"));
                rp.setData(newFileInterface(bytes, mediaType), null);
                return rp;
            }
            rp.setData(dp, new DocumentPojoApiMap());
            rp.setResponse(new ResponseObject(ACTION_DOC_INFO,true,"Feed info returned successfully"));
        }//(end full text vs raw data)
        catch (Exception e)
        {
            // If an exception occurs log the error
            logger.error("Exception Message: " + e.getMessage(), e);
            rp.setResponse(new ResponseObject(ACTION_DOC_INFO,false,"error returning feed: " + e.getMessage()));
        }
        return rp;
    }

    /**
     * Returns the raw bytes of a file addressed relative to the URL of a "File" source.
     *
     * @param userIdStr the requesting user; used to restrict the source query to that user's communities
     * @param sourceKey key of the source whose URL is the base of the file location
     * @param relativePath path appended to the source URL; rejected if it resolves outside the working directory
     * @param isAdmin if true, the community restriction is not applied
     * @return the response, carrying a {@link DocumentFileInterface} on success; failures (source missing,
     *         not a file source, not approved, path rejected, file not found) are reported through the response object
     */
    public ResponsePojo getFileContents(String userIdStr, String sourceKey, String relativePath, boolean isAdmin)
    {
        ResponsePojo rp = new ResponsePojo();
        try {
            BasicDBObject query = new BasicDBObject(SourcePojo.key_, sourceKey);
            if (!isAdmin) {
                query.put(SourcePojo.communityIds_, new BasicDBObject(MongoDbManager.in_, SocialUtils.getUserCommunities(userIdStr)));
            }
            // Only fetch the fields needed to validate the source and read the file
            BasicDBObject fields = new BasicDBObject(SourcePojo.url_, 1);
            fields.put(SourcePojo.extractType_, 1);
            fields.put(SourcePojo.file_, 1);
            fields.put(SourcePojo.isApproved_, 1);
            SourcePojo source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query, fields), SourcePojo.class);

            // TEST for security shenanigans
            if (!isWithinWorkingDirectory(relativePath)) {
                throw new RuntimeException("Access denied: " + relativePath);
            }
            //(end security shenanigans)

            if (null == source) {
                throw new RuntimeException("Document source not found: " + sourceKey);
            }
            if ((null != source.getExtractType()) && !source.getExtractType().equals(EXTRACT_TYPE_FILE)) {
                throw new RuntimeException("Document source not a file: " + sourceKey + ", " + source.getExtractType());
            }
            if (!source.isApproved()) {
                throw new RuntimeException("Document source not approved, access denied: " + sourceKey);
            }

            String fileURL = source.getUrl() + relativePath;
            byte[] bytes = FileHarvester.getFile(fileURL, source);
            if (null == bytes)
            {
                //fail
                rp.setResponse(new ResponseObject(ACTION_DOC_INFO,false,"Could not find document: " + relativePath));
                return rp;
            }
            rp.setResponse(new ResponseObject(ACTION_DOC_INFO,true,"Document bytes returned successfully"));
            rp.setData(newFileInterface(bytes, getMediaType(fileURL)), null);
            return rp;
        }
        catch (Exception e)
        {
            // If an exception occurs log the error
            logger.error("Exception Message: " + e.getMessage(), e);
            rp.setResponse(new ResponseObject(ACTION_DOC_INFO,false,"error returning feed: " + e.getMessage()));
        }
        return rp;
    }//TESTED

    /**
     * Fetches the gzipped full text stored for the document's URL in the content collection
     * and decompresses it.
     *
     * @return the decoded text, or null if no content record (or no gzip payload) exists
     */
    private String loadCompressedFullText(DocumentPojo dp) throws IOException
    {
        DBCollection contentDB = DbManager.getDocument().getContent();
        BasicDBObject contentQ = new BasicDBObject(CompressedFullTextPojo.url_, dp.getUrl());
        // (content records are matched either with no source key or with this document's source key)
        contentQ.put(CompressedFullTextPojo.sourceKey_, new BasicDBObject(MongoDbManager.in_, Arrays.asList(null, dp.getSourceKey())));
        BasicDBObject fields = new BasicDBObject(CompressedFullTextPojo.gzip_content_, 1);
        BasicDBObject dboContent = (BasicDBObject) contentDB.findOne(contentQ, fields);
        if (null == dboContent) {
            return null;
        }
        byte[] compressedData = (byte[]) dboContent.get(CompressedFullTextPojo.gzip_content_);
        if (null == compressedData) {
            return null;
        }
        return gunzipToString(compressedData);
    }

    /**
     * Decompresses gzip data and decodes the result as UTF-8.
     * All bytes are collected before decoding, so a multi-byte character that straddles two
     * reads is never decoded in halves.
     */
    private static String gunzipToString(byte[] compressedData) throws IOException
    {
        GZIPInputStream gzip = new GZIPInputStream(new ByteArrayInputStream(compressedData));
        try {
            ByteArrayOutputStream raw = new ByteArrayOutputStream();
            byte[] buffer = new byte[READ_BUFFER_SIZE];
            int nRead;
            while ((nRead = gzip.read(buffer, 0, buffer.length)) >= 0) {
                raw.write(buffer, 0, nRead);
            }
            return raw.toString(UTF8);
        }
        finally {
            gzip.close();
        }
    }

    /**
     * Checks that the path, resolved against the current working directory, stays at or below it.
     * The comparison is made on a directory boundary so that a sibling directory whose name merely
     * starts with the working directory's name is rejected. A null path is rejected.
     */
    private static boolean isWithinWorkingDirectory(String relativePath) throws IOException
    {
        if (null == relativePath) {
            return false;
        }
        String basePath = new File(".").getCanonicalPath();
        String actualPath = new File(relativePath).getCanonicalPath();
        if (actualPath.equals(basePath)) {
            return true;
        }
        String basePrefix = basePath.endsWith(File.separator) ? basePath : (basePath + File.separator);
        return actualPath.startsWith(basePrefix);
    }

    /** Wraps a payload and its MIME type in the holder passed back to the interface layer. */
    private static DocumentFileInterface newFileInterface(byte[] bytes, String mediaType)
    {
        DocumentFileInterface dfp = new DocumentFileInterface();
        dfp.bytes = bytes;
        dfp.mediaType = mediaType;
        return dfp;
    }

    /**
     * Loads the source with the given key.
     *
     * @return the source, or null if the lookup or conversion failed (the failure is logged)
     */
    private SourcePojo getSourceFromKey(String sourceKey)
    {
        SourcePojo source = null;
        try
        {
            BasicDBObject query = new BasicDBObject();
            query.put(SourcePojo.key_, sourceKey);
            source = SourcePojo.fromDb(DbManager.getIngest().getSource().findOne(query), SourcePojo.class);
        }
        catch (Exception e)
        {
            // Best effort: callers treat null as "no source", but leave a trace of what went wrong
            logger.error("Exception Message: " + e.getMessage(), e);
        }
        return source;
    }

    /**
     * Derives a MIME type from the extension of the last path segment of the URL
     * (anything from the last '?' onwards is ignored).
     *
     * @return the type found by {@code MimeUtils.lookupMimeType}, or "text/plain" when the URL is null,
     *         has no extension in its last segment, or the extension is not recognised
     */
    private String getMediaType(String url)
    {
        String mediaType = null;
        if (null != url) {
            String path = url;
            int end = path.lastIndexOf("?");
            if (end >= 0) {
                path = path.substring(0, end);
            }
            // Only a dot after the last path separator belongs to the file name
            int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
            int mid = path.lastIndexOf(".");
            if (mid > lastSeparator) {
                mediaType = MimeUtils.lookupMimeType(path.substring(mid + 1));
            }
        }
        if (null == mediaType) {
            mediaType = DEFAULT_MEDIA_TYPE;
        }
        return mediaType;
    }//TESTED
}