package com.google.code.ftspc.lector.indexers;
import com.google.code.ftspc.lector.ini_and_vars.Vars;
import com.google.code.ftspc.lector.parsers.Archives.ZIP.MoveDir;
import com.google.code.ftspc.lector.parsers.Parser;
import java.io.File;
import java.util.Date;
import java.util.Random;
import org.mozilla.universalchardet.UniversalDetector;
/**
 * Helper routines shared by several indexer classes: walking a file or
 * directory tree, handing each regular file to the parser configured for its
 * detected type, and detecting the character encoding of a text file.
 * @author Arthur Khusnutdinov
 */
public class CommonFunctions extends Thread {

    /** File or directory this instance was asked to index; null when the no-arg constructor is used. */
    private File fileForIndexing;
    /** Source of the random part of the generated storage file names. */
    private Random random = new Random();

    /**
     * Creates an instance without an indexing target.
     * {@link #indexDocs_main()} must not be called on such an instance,
     * because it dereferences the (null) target.
     */
    public CommonFunctions() {
    }

    /**
     * Creates an instance bound to an indexing target.
     * @param fileForIndexing The path to the file or folder to be indexed
     */
    public CommonFunctions(File fileForIndexing) {
        this.fileForIndexing = fileForIndexing;
    }

    /**
     * Indexes the target given to the constructor (recursing into
     * directories) and then requests a garbage collection.
     */
    public void indexDocs_main() {
        indexDocs_main(fileForIndexing);
        System.gc();
    }

    /**
     * Recursively walks {@code fileForIndexing}. Unreadable entries are
     * skipped; directories are descended into; every regular file is passed
     * to {@link #indexDocs_extracting_and_adding(String)} once the number of
     * running indexers is no longer above {@code Vars.max_threads}.
     * @param fileForIndexing file or directory to process
     */
    private void indexDocs_main(File fileForIndexing) {
        if (!fileForIndexing.canRead()) {
            return;
        }
        if (fileForIndexing.isDirectory()) {
            // list() returns null on an I/O error; treat that as an empty directory.
            String[] children = fileForIndexing.list();
            if (children != null) {
                for (String child : children) {
                    indexDocs_main(new File(fileForIndexing, child));
                }
            }
            return;
        }
        try {
            // Throttle: poll every 300 ms until the running-indexer count drops
            // back to the configured limit.
            while (Vars.current_run_indexes > Vars.max_threads) {
                synchronized (this) {
                    wait(300);
                }
            }
            indexDocs_extracting_and_adding(fileForIndexing.getAbsolutePath());
        } catch (Exception ex) {
            // NOTE(review): this also catches InterruptedException from wait();
            // the file is then skipped and the interrupt status is not restored.
            Vars.logger.fatal("Error: ", ex);
        }
    }

    /**
     * Detects the type of one file, instantiates the parser registered for
     * that type in {@code Vars.parsersFromXML} and starts it. For parsers
     * whose class name does not contain "ZIP" (after its first character),
     * the file is first copied under {@code Vars.pathToDayDir} with a
     * generated name, the original is deleted and the parser is given the
     * new path. Files of an unregistered type are only reported on stdout.
     * Any exception is logged and not propagated.
     * @param filePath absolute path of the file to index
     */
    private void indexDocs_extracting_and_adding(String filePath) {
        // Original file name (text after the last separator), passed on to the parser.
        String fileName = filePath.substring(filePath.lastIndexOf(Vars.fileSeparator) + 1);
        File sourceFile = new File(filePath);
        Vars.totalSizeOfProcessedFiles += sourceFile.length();
        try {
            String type = Vars.tika.detect(sourceFile);
            if (Vars.parsersFromXML.get(type) == null) {
                System.out.println("UNKNOWN TYPE " + type + " " + filePath);
                return;
            }
            Parser selectedParser = (Parser) (Class.forName(
                    Vars.parsersFromXML.get(type).
                    get("class").toString())).newInstance();
            if (selectedParser.getClass().getName().indexOf("ZIP") < 1) {
                // Generated name: current time in millis followed by two random hex longs.
                String hash = (new Date()).getTime() + Long.toHexString(random.nextLong())
                        + Long.toHexString(random.nextLong());
                String newFilePath = Vars.pathToDayDir + "/" + hash;
                MoveDir moveDir = new MoveDir();
                moveDir.copyFile(sourceFile, new File(newFilePath));
                // NOTE(review): the result of delete() is not checked; a failed
                // delete leaves the original file in place.
                sourceFile.delete();
                filePath = newFilePath;
            }
            // Count the indexer only once everything that can fail before the
            // hand-off has succeeded. Nothing in this class decrements the
            // counter (presumably the started parser does - confirm), so
            // counting before a failing copy would leave it raised for good
            // and could eventually stall the throttle loop.
            Vars.current_run_indexes++;
            selectedParser.start_th(filePath, fileName);
        } catch (Exception ex) {
            Vars.logger.fatal("Error: ", ex);
        }
    }

    /**
     * Detects the character encoding of a file by feeding its bytes to a
     * {@link UniversalDetector} until the detector is done or the file ends.
     * The file is always closed, even when reading fails.
     * @param fileName Path of the file whose encoding is to be determined.
     * @return The detected charset name, or "UTF-8" when none was detected.
     * @throws java.io.IOException if the file cannot be opened or read
     */
    protected String detectEncoding(String fileName) throws java.io.IOException {
        byte[] buf = new byte[4096];
        // Created before the stream is opened so nothing can fail between
        // opening the file and entering the try/finally that closes it.
        UniversalDetector detector = new UniversalDetector(null);
        java.io.FileInputStream fis = new java.io.FileInputStream(fileName);
        try {
            int nread;
            while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
                detector.handleData(buf, 0, nread);
            }
        } finally {
            fis.close();
        }
        detector.dataEnd();
        String encoding = detector.getDetectedCharset();
        detector.reset();
        if (encoding != null) {
            return encoding;
        }
        return "UTF-8";
    }
}