Package edu.buffalo.cse.ir.wikiindexer.indexer

Examples of edu.buffalo.cse.ir.wikiindexer.indexer.SharedDictionary


    // Excerpt: drains finished tokenization results from `pool` and feeds each
    // document's token maps to the per-field indexers, then flushes everything.
    // `completed` counts results consumed so far; `totalTasks` is the task count
    // reported by `tpe`.
    // NOTE(review): if tpe is a java.util.concurrent.ThreadPoolExecutor,
    // getTaskCount() is documented as an approximation - confirm that the loop
    // bound below is reliable enough for this use.
    long completed = 0, totalTasks = tpe.getTaskCount();
    long remaining = totalTasks - completed;
   
    IndexableDocument idoc = null;
    // Dictionary used to turn document identifiers into integer ids; the same
    // instance is handed to all three SingleIndexerRunner objects below.
    SharedDictionary docDict = new SharedDictionary(properties, INDEXFIELD.LINK);
    int currDocId;
    ThreadedIndexerRunner termRunner = new ThreadedIndexerRunner(properties);
    SingleIndexerRunner authIdxer = new SingleIndexerRunner(properties, INDEXFIELD.AUTHOR, INDEXFIELD.LINK, docDict, false);
    SingleIndexerRunner catIdxer = new SingleIndexerRunner(properties, INDEXFIELD.CATEGORY, INDEXFIELD.LINK, docDict, false);
    SingleIndexerRunner linkIdxer = new SingleIndexerRunner(properties, INDEXFIELD.LINK, INDEXFIELD.LINK, docDict, true);
    Map<String, Integer> tokenmap;
   
    try {
      while (remaining > 0) {
        // presumably `pool` is a CompletionService, so take() waits for the next
        // finished task - TODO confirm against the declaration.
        idoc = pool.take().get();
        if (idoc != null) {
          currDocId = docDict.lookup(idoc.getDocumentIdentifier());
          TokenStream stream;
          try {
            // Visit every index field; fields with no stream or no token map
            // are skipped.
            for (INDEXFIELD fld : INDEXFIELD.values()) {
              stream = idoc.getStream(fld);

              if (stream != null) {
                tokenmap = stream.getTokenMap();

                if (tokenmap != null) {
                  // Route the token map to the indexer for this field. There is
                  // no default branch, so any other field value is ignored.
                  switch (fld) {
                  case TERM:
                    termRunner.addToIndex(tokenmap,
                        currDocId);
                    break;
                  case AUTHOR:
                    authIdxer.processTokenMap(
                        currDocId, tokenmap);
                    break;
                  case CATEGORY:
                    catIdxer.processTokenMap(currDocId,
                        tokenmap);
                    break;
                  case LINK:
                    linkIdxer.processTokenMap(
                        currDocId, tokenmap);
                    break;
                  }
                }
              }

            }
          } catch (IndexerException e) {
            // An indexing failure abandons the remaining fields of this
            // document only: the stack trace is printed and the loop moves on.
            e.printStackTrace();
          }
        }
       
        // Counted even when the result was null or indexing failed.
        completed++;
       
        // Refresh the total only while the tokenizer thread is alive -
        // presumably because it may still be submitting tasks; verify.
        if (tokenizerThread.isAlive())
          totalTasks = tpe.getTaskCount();
       
        remaining = totalTasks - completed;
      }
    } catch (ExecutionException e1) {
      // This catch sits outside the while loop, so one failed task ends the
      // draining loop; execution then continues with the cleanup below.
      e1.printStackTrace();
    }

    // Shut down the indexers, then persist and release the dictionary. These
    // calls share one try block, so an IndexerException from any of them skips
    // the calls that follow it.
    try {
      termRunner.cleanup();
      authIdxer.cleanup();
      catIdxer.cleanup();
      linkIdxer.cleanup();
      docDict.writeToDisk();
      docDict.cleanUp();
    } catch (IndexerException e) {
      // Cleanup failures are only reported on stderr.
      e.printStackTrace();
    }
   
View Full Code Here


    }
   
    System.out.println("Submitted tokenization: " + (System.currentTimeMillis() - start));
   
    IndexableDocument idoc;
    SharedDictionary docDict = new SharedDictionary(properties, INDEXFIELD.LINK);
    int currDocId;
    ThreadedIndexerRunner termRunner = new ThreadedIndexerRunner(properties);
    SingleIndexerRunner authIdxer = new SingleIndexerRunner(properties, INDEXFIELD.AUTHOR, INDEXFIELD.LINK, docDict, false);
    SingleIndexerRunner catIdxer = new SingleIndexerRunner(properties, INDEXFIELD.CATEGORY, INDEXFIELD.LINK, docDict, false);
    SingleIndexerRunner linkIdxer = new SingleIndexerRunner(properties, INDEXFIELD.LINK, INDEXFIELD.LINK, docDict, true);
    Map<String, Integer> tokenmap;
   
    System.out.println("Starting indexing.....");
    start = System.currentTimeMillis();
    double pctComplete = 0;
    for (int i = 0; i < numdocs; i++) {
      try {
        idoc = pool.take().get();
        if (idoc != null) {
          currDocId = docDict.lookup(idoc.getDocumentIdentifier());
          TokenStream stream;
          try {
            for (INDEXFIELD fld : INDEXFIELD.values()) {
              stream = idoc.getStream(fld);

              if (stream != null) {
                tokenmap = stream.getTokenMap();

                if (tokenmap != null) {
                  switch (fld) {
                  case TERM:
                    termRunner.addToIndex(tokenmap,
                        currDocId);
                    break;
                  case AUTHOR:
                    authIdxer.processTokenMap(
                        currDocId, tokenmap);
                    break;
                  case CATEGORY:
                    catIdxer.processTokenMap(currDocId,
                        tokenmap);
                    break;
                  case LINK:
                    linkIdxer.processTokenMap(
                        currDocId, tokenmap);
                    break;
                  }
                }
              }

            }
          } catch (IndexerException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
          }
        }
      } catch (ExecutionException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
     
      pctComplete = (i * 100.0d) / numdocs;
     
      if (pctComplete % 10 == 0) {
        System.out.println(pctComplete+ "% submission complete");
      }
    }
   
    System.out.println("Submitted all tasks in: " + (System.currentTimeMillis() - start));
   
    try {
      termRunner.cleanup();
      authIdxer.cleanup();
      catIdxer.cleanup();
      linkIdxer.cleanup();
      docDict.writeToDisk();
      docDict.cleanUp();
    } catch (IndexerException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
   
View Full Code Here

TOP

Related Classes of edu.buffalo.cse.ir.wikiindexer.indexer.SharedDictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.