Package com.ikanow.infinit.e.processing.generic.utils

Examples of com.ikanow.infinit.e.processing.generic.utils.PropertiesManager
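Every snippet below follows the same pattern: construct a PropertiesManager and read one typed setting from the platform configuration. A minimal standalone sketch of that pattern, using only getters that appear in the examples on this page (the printed output is purely illustrative):

import com.ikanow.infinit.e.processing.generic.utils.PropertiesManager;

public class PropertiesManagerDemo {
  public static void main(String[] args) {
    PropertiesManager pm = new PropertiesManager();
    double dutyCycle = pm.getHarvestAggregationDutyCycle(); // share of time given to background aggregation
    int maxContentBytes = pm.getMaxContentSize();           // cap on stored raw document content
    boolean aggregationOff = pm.getAggregationDisabled();   // global aggregation toggle
    System.out.println(dutyCycle + " / " + maxContentBytes + " / " + aggregationOff);
  }
}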


  private double _dutyCycle_ratio = 0.5;
 
  // C'tor:
 
  public AssociationBackgroundAggregationManager() {
    PropertiesManager pm = new PropertiesManager();
    _dutyCycle_ratio = pm.getHarvestAggregationDutyCycle();
  }//TESTED
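The duty-cycle ratio read in this constructor determines what fraction of wall-clock time the background aggregation thread may spend working. A minimal sketch of how such a ratio is typically applied - this is not the project's loop, and doAggregationBatch() is a hypothetical stand-in for the real work:

public class DutyCycleLoop {
  public static void main(String[] args) throws InterruptedException {
    double dutyCycleRatio = 0.5; // e.g. pm.getHarvestAggregationDutyCycle()
    while (true) { // runs until the process is stopped
      long start = System.currentTimeMillis();
      doAggregationBatch();
      long workMs = System.currentTimeMillis() - start;
      // Idle long enough that work occupies roughly dutyCycleRatio of wall-clock time:
      long idleMs = (long) (workMs * (1.0 - dutyCycleRatio) / dutyCycleRatio);
      Thread.sleep(Math.max(0L, idleMs));
    }
  }
  private static void doAggregationBatch() { /* stand-in for real aggregation work */ }
}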


  private Map<String, SourcePojo> _sourceCache = new HashMap<String, SourcePojo>();
  private TreeSet<String> _deletedIndex = null;
 
  private void doTransfer(BasicDBObject query, int nSkip, int nLimit, boolean bAggregate, BasicDBObject chunk) throws IOException
  {   
    PropertiesManager pm = new PropertiesManager();
    int nMaxContentSize_bytes = pm.getMaxContentSize();
   
    // Initialize the DB:
   
    DBCollection docsDB = DbManager.getDocument().getMetadata();
    DBCollection contentDB = DbManager.getDocument().getContent();
    // ... (snippet truncated)
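Here getMaxContentSize() bounds how much raw content is kept per document during the transfer. The rest of doTransfer() is truncated above; a minimal sketch of how such a byte cap might be applied (capContent() is illustrative, not a method from the project):

public class ContentCapExample {
  public static byte[] capContent(byte[] raw, int maxContentSizeBytes) {
    if (raw.length <= maxContentSizeBytes) {
      return raw; // already within the limit
    }
    byte[] capped = new byte[maxContentSizeBytes];
    System.arraycopy(raw, 0, capped, 0, maxContentSizeBytes);
    return capped; // content beyond the cap is discarded
  }
  public static void main(String[] args) {
    byte[] doc = new byte[200_000];
    System.out.println(capContent(doc, 100_000).length); // prints 100000
  }
}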

 
  public void InitializeDatabase() {
    // Add indices:
    try
    {
      PropertiesManager pm = new PropertiesManager();
     
      ////////////////////////
      //
      // Remove old indexes, mostly just old code that is no longer needed
      //
      dropIndexIfItExists(DbManager.getDocument().getContent(), CompressedFullTextPojo.url_, 1);
      dropIndexIfItExists(DbManager.getDocument().getContent(), CompressedFullTextPojo.sourceKey_, 2);
      dropIndexIfItExists(DbManager.getDocument().getMetadata(), DocumentPojo.sourceUrl_, 1);
      dropIndexIfItExists(DbManager.getDocument().getMetadata(), DocumentPojo.sourceKey_, 1);
      dropIndexIfItExists(DbManager.getDocument().getMetadata(), DocumentPojo.title_, 1);
        // (Title simply not needed, that was a mistake from an early iteration)
      dropIndexIfItExists(DbManager.getDocument().getMetadata(), DocumentPojo.updateId_, 1);
      dropIndexIfItExists(DbManager.getSocial().getShare(), "type", 1);
      dropIndexIfItExists(DbManager.getSocial().getCookies(), "apiKey", 1);
      dropIndexIfItExists(DbManager.getCustom().getLookup(), CustomMapReduceJobPojo.jobidS_, 2);
      dropIndexIfItExists(DbManager.getCustom().getLookup(), CustomMapReduceJobPojo.waitingOn_, 2);
      // (see shard keys below, these legacy ones can appear if the DB is restored from a different machine's backup)
      dropIndexIfNotNeeded(DbManager.getDocument().getContent(), "sourceKey_1_url_1", 0, "sourceKey_2_url_2", 0);
      dropIndexIfNotNeeded(DbManager.getDocument().getMetadata(), "sourceKey_1__id_1", 0, "sourceKey_1__id_-1", 0);
     
      ////////////////////////
      //
      // Indexes needed for sharding:
      //
      // ** Content (has changed a bit)     
      BasicDBObject compIndex = new BasicDBObject(CompressedFullTextPojo.sourceKey_, 1);
      compIndex.put(CompressedFullTextPojo.url_, 1);
      addIndexIfNeeded(DbManager.getDocument().getContent(), "sourceKey_2_url_2", 0, compIndex); // (remove legacy 2_2 and replace with 1_1, which supports shards)
      // ** Metadata
      // Add {_id:1} to "standalone" sourceKey, sort docs matching source key by "time" (sort of!)
      compIndex = new BasicDBObject(DocumentPojo.sourceKey_, 1);
      compIndex.put(DocumentPojo._id_, 1);
      addIndexIfNeeded(DbManager.getDocument().getMetadata(), "sourceKey_1__id_-1", 0, compIndex); // (remove legacy 1_-1 and replace with 1_1, which supports shards)
      // ** Entities and associations
      DbManager.getFeature().getEntity().ensureIndex(new BasicDBObject(EntityFeaturePojo.index_, 1));
      DbManager.getFeature().getAssociation().ensureIndex(new BasicDBObject(AssociationFeaturePojo.index_, 1));
     
      ////////////////////////
      //
      // Other indexes
      //
      // Needed to handle updates of large files containing many URLs:
      DbManager.getDocument().getMetadata().ensureIndex(new BasicDBObject(DocumentPojo.sourceUrl_, 2), new BasicDBObject(MongoDbManager.sparse_, true));
      // Needed for duplicate checking
      // (Compound index lets me access {url, sourceKey}, {url} efficiently ... but need sourceKey separately to do {sourceKey})
      compIndex = new BasicDBObject(DocumentPojo.url_, 1);
      compIndex.put(DocumentPojo.sourceKey_, 1);
      DbManager.getDocument().getMetadata().ensureIndex(compIndex);
      // Needed to handle document updates
      DbManager.getDocument().getMetadata().ensureIndex(new BasicDBObject(DocumentPojo.updateId_, 2), new BasicDBObject(MongoDbManager.sparse_, true));
      // Needed to update documents' entities' doc counts
      if (!pm.getAggregationDisabled()) {
        compIndex = new BasicDBObject(EntityPojo.docQuery_index_, 1);
        compIndex.put(DocumentPojo.communityId_, 1);
        DbManager.getDocument().getMetadata().ensureIndex(compIndex);
      }
      // Needed for keeping source/community doc counts
      // ... (snippet truncated)
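The dropIndexIfItExists()/addIndexIfNeeded() helpers make this index setup idempotent, so InitializeDatabase() can run on every startup without erroring on missing or pre-existing indexes. Their implementations are not shown on this page; here is a plausible reconstruction of the drop helper against the legacy MongoDB Java driver, assuming the int argument is the direction used in MongoDB's default index name (e.g. "url_1"):

import com.mongodb.DBCollection;
import com.mongodb.DBObject;

public class IndexUtils {
  public static void dropIndexIfItExists(DBCollection coll, String field, int direction) {
    String indexName = field + "_" + direction; // MongoDB's default single-field index name
    for (DBObject indexInfo : coll.getIndexInfo()) {
      if (indexName.equals(indexInfo.get("name"))) {
        coll.dropIndex(indexName); // only drop when present, so this never throws
        return;
      }
    }
  }
}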

 
  public void InitializeIndex(boolean bDeleteDocs, boolean bDeleteEntityFeature, boolean bDeleteEventFeature, boolean bRebuildDocsIndex) {
   
    try { //create elasticsearch indexes
     
      PropertiesManager pm = new PropertiesManager();
     
      if (!pm.getAggregationDisabled()) {
       
        boolean languageNormalization = pm.getNormalizeEncoding();
       
        Builder localSettingsEvent = ImmutableSettings.settingsBuilder();
        localSettingsEvent.put("number_of_shards", 1).put("number_of_replicas", 0);
        localSettingsEvent.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
        if (languageNormalization) {
          // ... (snippet truncated)
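The snippet cuts off just as language normalization is configured. A hypothetical continuation, assuming "normalization" here means adding lowercase and ASCII-folding token filters to the suggest analyzer (the filters the project actually registers are not visible on this page; ImmutableSettings is the pre-2.x elasticsearch API used above):

import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.ImmutableSettings.Builder;

public class SuggestAnalyzerSettings {
  public static Builder build(boolean languageNormalization) {
    Builder settings = ImmutableSettings.settingsBuilder();
    settings.put("number_of_shards", 1).put("number_of_replicas", 0);
    settings.put("index.analysis.analyzer.suggestAnalyzer.tokenizer", "standard");
    if (languageNormalization) {
      // Assumption: fold case and accents so suggestions match across encodings
      settings.putArray("index.analysis.analyzer.suggestAnalyzer.filter",
          "standard", "lowercase", "asciifolding");
    } else {
      settings.putArray("index.analysis.analyzer.suggestAnalyzer.filter",
          "standard", "lowercase");
    }
    return settings;
  }
}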

 
  public static void createCommunityDocIndex(String nameOrCommunityIdStr, ObjectId parentCommunityId,
      boolean bPersonalGroup, boolean bSystemGroup, boolean bClearIndex, boolean bParentsOnly)
  {
    //create elasticsearch indexes
    PropertiesManager pm = new PropertiesManager();
    boolean languageNormalization = pm.getNormalizeEncoding();
    int nPreferredReplicas = pm.getMaxIndexReplicas();
   
    String docMapping = new Gson().toJson(new DocumentPojoIndexMap.Mapping(), DocumentPojoIndexMap.Mapping.class).replace("__AMP__", "@");
   
    String sGroupIndex = null; // for indexing, i.e. always a single index
    String sAliasIndex = null; // for querying, i.e. will point to doc_commid, doc_commid_1, etc
    // ... (snippet truncated)
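Note the "__AMP__" to "@" replacement on the serialized mapping: '@' is not a legal character in a Java identifier, so the mapping POJO evidently uses a placeholder that is patched after Gson serialization. A self-contained demonstration of the trick (the field name below is hypothetical, not taken from DocumentPojoIndexMap):

import com.google.gson.Gson;

public class MappingTrickDemo {
  static class Mapping {
    String __AMP__timestamp = "enabled"; // stands in for the illegal field name "@timestamp"
  }
  public static void main(String[] args) {
    String json = new Gson().toJson(new Mapping()).replace("__AMP__", "@");
    System.out.println(json); // {"@timestamp":"enabled"}
  }
}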

  public void processDocuments(int harvestType, List<DocumentPojo> toAdd, List<DocumentPojo> toUpdate_subsetOfAdd, List<DocumentPojo> toDelete)
  {
    processDocuments(harvestType, toAdd, toUpdate_subsetOfAdd, toDelete, null);
  }
  public void processDocuments(int harvestType, List<DocumentPojo> toAdd, List<DocumentPojo> toUpdate_subsetOfAdd, List<DocumentPojo> toDelete, SourcePojo source)
  {
    PropertiesManager props = new PropertiesManager();
   
    // Note: toAdd = toAdd(old) + toUpdate
    // Need to treat updates as follows:
    // - Delete (inc children, eg events) but get fields to keep (currently _id, created; in the future comments etc)

    // Delete toUpdate and toAdd (this also overwrites "created" for updated docs - in fact for all docs)
    toDelete.addAll(toUpdate_subsetOfAdd);
    StoreAndIndexManager storageManager = new StoreAndIndexManager();
    storageManager.removeFromDatastore_byURL(toDelete);
      // (note: expands toDelete if any sourceUrl "docs" are present, see FileHarvester)

    // (Storing docs messes up the doc/event/entity objects, so don't do that just yet...)
   
    // Aggregation:
    // 1+2. Create aggregate entities/events ("features") and write them to the DB
    // (then can store feeds - doesn't matter that the event/entities have been modified by the aggregation)
    // 3. (Scheduled for efficiency) Update all documents' frequencies based on new entities and events
    // 4. (Scheduled for efficiency) Synchronize with index [after this, queries can find them - so (2) must have happened]
      // (Synchronization currently "corrupts" the entities so needs to be run last)

    AggregationManager perSourceAggregation = null;
   
    if (!props.getAggregationDisabled()) {
      perSourceAggregation = new AggregationManager();
    }
   
    // 1+2]
    if (null != perSourceAggregation) {
      // ... (snippet truncated)

    AssociationAggregationUtils.setDiagnosticMode(bMode);
    _diagnosticMode = bMode;
  }
 
  public AggregationManager() {
    PropertiesManager props = new PropertiesManager();
    double dDutyCycle = props.getHarvestAggregationDutyCycle();
    if (dDutyCycle > 0.0) { // Do most of the aggregation in a separate thread
      _bBackgroundAggregationEnabled = true;
    }
  }

  /////////////////////////////////////////////////////////////////////////////////////// 

  public static void updateEntitiesFromDeletedDocuments(String uuid)
  {   
    try {
      PropertiesManager props = new PropertiesManager();
      if (props.getAggregationDisabled()) { // (no need to do this)
        return;
      }   
      // Load string resource
     
      InputStream in = EntityAggregationUtils.class.getResourceAsStream("AggregationUtils_scriptlets.xml");
      // ... (snippet truncated)
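The method opens an XML scriptlet bundle from the classpath just before the snippet cuts off. A minimal sketch of reading such a resource into a String on a modern JDK (the project targets an older Java and will read the stream differently):

import java.io.InputStream;
import java.nio.charset.StandardCharsets;

public class ResourceLoadExample {
  public static String loadScriptlets() throws Exception {
    try (InputStream in = ResourceLoadExample.class
        .getResourceAsStream("AggregationUtils_scriptlets.xml")) {
      if (in == null) {
        throw new IllegalStateException("scriptlets resource not found on classpath");
      }
      return new String(in.readAllBytes(), StandardCharsets.UTF_8); // readAllBytes: Java 9+
    }
  }
}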

 
  public static void updateDocEntitiesFromDeletedDocuments(String uuid)
  {
    String outCollection = new StringBuilder(uuid).append("_AggregationUtils").toString();
    try {     
      PropertiesManager props = new PropertiesManager();
      if (props.getAggregationDisabled()) { // (no need to do this)
        return;
      }
      DBCollection outColl = DbManager.getDB("doc_metadata").getCollection(outCollection);
     
      DBCursor dbc = outColl.find();
      // ... (snippet truncated)
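outCollection is a temporary per-job collection (uuid + "_AggregationUtils"), and the cursor opened above presumably walks its map/reduce output. A hedged sketch of draining such a collection with the legacy driver; the drop() at the end is an assumption about cleanup, not something shown in the snippet:

import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;

public class AggregationOutputDrain {
  public static void drain(DBCollection outColl) {
    DBCursor dbc = outColl.find();
    try {
      while (dbc.hasNext()) {
        DBObject aggregated = dbc.next();
        // (apply the aggregated entity counts back to doc_metadata here)
      }
    } finally {
      dbc.close(); // always release the server-side cursor
    }
    outColl.drop(); // assumption: the scratch collection is discarded after processing
  }
}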

  private double _dutyCycle_ratio = 0.5;
 
  // C'tor:
 
  public EntityBackgroundAggregationManager() {
    PropertiesManager pm = new PropertiesManager();
    _dutyCycle_ratio = pm.getHarvestAggregationDutyCycle();
  }//TESTED
