Examples of MongoInputSplit


Examples of com.mongodb.hadoop.input.MongoInputSplit

      Iterator<InputSplit> splitIt = splits.iterator();
      while (splitIt.hasNext()) {
        try {
          orderedArraySet_afterMin.clear();
         
          MongoInputSplit mongoSplit = (MongoInputSplit)splitIt.next();
          BasicDBObject min = (BasicDBObject) mongoSplit.getQuerySpec().get("$min");
          BasicDBObject max = (BasicDBObject) mongoSplit.getQuerySpec().get("$max");
         
          //DEBUG
          //_logger.info("+----------------- NEW SPLIT ----------------: " + min + " /" + max);
          //System.out.println("+----------------- NEW SPLIT ----------------: " + min + " /" + max);
         
          if (null != min) { // How does the min fit in with the general query
            try {
              if (compareFields(-1, originalQuery, min, max, orderedArraySet, orderedArraySet_afterMin) < 0) {
                splitIt.remove();
                continue;
              }
            }
            catch (Exception e) {} // do nothing probably just some comparable issue
          }//TESTED
         
          if (null != max) { // How does the min fit in with the general query
            try {
              if (compareFields(1, originalQuery, max, min, orderedArraySet, orderedArraySet_afterMin) > 0) {
                splitIt.remove();
                continue;
              }
            }
            catch (Exception e) {} // do nothing probably just some comparable issue
          }//TESTED
         
          //DEBUG
          //_logger.info("(retained split)");
          //System.out.println("(retained split)");
         
          // (don't worry about edge cases, won't happen very often and will just result in a spurious empty mapper)
         
          ////////////////////////////////
         
          // Now some infinit.e specific processing...
         
          if (newShardScheme) {
            @SuppressWarnings("rawtypes")
            TreeSet<Comparable> sourceKeyOrderedArray = orderedArraySet.get(DocumentPojo.sourceKey_);
            if ((null != sourceKeyOrderedArray) && !sourceKeyOrderedArray.isEmpty()) {
              @SuppressWarnings("rawtypes")
              Comparable minSourceKey = null;
              Object minSourceKeyObj = (null == min) ? null : min.get(DocumentPojo.sourceKey_);
              if (minSourceKeyObj instanceof String) {
                minSourceKey = (String)minSourceKeyObj;
              }
              if (null == minSourceKey) {
                minSourceKey = sourceKeyOrderedArray.first();
              }//TESTED
              @SuppressWarnings("rawtypes")
              Comparable maxSourceKey = null;
              Object maxSourceKeyObj = (null == max) ? null : max.get(DocumentPojo.sourceKey_);
              if (maxSourceKeyObj instanceof String) {
                maxSourceKey = (String)maxSourceKeyObj;
              }
              if (null == maxSourceKey) {
                maxSourceKey = sourceKeyOrderedArray.last();
              }//TESTED
             
              DBObject splitQuery = mongoSplit.getQuerySpec();
              BasicDBObject splitQueryQuery = new BasicDBObject((BasicBSONObject) splitQuery.get("$query"));             
              if (0 == minSourceKey.compareTo(maxSourceKey)) { // single matching sourceKEy
                splitQueryQuery.put(DocumentPojo.sourceKey_, maxSourceKey);
              }//TESTED (array of sources, only one matches)
              else { // multiple matching source keys
                splitQueryQuery.put(DocumentPojo.sourceKey_,
                    new BasicDBObject(DbManager.in_, sourceKeyOrderedArray.subSet(minSourceKey, true, maxSourceKey, true)));
              }//TESTED (array of sources, multiple match)         
              newsplits.add(new InfiniteMongoInputSplit(mongoSplit, splitQueryQuery, conf.isNoTimeout()));                             
            }
            else { // original query is of sufficient simplicity
              newsplits.add(new InfiniteMongoInputSplit(mongoSplit, originalQuery, conf.isNoTimeout()));             
            }//TESTED (no change to existing source)
           
          }//TESTED
          else { // old sharding scheme, remove min/max and replace with normal _id based query where possible
           
            DBObject splitQuery = mongoSplit.getQuerySpec();
            // Step 1: create a query range for _id:
            BasicDBObject idRange = null;
            Object idMin = (min == null) ? null : min.get(DocumentPojo._id_);
            Object idMax = (max == null) ? null : max.get(DocumentPojo._id_);
            if (!(idMin instanceof ObjectId))
View Full Code Here

Examples of com.mongodb.hadoop.input.MongoInputSplit

  @Override
    public RecordReader<Object, BSONObject> createRecordReader( InputSplit split, TaskAttemptContext context ){
        if ( !( split instanceof MongoInputSplit ) )
            throw new IllegalStateException( "Creation of a new RecordReader requires a MongoInputSplit instance." );

        final MongoInputSplit mis = (MongoInputSplit) split;

        return new InfiniteMongoRecordReader( mis );
   
View Full Code Here

Examples of com.mongodb.hadoop.input.MongoInputSplit

    @Test
    public void testNullBounds() throws Exception {
        Configuration config = new Configuration();
        StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config);
        MongoInputSplit split = splitter.createSplitFromBounds(null, null);
        assertEquals(new BasicDBObject(), split.getMin());
        assertEquals(new BasicDBObject(), split.getMax());
    }
View Full Code Here

Examples of com.mongodb.hadoop.input.MongoInputSplit

    @Test
    public void testNullLowerBound() throws Exception {
        Configuration config = new Configuration();
        StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config);
        BasicDBObject upperBound = new BasicDBObject("a", 10);
        MongoInputSplit split = splitter.createSplitFromBounds(null, upperBound);
        assertEquals(new BasicDBObject(), split.getMin());
        assertEquals(10, split.getMax().get("a"));
    }
View Full Code Here

Examples of com.mongodb.hadoop.input.MongoInputSplit

    @Test
    public void testNullUpperBound() throws Exception {
        Configuration config = new Configuration();
        StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config);
        BasicDBObject lowerBound = new BasicDBObject("a", 10);
        MongoInputSplit split = splitter.createSplitFromBounds(lowerBound, null);
        assertEquals(10, split.getMin().get("a"));
        assertEquals(new BasicDBObject(), split.getMax());
    }
View Full Code Here

Examples of com.mongodb.hadoop.input.MongoInputSplit

    public void testLowerUpperBounds() throws Exception {
        Configuration config = new Configuration();
        StandaloneMongoSplitter splitter = new StandaloneMongoSplitter(config);
        BasicDBObject lowerBound = new BasicDBObject("a", 0);
        BasicDBObject upperBound = new BasicDBObject("a", 10);
        MongoInputSplit split = splitter.createSplitFromBounds(lowerBound, upperBound);
        assertEquals(0, split.getMin().get("a"));
        assertEquals(10, split.getMax().get("a"));
    }
View Full Code Here

Examples of com.mongodb.hadoop.input.MongoInputSplit

                                                                                             final Reporter reporter) {
        if (!(split instanceof MongoInputSplit)) {
            throw new IllegalStateException("Creation of a new RecordReader requires a MongoInputSplit instance.");
        }

        final MongoInputSplit mis = (MongoInputSplit) split;

        return new MongoRecordReader(mis);
    }
View Full Code Here

Examples of com.mongodb.hadoop.input.MongoInputSplit

    public RecordReader<Object, BSONObject> createRecordReader(final InputSplit split, final TaskAttemptContext context) {
        if (!(split instanceof MongoInputSplit)) {
            throw new IllegalStateException("Creation of a new RecordReader requires a MongoInputSplit instance.");
        }

        final MongoInputSplit mis = (MongoInputSplit) split;

        return new com.mongodb.hadoop.input.MongoRecordReader(mis);
    }
View Full Code Here

Examples of com.mongodb.hadoop.input.MongoInputSplit

                    splitMax.put(key, val);
                }
            }
        }

        MongoInputSplit split = null;

        //If enabled, attempt to build the split using $gt/$lte
        DBObject query = MongoConfigUtil.getQuery(getConfiguration());
        if (MongoConfigUtil.isRangeQueryEnabled(getConfiguration())) {
            try {
                query = MongoConfigUtil.getQuery(getConfiguration());
                split = createRangeQuerySplit(lowerBound, upperBound, query);
            } catch (Exception e) {
                throw new SplitFailedException("Couldn't use range query to create split: " + e.getMessage());
            }
        }
        if (split == null) {
            split = new MongoInputSplit();
            BasicDBObject splitQuery = new BasicDBObject();
            if (query != null) {
                splitQuery.putAll(query);
            }
            split.setQuery(splitQuery);
            split.setMin(splitMin);
            split.setMax(splitMax);
        }
        split.setInputURI(MongoConfigUtil.getInputURI(getConfiguration()));
        split.setAuthURI(MongoConfigUtil.getAuthURI(getConfiguration()));
        split.setNoTimeout(MongoConfigUtil.isNoTimeout(getConfiguration()));
        split.setFields(MongoConfigUtil.getFields(getConfiguration()));
        split.setSort(MongoConfigUtil.getSort(getConfiguration()));
        return split;
    }
View Full Code Here

Examples of com.mongodb.hadoop.input.MongoInputSplit

        //If the boundaries are actually empty, just return
        //a split without boundaries.
        if (chunkLowerBound == null && chunkUpperBound == null) {
            DBObject splitQuery = new BasicDBObject();
            splitQuery.putAll(query);
            MongoInputSplit split = new MongoInputSplit();
            split.setQuery(splitQuery);
            return split;
        }

        //The boundaries are not empty, so try to build
        //a split using $gt/$lte.

        //First check that the split contains no compound keys.
        // e.g. this is valid: { _id : "foo" }
        // but this is not {_id : "foo", name : "bar"}
        Map.Entry<String, Object> minKey = chunkLowerBound != null && chunkLowerBound.keySet().size() == 1
                                           ? chunkLowerBound.entrySet().iterator().next() : null;
        Map.Entry<String, Object> maxKey = chunkUpperBound != null && chunkUpperBound.keySet().size() == 1
                                           ? chunkUpperBound.entrySet().iterator().next() : null;
        if (minKey == null && maxKey == null) {
            throw new IllegalArgumentException("Range query is enabled but one or more split boundaries contains a compound key:\n"
                                               + "min:  " + chunkLowerBound + "\nmax:  " + chunkUpperBound);
        }

        //Now, check that the lower and upper bounds don't have any keys
        //which overlap with the query.
        if (minKey != null && query.containsField(minKey.getKey()) || maxKey != null && query.containsField(maxKey.getKey())) {
            throw new IllegalArgumentException("Range query is enabled but split key conflicts with query filter:\n"
                                               + "min:  " + chunkLowerBound + "\nmax:  " + chunkUpperBound + "\nquery:  " + query);
        }

        String key = null;
        BasicDBObject rangeObj = new BasicDBObject();
        if (minKey != null) {
            key = minKey.getKey();
            rangeObj.put("$gte", minKey.getValue());
        }

        if (maxKey != null) {
            key = maxKey.getKey();
            rangeObj.put("$lt", maxKey.getValue());
        }

        DBObject splitQuery = new BasicDBObject();
        splitQuery.putAll(query);
        splitQuery.put(key, rangeObj);
        MongoInputSplit split = new MongoInputSplit();
        split.setQuery(splitQuery);
        return split;
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.