// Source Code of org.apache.jena.tdbloader4.partitioners.TotalOrderPartitioner$TrieNode

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.jena.tdbloader4.partitioners;


import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;


import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.jena.tdbloader4.io.LongQuadWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Partitioner effecting a total order by reading split points from an externally generated source.
 */
public class TotalOrderPartitioner<K extends WritableComparable<?>, V> extends Partitioner<K, V> implements Configurable {


  // Class-wide SLF4J logger; every method in this class logs at debug level.
  private static final Logger log = LoggerFactory.getLogger(TotalOrderPartitioner.class);


  // Index name (e.g. "SPO", "GSPO") -> lookup node built for that index by init();
  // consulted by getPartition() for every key.
    @SuppressWarnings("rawtypes")
  private Map<String, Node> partitions = new HashMap<String, Node>(9); // nine indexes!
  // Value returned by getPartitionFile() when PARTITIONER_PATH is not set; init()
  // resolves files against the local file system when this default is in effect.
  public static final String DEFAULT_PATH = "_partition.lst";
  // Configuration key holding the base path of the partition files
  // (init() appends "_" + index name to it).
  public static final String PARTITIONER_PATH = "mapreduce.totalorderpartitioner.path";
  // Configuration key for the maximum trie depth; init() falls back to 200 when unset.
  public static final String MAX_TRIE_DEPTH = "mapreduce.totalorderpartitioner.trie.maxdepth";
  // Configuration key enabling the trie for BinaryComparable keys; init() defaults it to true.
  public static final String NATURAL_ORDER = "mapreduce.totalorderpartitioner.naturalorder";
  // Configuration handed to setConf() and returned by getConf().
  Configuration conf;


  // Reducer count taken from the job in init(); getPartition() divides it by 9
  // to obtain the number of partitions reserved for each index.
  private int numReduceTasks;
  
  /**
   * Creates a partitioner with no split points loaded. The constructor itself
   * only emits a debug log line; the split points are read when
   * {@link #setConf(Configuration)} is called.
   */
  public TotalOrderPartitioner() {
    log.debug("constructor()");
  }


  /**
   * Read in the partition file and build indexing data structures. If the
   * keytype is {@link org.apache.hadoop.io.BinaryComparable} and
   * <tt>total.order.partitioner.natural.order</tt> is not false, a trie of
   * the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
   * will be built. Otherwise, keys will be located using a binary search of
   * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
   * defined for this job. The input file must be sorted with the same
   * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
   */
  // keytype from conf not static
  public void setConf(Configuration conf) {
    log.debug("setConf({})", conf);
    this.conf = conf;
    init("GSPO", conf);
    init("GPOS", conf);
    init("GOSP", conf);
    init("SPOG", conf);
    init("POSG", conf);
    init("OSPG", conf);
    init("SPO", conf);
    init("POS", conf);
    init("OSP", conf);
    log.debug("setConf() finished.");    
  }
  
  @SuppressWarnings("unchecked")
  private void init(String indexName, Configuration conf) {
    log.debug("init({}, {})", indexName, conf);
    try {
      String parts = getPartitionFile(conf);
      final Path partFile = new Path(parts + "_" + indexName);
      final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(conf) // assume in DistributedCache
          : partFile.getFileSystem(conf);
      log.debug("FileSystem is {}", fs);
      Job job = new Job(conf);
      Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
      log.debug("Map output key class is {}", keyClass.getSimpleName());
      K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
      numReduceTasks = job.getNumReduceTasks();
      log.debug("Found {} split points, number of reducers is {}", splitPoints.length, numReduceTasks);
      if (splitPoints.length != (numReduceTasks / 9) - 1) {
        log.debug("Split points are {} which is different from {}", splitPoints.length, (numReduceTasks / 9) - 1);
        throw new IOException("Wrong number of partitions in keyset");
      }
      RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
      for (int i = 0; i < splitPoints.length - 1; ++i) {
        if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
          log.debug("Split points are out of order");
          throw new IOException("Split points are out of order");
        }
      }
      boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
      Node<?> partitions = null;
      if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
        partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
            // Now that blocks of identical splitless trie nodes are
            // represented reentrantly, and we develop a leaf for any trie
            // node with only one split point, the only reason for a depth
            // limit is to refute stack overflow or bloat in the pathological
            // case where the split points are long and mostly look like bytes
            // iii...iixii...iii . Therefore, we make the default
            // depth limit large but not huge.
            conf.getInt(MAX_TRIE_DEPTH, 200));
      } else {
        partitions = new BinarySearchNode(splitPoints, comparator);
      }
      log.debug("Adding {} to {}", partitions, this.partitions);
      this.partitions.put(indexName, partitions);
    } catch (IOException e) {
      throw new IllegalArgumentException("Can't read partitions file", e);
    }
    log.debug("init({}, {}) finished.", indexName, conf);
  }


  public Configuration getConf() {
    log.debug("getConf() = {}", conf);
    return conf;
  }


  // by construction, we know if our keytype
  @SuppressWarnings("unchecked")
  // is memcmp-able and uses the trie
  public int getPartition(K key, V value, int numPartitions) {
    LongQuadWritable quad = (LongQuadWritable)key;
    String indexName = quad.getIndexName();
    int indexOffset = ( numReduceTasks / 9 ) * LongQuadWritable.getIndexOffset(indexName) ;
    int indexPartition = partitions.get(indexName).findPartition(key);
    int partition = indexPartition + indexOffset;
    if ( log.isDebugEnabled() ) {
      log.debug("indexName = {}", indexName);
      log.debug("indexOffset = {}", indexOffset);
      log.debug("indexPartition = {}", indexPartition);
      log.debug("getPartition({}, {}, {}) = {}",
          new String[] { key.toString(), value.toString(),
          String.valueOf(numPartitions),
          String.valueOf(partition) });
    }
    return partition;
  }


  /**
   * Set the path to the SequenceFile storing the sorted partition keyset. It
   * must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt> keys
   * in the SequenceFile.
   */
  public static void setPartitionFile(Configuration conf, Path p) {
    log.debug("setPartitionFile({}, {})", conf, p);
    conf.set(PARTITIONER_PATH, p.toString());
  }


  /**
   * Get the path to the SequenceFile storing the sorted partition keyset.
   * 
   * @see #setPartitionFile(Configuration, Path)
   */
  public static String getPartitionFile(Configuration conf) {
    String p = conf.get(PARTITIONER_PATH, DEFAULT_PATH); 
    log.debug("getPartitionFile({}) = {}", conf, p);
    return p;
  }


  /**
   * Lookup structure used by the partitioner to locate a key within the
   * sorted set of split points of one index.
   *
   * @param <T> the key type the node can look up
   */
  interface Node<T> {
    /**
     * Returns the index of the partition that {@code key} falls into.
     * <p>
     * With sorted split points K1..Kn, partition i covers the half-open key
     * range [Ki, Ki+1), with the implied bounds K0 = -inf and Kn+1 = +inf; n
     * split points therefore define n + 1 partitions, and a key equal to a
     * split point belongs to the partition that starts at it.
     *
     * @param key the key to locate
     * @return the partition index within this keyset
     */
    int findPartition(T key);
  }


  /**
   * Base class for trie nodes. If the keytype is memcomp-able, this builds
   * tries of the first <tt>total.order.partitioner.max.trie.depth</tt> bytes.
   */
  static abstract class TrieNode implements Node<BinaryComparable> {
    private final int level;


    TrieNode(int level) {
      this.level = level;
    }


    int getLevel() {
      return level;
    }
  }


  /**
   * For types that are not {@link org.apache.hadoop.io.BinaryComparable} or
   * where disabled by <tt>total.order.partitioner.natural.order</tt>, search
   * the partition keyset with a binary search.
   */
  class BinarySearchNode implements Node<K> {
    private final K[] splitPoints;
    private final RawComparator<K> comparator;


    BinarySearchNode(K[] splitPoints, RawComparator<K> comparator) {
      this.splitPoints = splitPoints;
      this.comparator = comparator;
    }


    public int findPartition(K key) {
      final int pos = Arrays.binarySearch(splitPoints, key, comparator) + 1;
      return (pos < 0) ? -pos : pos;
    }
  }


  /**
   * An inner trie node that contains 256 children based on the next character.
   */
  class InnerTrieNode extends TrieNode {
    private TrieNode[] child = new TrieNode[256];


    InnerTrieNode(int level) {
      super(level);
    }


    public int findPartition(BinaryComparable key) {
      int level = getLevel();
      if (key.getLength() <= level) {
        return child[0].findPartition(key);
      }
      return child[0xFF & key.getBytes()[level]].findPartition(key);
    }
  }


  /**
   * @param level the tree depth at this node
   * @param splitPoints the full split point vector, which holds the split point or points this leaf node should contain
   * @param lower first INcluded element of splitPoints
   * @param upper first EXcluded element of splitPoints
   * @return a leaf node. They come in three kinds: no split points [and the
   *         findParttion returns a canned index], one split point [and we
   *         compare with a single comparand], or more than one [and we do a
   *         binary search]. The last case is rare.
   */
  private TrieNode LeafTrieNodeFactory(int level,
      BinaryComparable[] splitPoints, int lower, int upper) {
    switch (upper - lower) {
    case 0:
      return new UnsplitTrieNode(level, lower);


    case 1:
      return new SinglySplitTrieNode(level, splitPoints, lower);


    default:
      return new LeafTrieNode(level, splitPoints, lower, upper);
    }
  }


  /**
   * A leaf trie node that scans for the key between lower..upper.
   * 
   * We don't generate many of these now, since we usually continue trie-ing
   * when more than one split point remains at this level. and we make
   * different objects for nodes with 0 or 1 split point.
   */
  private class LeafTrieNode extends TrieNode {
    final int lower;
    final int upper;
    final BinaryComparable[] splitPoints;


    LeafTrieNode(int level, BinaryComparable[] splitPoints, int lower, int upper) {
      super(level);
      this.lower = lower;
      this.upper = upper;
      this.splitPoints = splitPoints;
    }


    public int findPartition(BinaryComparable key) {
      final int pos = Arrays.binarySearch(splitPoints, lower, upper, key) + 1;
      return (pos < 0) ? -pos : pos;
    }
  }


  private class UnsplitTrieNode extends TrieNode {
    final int result;


    UnsplitTrieNode(int level, int value) {
      super(level);
      this.result = value;
    }


    public int findPartition(BinaryComparable key) {
      return result;
    }
  }


  private class SinglySplitTrieNode extends TrieNode {
    final int lower;
    final BinaryComparable mySplitPoint;


    SinglySplitTrieNode(int level, BinaryComparable[] splitPoints, int lower) {
      super(level);
      this.lower = lower;
      this.mySplitPoint = splitPoints[lower];
    }


    public int findPartition(BinaryComparable key) {
      return lower + (key.compareTo(mySplitPoint) < 0 ? 0 : 1);
    }
  }


  /**
   * Read the cut points from the given IFile.
   * 
   * @param fs The file system
   * @param p The path to read
   * @param keyClass The map output key class
   * @param job The job config
   * @throws IOException
   */
  // matching key types enforced by passing in
  @SuppressWarnings("unchecked")
  // map output key class
  private K[] readPartitions(FileSystem fs, Path p, Class<K> keyClass, Configuration conf) throws IOException {
    log.debug("readPartitions({}, {}, {}, {})", new Object[]{fs, p, keyClass.getSimpleName(), conf});
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
    log.debug("SequenceFile.Reader is {}", reader);
    log.debug("SequenceFile.Reader position is {}", reader.getPosition());
    ArrayList<K> parts = new ArrayList<K>();
    K key = ReflectionUtils.newInstance(keyClass, conf);
    NullWritable value = NullWritable.get();
    while (reader.next(key, value)) {
      log.debug("Partition key {}", key);
      parts.add(key);
      key = ReflectionUtils.newInstance(keyClass, conf);
    }
    reader.close();
    return parts.toArray((K[]) Array.newInstance(keyClass, parts.size()));
  }


  /**
   * 
   * This object contains a TrieNodeRef if there is such a thing that can be
   * repeated. Two adjacent trie node slots that contain no split points can
   * be filled with the same trie node, even if they are not on the same
   * level. See buildTreeRec, below.
   * 
   */
  private class CarriedTrieNodeRef {
    TrieNode content;


    CarriedTrieNodeRef() {
      content = null;
    }
  }


  /**
   * Given a sorted set of cut points, build a trie that will find the correct
   * partition quickly.
   * 
   * @param splits
   *            the list of cut points
   * @param lower
   *            the lower bound of partitions 0..numPartitions-1
   * @param upper
   *            the upper bound of partitions 0..numPartitions-1
   * @param prefix
   *            the prefix that we have already checked against
   * @param maxDepth
   *            the maximum depth we will build a trie for
   * @return the trie node that will divide the splits correctly
   */
  private TrieNode buildTrie(BinaryComparable[] splits, int lower, int upper, byte[] prefix, int maxDepth) {
    return buildTrieRec(splits, lower, upper, prefix, maxDepth, new CarriedTrieNodeRef());
  }


  /**
   * This is the core of buildTrie. The interface, and stub, above, just adds
   * an empty CarriedTrieNodeRef.
   * 
   * We build trie nodes in depth first order, which is also in key space
   * order. Every leaf node is referenced as a slot in a parent internal node.
   * If two adjacent slots [in the DFO] hold leaf nodes that have no split
   * point, then they are not separated by a split point either, because
   * there's no place in key space for that split point to exist.
   * 
   * When that happens, the leaf nodes would be semantically identical, and we
   * reuse the object. A single CarriedTrieNodeRef "ref" lives for the
   * duration of the tree-walk. ref carries a potentially reusable, unsplit
   * leaf node for such reuse until a leaf node with a split arises, which
   * breaks the chain until we need to make a new unsplit leaf node.
   * 
   * Note that this use of CarriedTrieNodeRef means that for internal nodes,
   * for internal nodes if this code is modified in any way we still need to
   * make or fill in the subnodes in key space order.
   */
  private TrieNode buildTrieRec(BinaryComparable[] splits, int lower, int upper, byte[] prefix, int maxDepth, CarriedTrieNodeRef ref) {
    final int depth = prefix.length;
    // We generate leaves for a single split point as well as for
    // no split points.
    if (depth >= maxDepth || lower >= upper - 1) {
      // If we have two consecutive requests for an unsplit trie node, we
      // can deliver the same one the second time.
      if (lower == upper && ref.content != null) {
        return ref.content;
      }
      TrieNode result = LeafTrieNodeFactory(depth, splits, lower, upper);
      ref.content = lower == upper ? result : null;
      return result;
    }
    InnerTrieNode result = new InnerTrieNode(depth);
    byte[] trial = Arrays.copyOf(prefix, prefix.length + 1);
    // append an extra byte on to the prefix
    int currentBound = lower;
    for (int ch = 0; ch < 0xFF; ++ch) {
      trial[depth] = (byte) (ch + 1);
      lower = currentBound;
      while (currentBound < upper) {
        if (splits[currentBound].compareTo(trial, 0, trial.length) >= 0) {
          break;
        }
        currentBound += 1;
      }
      trial[depth] = (byte) ch;
      result.child[0xFF & ch] = buildTrieRec(splits, lower, currentBound, trial, maxDepth, ref);
    }
    // pick up the rest
    trial[depth] = (byte) 0xFF;
    result.child[0xFF] = buildTrieRec(splits, lower, currentBound, trial, maxDepth, ref);


    return result;
  }


}
// Source Code of org.apache.jena.tdbloader4.partitioners.TotalOrderPartitioner$TrieNode

// Related Classes of org.apache.jena.tdbloader4.partitioners.TotalOrderPartitioner$TrieNode