/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.diskdv;
import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.DELTA_COMPRESSED;
import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.GCD_COMPRESSED;
import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.TABLE_COMPRESSED;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;
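/**
 * Reader for {@link DiskDocValuesFormat}. Values are left on disk and read
 * through clones of a single open data file; only small per-field addressing
 * structures are loaded into RAM, lazily, the first time a field is used.
 *
 * A minimal usage sketch; the codec machinery normally constructs this, and
 * the field name below is hypothetical:
 *
 *   DocValuesProducer producer = ...; // obtained from the codec for a segment
 *   NumericDocValues values = producer.getNumeric(fieldInfos.fieldInfo("price"));
 *   long v = values.get(docID);
 */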
class DiskDocValuesProducer extends DocValuesProducer {
private final Map<Integer,NumericEntry> numerics;
private final Map<Integer,BinaryEntry> binaries;
private final Map<Integer,NumericEntry> ords;
private final Map<Integer,NumericEntry> ordIndexes;
private final IndexInput data;
// memory-resident structures, built lazily and cached per field number
private final Map<Integer,BlockPackedReader> ordinalInstances = new HashMap<Integer,BlockPackedReader>();
private final Map<Integer,MonotonicBlockPackedReader> addressInstances = new HashMap<Integer,MonotonicBlockPackedReader>();
private final Map<Integer,MonotonicBlockPackedReader> ordIndexInstances = new HashMap<Integer,MonotonicBlockPackedReader>();
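/**
 * Expert: reads all per-field entries from the metadata file up front, then
 * opens (and holds open until {@link #close}) the data file, verifying that
 * the two files were written with the same format version.
 */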
DiskDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
IndexInput in = state.directory.openInput(metaName, state.context);
boolean success = false;
final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
DiskDocValuesFormat.VERSION_START,
DiskDocValuesFormat.VERSION_CURRENT);
numerics = new HashMap<Integer,NumericEntry>();
ords = new HashMap<Integer,NumericEntry>();
ordIndexes = new HashMap<Integer,NumericEntry>();
binaries = new HashMap<Integer,BinaryEntry>();
readFields(in, state.fieldInfos);
success = true;
} finally {
if (success) {
IOUtils.close(in);
} else {
IOUtils.closeWhileHandlingException(in);
}
}
success = false;
try {
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.openInput(dataName, state.context);
final int version2 = CodecUtil.checkHeader(data, dataCodec,
DiskDocValuesFormat.VERSION_START,
DiskDocValuesFormat.VERSION_CURRENT);
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch");
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this.data);
}
}
}
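/**
 * Reads per-field entries from the metadata stream: (fieldNumber, type,
 * payload) records terminated by a fieldNumber of -1. SORTED and SORTED_SET
 * entries are compositions of BINARY and NUMERIC sub-entries, each prefixed
 * by a redundant fieldNumber/type pair that is verified here.
 */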
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
int fieldNumber = meta.readVInt();
while (fieldNumber != -1) {
byte type = meta.readByte();
if (type == DiskDocValuesFormat.NUMERIC) {
numerics.put(fieldNumber, readNumericEntry(meta));
} else if (type == DiskDocValuesFormat.BINARY) {
BinaryEntry b = readBinaryEntry(meta);
binaries.put(fieldNumber, b);
} else if (type == DiskDocValuesFormat.SORTED) {
// sorted = binary + numeric
if (meta.readVInt() != fieldNumber) {
throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
if (meta.readByte() != DiskDocValuesFormat.BINARY) {
throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
BinaryEntry b = readBinaryEntry(meta);
binaries.put(fieldNumber, b);
if (meta.readVInt() != fieldNumber) {
throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
NumericEntry n = readNumericEntry(meta);
ords.put(fieldNumber, n);
} else if (type == DiskDocValuesFormat.SORTED_SET) {
// sortedset = binary + numeric + ordIndex
if (meta.readVInt() != fieldNumber) {
throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
if (meta.readByte() != DiskDocValuesFormat.BINARY) {
throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
BinaryEntry b = readBinaryEntry(meta);
binaries.put(fieldNumber, b);
if (meta.readVInt() != fieldNumber) {
throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
NumericEntry n1 = readNumericEntry(meta);
ords.put(fieldNumber, n1);
if (meta.readVInt() != fieldNumber) {
throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
if (meta.readByte() != DiskDocValuesFormat.NUMERIC) {
throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
}
NumericEntry n2 = readNumericEntry(meta);
ordIndexes.put(fieldNumber, n2);
} else {
throw new CorruptIndexException("invalid type: " + type + ", resource=" + meta);
}
fieldNumber = meta.readVInt();
}
}
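/**
 * Decodes a numeric entry: a common header (format, packed ints version,
 * data offset, value count, block size) followed by format-specific metadata:
 * min/gcd for GCD_COMPRESSED, the value table for TABLE_COMPRESSED, and
 * nothing extra for DELTA_COMPRESSED.
 */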
static NumericEntry readNumericEntry(IndexInput meta) throws IOException {
NumericEntry entry = new NumericEntry();
entry.format = meta.readVInt();
entry.packedIntsVersion = meta.readVInt();
entry.offset = meta.readLong();
entry.count = meta.readVLong();
entry.blockSize = meta.readVInt();
switch(entry.format) {
case GCD_COMPRESSED:
entry.minValue = meta.readLong();
entry.gcd = meta.readLong();
break;
case TABLE_COMPRESSED:
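// values were remapped to ordinals into a table of at most 256 distinct
// values; the table itself lives in the metadata, the ordinals in the data.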
if (entry.count > Integer.MAX_VALUE) {
throw new CorruptIndexException("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
}
final int uniqueValues = meta.readVInt();
if (uniqueValues > 256) {
throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
}
entry.table = new long[uniqueValues];
for (int i = 0; i < uniqueValues; ++i) {
entry.table[i] = meta.readLong();
}
break;
case DELTA_COMPRESSED:
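// no extra metadata: BlockPackedReader decodes the per-block packed deltas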
break;
default:
throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
}
return entry;
}
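/**
 * Decodes a binary entry. When minLength == maxLength the values are
 * fixed-width and addresses can be computed arithmetically; otherwise the
 * entry also records where the monotonic stream of end addresses lives and
 * how it is packed.
 */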
static BinaryEntry readBinaryEntry(IndexInput meta) throws IOException {
BinaryEntry entry = new BinaryEntry();
entry.minLength = meta.readVInt();
entry.maxLength = meta.readVInt();
entry.count = meta.readVLong();
entry.offset = meta.readLong();
if (entry.minLength != entry.maxLength) {
entry.addressesOffset = meta.readLong();
entry.packedIntsVersion = meta.readVInt();
entry.blockSize = meta.readVInt();
}
return entry;
}
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.number);
return getNumeric(entry);
}
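/**
 * Opens a numeric reader over a clone of the data file. Exposed with long
 * addressing (rather than int docIDs) because it is also used to read the
 * ord stream of sorted-set fields, which is indexed by ord offset, not docID.
 */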
LongNumericDocValues getNumeric(NumericEntry entry) throws IOException {
final IndexInput data = this.data.clone();
data.seek(entry.offset);
switch (entry.format) {
case DELTA_COMPRESSED:
final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
return new LongNumericDocValues() {
@Override
public long get(long id) {
return reader.get(id);
}
};
case GCD_COMPRESSED:
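// values were stored as (value - minValue) / gcd, so reconstruct them as
// min + mult * quotient. Illustrative example (not from this file): dates
// rounded to whole days as epoch millis share gcd = 86400000.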
final long min = entry.minValue;
final long mult = entry.gcd;
final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
return new LongNumericDocValues() {
@Override
public long get(long id) {
return min + mult * quotientReader.get(id);
}
};
case TABLE_COMPRESSED:
final long[] table = entry.table;
final int bitsRequired = PackedInts.bitsRequired(table.length - 1);
final PackedInts.Reader ords = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired);
return new LongNumericDocValues() {
@Override
public long get(long id) {
return table[(int) ords.get((int) id)];
}
};
default:
throw new AssertionError();
}
}
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
BinaryEntry bytes = binaries.get(field.number);
if (bytes.minLength == bytes.maxLength) {
return getFixedBinary(field, bytes);
} else {
return getVariableBinary(field, bytes);
}
}
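// fixed-width case: the address of value id is just offset + id * maxLength,
// so no addressing data needs to be resident in RAM.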
private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
final IndexInput data = this.data.clone();
return new LongBinaryDocValues() {
@Override
public void get(long id, BytesRef result) {
long address = bytes.offset + id * bytes.maxLength;
try {
data.seek(address);
// NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource)
// assume "they" own the bytes after calling this!
final byte[] buffer = new byte[bytes.maxLength];
data.readBytes(buffer, 0, buffer.length);
result.bytes = buffer;
result.offset = 0;
result.length = buffer.length;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
}
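// variable-width case: a monotonic packed structure of end addresses is
// loaded lazily (cached per field in addressInstances); value id spans
// [addresses(id-1), addresses(id)) relative to the base offset.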
private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException {
final IndexInput data = this.data.clone();
final MonotonicBlockPackedReader addresses;
synchronized (addressInstances) {
MonotonicBlockPackedReader addrInstance = addressInstances.get(field.number);
if (addrInstance == null) {
data.seek(bytes.addressesOffset);
addrInstance = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, false);
addressInstances.put(field.number, addrInstance);
}
addresses = addrInstance;
}
return new LongBinaryDocValues() {
@Override
public void get(long id, BytesRef result) {
long startAddress = bytes.offset + (id == 0 ? 0 : addresses.get(id-1));
long endAddress = bytes.offset + addresses.get(id);
int length = (int) (endAddress - startAddress);
try {
data.seek(startAddress);
// NOTE: we could have one buffer, but various consumers (e.g. FieldComparatorSource)
// assume "they" own the bytes after calling this!
final byte[] buffer = new byte[length];
data.readBytes(buffer, 0, buffer.length);
result.bytes = buffer;
result.offset = 0;
result.length = length;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
}
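// sorted: ord -> value lookups go through the binary dictionary above, while
// the per-document ord stream is RAM-resident (cached in ordinalInstances).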
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
final int valueCount = (int) binaries.get(field.number).count;
final BinaryDocValues binary = getBinary(field);
final BlockPackedReader ordinals;
synchronized (ordinalInstances) {
BlockPackedReader ordsInstance = ordinalInstances.get(field.number);
if (ordsInstance == null) {
NumericEntry entry = ords.get(field.number);
IndexInput data = this.data.clone();
data.seek(entry.offset);
ordsInstance = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, false);
ordinalInstances.put(field.number, ordsInstance);
}
ordinals = ordsInstance;
}
return new SortedDocValues() {
@Override
public int getOrd(int docID) {
return (int) ordinals.get(docID);
}
@Override
public void lookupOrd(int ord, BytesRef result) {
binary.get(ord, result);
}
@Override
public int getValueCount() {
return valueCount;
}
};
}
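// sortedset: a document's ords are the slice [ordIndex(doc-1), ordIndex(doc))
// of a shared ord stream; see setDocument/nextOrd below.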
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
final long valueCount = binaries.get(field.number).count;
// we keep the byte[]s and list of ords on disk, these could be large
final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field);
final LongNumericDocValues ordinals = getNumeric(ords.get(field.number));
// but the addresses to the ord stream are in RAM
final MonotonicBlockPackedReader ordIndex;
synchronized (ordIndexInstances) {
MonotonicBlockPackedReader ordIndexInstance = ordIndexInstances.get(field.number);
if (ordIndexInstance == null) {
NumericEntry entry = ordIndexes.get(field.number);
IndexInput data = this.data.clone();
data.seek(entry.offset);
ordIndexInstance = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, false);
ordIndexInstances.put(field.number, ordIndexInstance);
}
ordIndex = ordIndexInstance;
}
return new SortedSetDocValues() {
long offset;
long endOffset;
@Override
public long nextOrd() {
if (offset == endOffset) {
return NO_MORE_ORDS;
} else {
long ord = ordinals.get(offset);
offset++;
return ord;
}
}
@Override
public void setDocument(int docID) {
offset = (docID == 0 ? 0 : ordIndex.get(docID-1));
endOffset = ordIndex.get(docID);
}
@Override
public void lookupOrd(long ord, BytesRef result) {
binary.get(ord, result);
}
@Override
public long getValueCount() {
return valueCount;
}
};
}
@Override
public void close() throws IOException {
data.close();
}
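/** metadata entry for a numeric docvalues field */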
static class NumericEntry {
long offset;
int format;
int packedIntsVersion;
long count;
int blockSize;
long minValue;
long gcd;
long[] table;
}
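/** metadata entry for a binary docvalues field */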
static class BinaryEntry {
long offset;
long count;
int minLength;
int maxLength;
long addressesOffset;
int packedIntsVersion;
int blockSize;
}
// internally we compose complex dv (sorted/sortedset) from the simpler ones;
// these helpers widen the int-based APIs to long ids so ords and addresses
// can exceed Integer.MAX_VALUE
abstract static class LongNumericDocValues extends NumericDocValues {
@Override
public final long get(int docID) {
return get((long) docID);
}
abstract long get(long id);
}
abstract static class LongBinaryDocValues extends BinaryDocValues {
@Override
public final void get(int docID, BytesRef result) {
get((long)docID, result);
}
abstract void get(long id, BytesRef result);
}
}