Package org.apache.lucene.util.fst

Source Code of org.apache.lucene.util.fst.BytesStore

package org.apache.lucene.util.fst;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.blur.lucene.fst.ByteArray;
import org.apache.blur.lucene.fst.ByteArrayFactory;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

// TODO: merge with PagedBytes, except PagedBytes doesn't
// let you read while writing which FST needs

class BytesStore extends DataOutput {

  private final List<ByteArray> blocks = new ArrayList<ByteArray>();
  private final static ByteArrayFactory factory = ByteArrayFactory.getDefaultFactory();

  private final int blockSize;
  private final int blockBits;
  private final int blockMask;

  private ByteArray current;
  private int nextWrite;

  public BytesStore(int blockBits) {
    this.blockBits = blockBits;
    blockSize = 1 << blockBits;
    blockMask = blockSize - 1;
    nextWrite = blockSize;
  }

  /** Pulls bytes from the provided IndexInput. */
  public BytesStore(DataInput in, long numBytes, int maxBlockSize) throws IOException {
    int blockSize = 2;
    int blockBits = 1;
    while (blockSize < numBytes && blockSize < maxBlockSize) {
      blockSize *= 2;
      blockBits++;
    }
    this.blockBits = blockBits;
    this.blockSize = blockSize;
    this.blockMask = blockSize - 1;
    long left = numBytes;
    while (left > 0) {
      final int chunk = (int) Math.min(blockSize, left);
      ByteArray block = factory.newByteArray(chunk);
      block.readBytes(in, 0, block.length());
      blocks.add(block);
      left -= chunk;
    }

    // So .getPosition still works
    nextWrite = blocks.get(blocks.size() - 1).length();
  }

  /**
   * Absolute write byte; you must ensure dest is < max position written so far.
   */
  public void writeByte(int dest, byte b) {
    int blockIndex = dest >> blockBits;
    ByteArray block = blocks.get(blockIndex);
    block.put(dest & blockMask, b);
  }

  @Override
  public void writeByte(byte b) {
    if (nextWrite == blockSize) {
      current = factory.newByteArray(blockSize);
      blocks.add(current);
      nextWrite = 0;
    }
    current.put(nextWrite++, b);
  }

  @Override
  public void writeBytes(byte[] b, int offset, int len) {
    while (len > 0) {
      int chunk = blockSize - nextWrite;
      if (len <= chunk) {
        arraycopy(b, offset, current, nextWrite, len);
        nextWrite += len;
        break;
      } else {
        if (chunk > 0) {
          arraycopy(b, offset, current, nextWrite, chunk);
          offset += chunk;
          len -= chunk;
        }
        current = factory.newByteArray(blockSize);
        blocks.add(current);
        nextWrite = 0;
      }
    }
  }

  int getBlockBits() {
    return blockBits;
  }

  void writeBytes(long dest, byte[] b, int offset, int len) {
    ByteArray byteArray = factory.newByteArray(len);
    byteArray.put(0, b, offset, len);
    writeBytes(dest, byteArray, 0, len);
  }

  /**
   * Absolute writeBytes without changing the current position. Note: this
   * cannot "grow" the bytes, so you must only call it on already written parts.
   */
  void writeBytes(long dest, ByteArray b, int offset, int len) {
    // System.out.println("  BS.writeBytes dest=" + dest + " offset=" + offset +
    // " len=" + len);
    assert dest + len <= getPosition() : "dest=" + dest + " pos=" + getPosition() + " len=" + len;

    // Note: weird: must go "backwards" because copyBytes
    // calls us with overlapping src/dest. If we
    // go forwards then we overwrite bytes before we can
    // copy them:

    /*
     * int blockIndex = dest >> blockBits; int upto = dest & blockMask; byte[]
     * block = blocks.get(blockIndex); while (len > 0) { int chunk = blockSize -
     * upto; System.out.println("    cycle chunk=" + chunk + " len=" + len); if
     * (len <= chunk) { System.arraycopy(b, offset, block, upto, len); break; }
     * else { System.arraycopy(b, offset, block, upto, chunk); offset += chunk;
     * len -= chunk; blockIndex++; block = blocks.get(blockIndex); upto = 0; } }
     */

    final long end = dest + len;
    int blockIndex = (int) (end >> blockBits);
    int downTo = (int) (end & blockMask);
    if (downTo == 0) {
      blockIndex--;
      downTo = blockSize;
    }
    ByteArray block = blocks.get(blockIndex);

    while (len > 0) {
      // System.out.println("    cycle downTo=" + downTo + " len=" + len);
      if (len <= downTo) {
        // System.out.println("      final: offset=" + offset + " len=" + len +
        // " dest=" + (downTo-len));
        arraycopy(b, offset, block, downTo - len, len);
        break;
      } else {
        len -= downTo;
        // System.out.println("      partial: offset=" + (offset + len) +
        // " len=" + downTo + " dest=0");
        arraycopy(b, offset + len, block, 0, downTo);
        blockIndex--;
        block = blocks.get(blockIndex);
        downTo = blockSize;
      }
    }
  }

  /**
   * Absolute copy bytes self to self, without changing the position. Note: this
   * cannot "grow" the bytes, so must only call it on already written parts.
   */
  public void copyBytes(long src, long dest, int len) {
    // System.out.println("BS.copyBytes src=" + src + " dest=" + dest + " len="
    // + len);
    assert src < dest;

    // Note: weird: must go "backwards" because copyBytes
    // calls us with overlapping src/dest. If we
    // go forwards then we overwrite bytes before we can
    // copy them:

    /*
     * int blockIndex = src >> blockBits; int upto = src & blockMask; byte[]
     * block = blocks.get(blockIndex); while (len > 0) { int chunk = blockSize -
     * upto; System.out.println("  cycle: chunk=" + chunk + " len=" + len); if
     * (len <= chunk) { writeBytes(dest, block, upto, len); break; } else {
     * writeBytes(dest, block, upto, chunk); blockIndex++; block =
     * blocks.get(blockIndex); upto = 0; len -= chunk; dest += chunk; } }
     */

    long end = src + len;

    int blockIndex = (int) (end >> blockBits);
    int downTo = (int) (end & blockMask);
    if (downTo == 0) {
      blockIndex--;
      downTo = blockSize;
    }
    ByteArray block = blocks.get(blockIndex);

    while (len > 0) {
      // System.out.println("  cycle downTo=" + downTo);
      if (len <= downTo) {
        // System.out.println("    finish");
        writeBytes(dest, block, downTo - len, len);
        break;
      } else {
        // System.out.println("    partial");
        len -= downTo;
        writeBytes(dest + len, block, 0, downTo);
        blockIndex--;
        block = blocks.get(blockIndex);
        downTo = blockSize;
      }
    }
  }

  /**
   * Writes an int at the absolute position without changing the current
   * pointer.
   */
  public void writeInt(long pos, int value) {
    int blockIndex = (int) (pos >> blockBits);
    int upto = (int) (pos & blockMask);
    ByteArray block = blocks.get(blockIndex);
    int shift = 24;
    for (int i = 0; i < 4; i++) {
      block.put(upto++, (byte) (value >> shift));
      shift -= 8;
      if (upto == blockSize) {
        upto = 0;
        blockIndex++;
        block = blocks.get(blockIndex);
      }
    }
  }

  /** Reverse from srcPos, inclusive, to destPos, inclusive. */
  public void reverse(long srcPos, long destPos) {
    assert srcPos < destPos;
    assert destPos < getPosition();
    // System.out.println("reverse src=" + srcPos + " dest=" + destPos);

    int srcBlockIndex = (int) (srcPos >> blockBits);
    int src = (int) (srcPos & blockMask);
    ByteArray srcBlock = blocks.get(srcBlockIndex);

    int destBlockIndex = (int) (destPos >> blockBits);
    int dest = (int) (destPos & blockMask);
    ByteArray destBlock = blocks.get(destBlockIndex);
    // System.out.println("  srcBlock=" + srcBlockIndex + " destBlock=" +
    // destBlockIndex);

    int limit = (int) (destPos - srcPos + 1) / 2;
    for (int i = 0; i < limit; i++) {
      // System.out.println("  cycle src=" + src + " dest=" + dest);
      byte b = srcBlock.get(src);
      srcBlock.put(src, destBlock.get(dest));
      destBlock.put(dest, b);
      src++;
      if (src == blockSize) {
        srcBlockIndex++;
        srcBlock = blocks.get(srcBlockIndex);
        // System.out.println("  set destBlock=" + destBlock + " srcBlock=" +
        // srcBlock);
        src = 0;
      }

      dest--;
      if (dest == -1) {
        destBlockIndex--;
        destBlock = blocks.get(destBlockIndex);
        // System.out.println("  set destBlock=" + destBlock + " srcBlock=" +
        // srcBlock);
        dest = blockSize - 1;
      }
    }
  }

  public void skipBytes(int len) {
    while (len > 0) {
      int chunk = blockSize - nextWrite;
      if (len <= chunk) {
        nextWrite += len;
        break;
      } else {
        len -= chunk;
        current = factory.newByteArray(blockSize);
        blocks.add(current);
        nextWrite = 0;
      }
    }
  }

  public long getPosition() {
    return ((long) blocks.size() - 1) * blockSize + nextWrite;
  }

  /**
   * Pos must be less than the max position written so far! Ie, you cannot
   * "grow" the file with this!
   */
  public void truncate(long newLen) {
    assert newLen <= getPosition();
    assert newLen >= 0;
    int blockIndex = (int) (newLen >> blockBits);
    nextWrite = (int) (newLen & blockMask);
    if (nextWrite == 0) {
      blockIndex--;
      nextWrite = blockSize;
    }
    blocks.subList(blockIndex + 1, blocks.size()).clear();
    if (newLen == 0) {
      current = null;
    } else {
      current = blocks.get(blockIndex);
    }
    assert newLen == getPosition();
  }

  public void finish() {
    if (current != null) {
      ByteArray lastBuffer = factory.newByteArray(nextWrite);
      arraycopy(current, 0, lastBuffer, 0, nextWrite);
      blocks.set(blocks.size() - 1, lastBuffer);
      current = null;
    }
  }

  /** Writes all of our bytes to the target {@link DataOutput}. */
  public void writeTo(DataOutput out) throws IOException {
    for (ByteArray block : blocks) {
      block.writeBytes(out, 0, block.length());
    }
  }

  public FST.BytesReader getForwardReader() {
    if (blocks.size() == 1) {
      return new ForwardBytesReaderByteArray(blocks.get(0));
    }
    return new FST.BytesReader() {
      private ByteArray current;
      private int nextBuffer;
      private int nextRead = blockSize;

      @Override
      public byte readByte() {
        if (nextRead == blockSize) {
          current = blocks.get(nextBuffer++);
          nextRead = 0;
        }
        return current.get(nextRead++);
      }

      @Override
      public void skipBytes(int count) {
        setPosition(getPosition() + count);
      }

      @Override
      public void readBytes(byte[] b, int offset, int len) {
        while (len > 0) {
          int chunkLeft = blockSize - nextRead;
          if (len <= chunkLeft) {
            arraycopy(current, nextRead, b, offset, len);
            nextRead += len;
            break;
          } else {
            if (chunkLeft > 0) {
              arraycopy(current, nextRead, b, offset, chunkLeft);
              offset += chunkLeft;
              len -= chunkLeft;
            }
            current = blocks.get(nextBuffer++);
            nextRead = 0;
          }
        }
      }

      @Override
      public long getPosition() {
        return ((long) nextBuffer - 1) * blockSize + nextRead;
      }

      @Override
      public void setPosition(long pos) {
        int bufferIndex = (int) (pos >> blockBits);
        nextBuffer = bufferIndex + 1;
        current = blocks.get(bufferIndex);
        nextRead = (int) (pos & blockMask);
        assert getPosition() == pos;
      }

      @Override
      public boolean reversed() {
        return false;
      }
    };
  }

  public FST.BytesReader getReverseReader() {
    return getReverseReader(true);
  }

  FST.BytesReader getReverseReader(boolean allowSingle) {
    if (allowSingle && blocks.size() == 1) {
      return new ReverseBytesReaderByteArray(blocks.get(0));
    }
    return new FST.BytesReader() {
      private ByteArray current = blocks.size() == 0 ? null : blocks.get(0);
      private int nextBuffer = -1;
      private int nextRead = 0;

      @Override
      public byte readByte() {
        if (nextRead == -1) {
          current = blocks.get(nextBuffer--);
          nextRead = blockSize - 1;
        }
        return current.get(nextRead--);
      }

      @Override
      public void skipBytes(int count) {
        setPosition(getPosition() - count);
      }

      @Override
      public void readBytes(byte[] b, int offset, int len) {
        for (int i = 0; i < len; i++) {
          b[offset + i] = readByte();
        }
      }

      @Override
      public long getPosition() {
        return ((long) nextBuffer + 1) * blockSize + nextRead;
      }

      @Override
      public void setPosition(long pos) {
        // NOTE: a little weird because if you
        // setPosition(0), the next byte you read is
        // bytes[0] ... but I would expect bytes[-1] (ie,
        // EOF)...?
        int bufferIndex = (int) (pos >> blockBits);
        nextBuffer = bufferIndex - 1;
        current = blocks.get(bufferIndex);
        nextRead = (int) (pos & blockMask);
        assert getPosition() == pos : "pos=" + pos + " getPos()=" + getPosition();
      }

      @Override
      public boolean reversed() {
        return true;
      }
    };
  }

  static class ForwardBytesReaderByteArray extends FST.BytesReader {
    private final ByteArray bytes;
    private int pos;

    public ForwardBytesReaderByteArray(ByteArray bytes) {
      this.bytes = bytes;
    }

    @Override
    public byte readByte() {
      return bytes.get(pos++);
    }

    @Override
    public void readBytes(byte[] b, int offset, int len) {
      arraycopy(bytes, pos, b, offset, len);
      pos += len;
    }

    @Override
    public void skipBytes(int count) {
      pos += count;
    }

    @Override
    public long getPosition() {
      return pos;
    }

    @Override
    public void setPosition(long pos) {
      this.pos = (int) pos;
    }

    @Override
    public boolean reversed() {
      return false;
    }
  }

  static class ReverseBytesReaderByteArray extends FST.BytesReader {
    private final ByteArray bytes;
    private int pos;

    public ReverseBytesReaderByteArray(ByteArray bytes) {
      this.bytes = bytes;
    }

    @Override
    public byte readByte() {
      return bytes.get(pos--);
    }

    @Override
    public void readBytes(byte[] b, int offset, int len) {
      for (int i = 0; i < len; i++) {
        b[offset + i] = bytes.get(pos--);
      }
    }

    @Override
    public void skipBytes(int count) {
      pos -= count;
    }

    @Override
    public long getPosition() {
      return pos;
    }

    @Override
    public void setPosition(long pos) {
      this.pos = (int) pos;
    }

    @Override
    public boolean reversed() {
      return true;
    }
  }

  public static void arraycopy(ByteArray src, int srcOffset, byte[] dest, int destOffset, int len) {
    src.get(srcOffset, dest, destOffset, len);
  }

  public static void arraycopy(ByteArray src, int srcOffset, ByteArray dest, int destOffset, int len) {
    src.copy(srcOffset, dest, destOffset, len);
  }

  public static void arraycopy(byte[] src, int srcOffset, ByteArray dest, int destOffset, int len) {
    dest.put(destOffset, src, srcOffset, len);
  }

}
TOP

Related Classes of org.apache.lucene.util.fst.BytesStore

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.