Package it.unimi.dsi.io

Examples of it.unimi.dsi.io.InputBitStream


      int offsetStep = queryProperties != null && queryProperties.get( UriKeys.OFFSETSTEP ) != null ? Integer.parseInt( queryProperties.get( UriKeys.OFFSETSTEP ) ) : DEFAULT_OFFSET_STEP;
     
      if ( offsetStep < 0 ) { // Memory-mapped
        offsetStep  = -offsetStep;
        offsets = LongLists.synchronize( new SemiExternalOffsetList(
            new InputBitStream( ByteBufferInputStream.map( new FileInputStream( basename + DiskBasedIndex.OFFSETS_EXTENSION ).getChannel(), MapMode.READ_ONLY ) ),
            offsetStep, numberOfTerms + 1 ) );
      }
      else {
        offsets = offsetStep == 0?
            DiskBasedIndex.readOffsets( basename + DiskBasedIndex.OFFSETS_EXTENSION, numberOfTerms ) :
              LongLists.synchronize( new SemiExternalOffsetList( new InputBitStream( basename + DiskBasedIndex.OFFSETS_EXTENSION, 1024 ), offsetStep, numberOfTerms + 1 ) );
      }
      if ( offsets.size() != numberOfTerms + 1 ) throw new IllegalStateException( "The length of the offset list (" + offsets.size() + ") is not equal to the number of terms plus one (" + numberOfTerms + " + 1)" );
    }
    else offsets = null;
   
View Full Code Here


    try {
      docOffsets = loadOffsetsSuccinctly( basename + DOCUMENT_OFFSETS_EXTENSION, documents, new File( basename + DOCUMENTS_EXTENSION ).length() * Byte.SIZE + 1 );
      termOffsets = loadOffsetsSuccinctly( basename + TERM_OFFSETS_EXTENSION, terms, new File( basename + TERMS_EXTENSION ).length() + 1 );
      nonTermOffsets = nonTerms < 0 ? null : loadOffsetsSuccinctly( basename + NONTERM_OFFSETS_EXTENSION, nonTerms, new File( basename + NONTERMS_EXTENSION ).length() + 1 );

      documentsInputBitStream = documentsByteBufferInputStream != null ? new InputBitStream( documentsByteBufferInputStream ) : new InputBitStream( basename + DOCUMENTS_EXTENSION );
      termsInputStream = new FastBufferedInputStream( termsByteBufferInputStream != null ? termsByteBufferInputStream : new FileInputStream( basename + TERMS_EXTENSION ) );
      nonTermsInputStream = exact ? new FastBufferedInputStream( nonTermsByteBufferInputStream != null ? nonTermsByteBufferInputStream : new FileInputStream( basename + NONTERMS_EXTENSION ) ) : null;
      zipFile = hasNonText ? new ZipFilebasename + ZipDocumentCollection.ZIP_EXTENSION ) : null;
      fileOpenOk = true;
    }
View Full Code Here

  private void ensureFiles() {
    if ( ! fileOpenOk ) throw new IllegalStateException( "Some of the files used by this " + SimpleCompressedDocumentCollection.class.getSimpleName() + " have not been loaded correctly; please use " + AbstractDocumentSequence.class.getName() + ".load() or call filename() after deserialising this instance" );
  }
 
  private static EliasFanoMonotoneLongBigList loadOffsetsSuccinctly( final CharSequence filename, final long numberOfItems, final long upperBound ) throws IOException {
    final InputBitStream ibs = new InputBitStream( filename.toString() );
    final EliasFanoMonotoneLongBigList offsets = new EliasFanoMonotoneLongBigList( numberOfItems + 1, upperBound, new OffsetsLongIterator( ibs, numberOfItems + 1 ) );
    ibs.close();
    return offsets;
  }
View Full Code Here

      this.n = n;
    }

    public IntIterator iterator() {
      return new AbstractIntIterator() {
        final InputBitStream ibs = new InputBitStream( array );
        int pos;
       
        public boolean hasNext() {
          return pos < n;
        }
       
        public int nextInt() {
          if ( ! hasNext() ) throw new NoSuchElementException();
          pos++;
          try {
            return ibs.readGamma();
          }
          catch ( IOException e ) {
            throw new RuntimeException( e );
          }
        }
View Full Code Here

  // Unfinished, experimental method
  public static void optimize( final CharSequence basename ) throws IOException, ClassNotFoundException {
    final SimpleCompressedDocumentCollection collection = (SimpleCompressedDocumentCollection)AbstractDocumentCollection.load( basename );
    final long[] termFrequency = new long[ (int)collection.terms ];
    final long[] nonTermFrequency = collection.exact ? new long[ (int)collection.nonTerms ] : null;
    final InputBitStream documentsIbs = collection.documentsInputBitStream;
    final DocumentFactory factory = collection.factory;
    final boolean exact = collection.exact;
    final MutableString s = new MutableString();
    documentsIbs.position( 0 );
    for( int i = (int)collection.documents; i-- != 0; ) {
      readSelfDelimitedUtf8String( documentsIbs, s ); // Skip URI
      readSelfDelimitedUtf8String( documentsIbs, s ); // Skip title
      for( int f = factory.numberOfFields() - 1; f-- !=0; ) {
        int len = documentsIbs.readDelta();
        while( len-- != 0 ) {
          termFrequency[ documentsIbs.readDelta() ]++;
          if ( exact ) nonTermFrequency[ documentsIbs.readDelta() ]++;
        }
      }
    }
   
    int[] termPerm = new int[ termFrequency.length ];
    for( int i = termPerm.length; i-- != 0; ) termPerm[ i ] = i;
    IntArrays.quickSort( termPerm, 0, termPerm.length, new AbstractIntComparator() {
      public int compare( int arg0, int arg1 ) {
        return termFrequency[ arg1 ] - termFrequency[ arg0 ] < 0 ? -1 : termFrequency[ arg1 ] == termFrequency[ arg0 ] ? 0 : 1;
      }
    });
   
    int[] invTermPerm = new int[ termFrequency.length ];
    for( int i = invTermPerm.length; i-- != 0; ) invTermPerm[ termPerm[ i ] ] = i;
   
    int[] nonTermPerm = null, invNonTermPerm = null;
    if ( exact ) {
      nonTermPerm = new int[ termFrequency.length ];
      for( int i = nonTermPerm.length; i-- != 0; ) nonTermPerm[ i ] = i;
      IntArrays.quickSort( nonTermPerm, 0, nonTermPerm.length, new AbstractIntComparator() {
        public int compare( int arg0, int arg1 ) {
          return termFrequency[ arg1 ] - termFrequency[ arg0 ] < 0 ? -1 : termFrequency[ arg1 ] == termFrequency[ arg0 ] ? 0 : 1;
        }
      });
      invNonTermPerm = new int[ nonTermFrequency.length ];
      for( int i = invNonTermPerm.length; i-- != 0; ) invNonTermPerm[ nonTermPerm[ i ] ] = i;
    }

    File newDocumentsFile = File.createTempFile( SimpleCompressedDocumentCollection.class.getSimpleName(), "temp", new File( basename.toString() ).getParentFile() );
    OutputBitStream newDocumentsObs = new OutputBitStream( newDocumentsFile );
    documentsIbs.position( 0 );
    for( int i = (int)collection.documents; i-- != 0; ) {
      readSelfDelimitedUtf8String( documentsIbs, s ); // Skip URI
      SimpleCompressedDocumentCollectionBuilder.writeSelfDelimitedUtf8String( newDocumentsObs, s );
      readSelfDelimitedUtf8String( documentsIbs, s ); // Skip title
      SimpleCompressedDocumentCollectionBuilder.writeSelfDelimitedUtf8String( newDocumentsObs, s );
      for( int f = factory.numberOfFields() - 1; f-- !=0; ) {
        int len = documentsIbs.readDelta();
        newDocumentsObs.writeDelta( len );
        while( len-- != 0 ) {
          newDocumentsObs.writeDelta( invTermPerm[ documentsIbs.readDelta() ] );
          if ( exact ) newDocumentsObs.writeDelta( invNonTermPerm[ documentsIbs.readDelta() ] );
        }
      }
    }
    newDocumentsObs.close();
    new File( basename + DOCUMENTS_EXTENSION ).delete();
View Full Code Here

  }


  @Override
  public InputBitStream getInputBitStream( int bufferSizeUnused ) {
    return new InputBitStream( index );
  }
View Full Code Here

    final Properties properties = new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION );
    final int numberOfTerms = properties.getInt( Index.PropertyKeys.TERMS );
    final long numberOfoccurrences = properties.getLong( Index.PropertyKeys.OCCURRENCES );
   
    final InputBitStream globCounts = new InputBitStream( basename + DiskBasedIndex.GLOBCOUNTS_EXTENSION );
    long gc[] = new long[ numberOfTerms ];
    for( int t = 0; t < numberOfTerms; t++ ) gc[ t ] = globCounts.readLongGamma();
    globCounts.close();

    final MutableString line = new MutableString();
    MutableString number;
    final FastBufferedReader reader = new FastBufferedReader( new FileReader( statFile ) );
   
View Full Code Here

    return new FileInputStream( indexFile );
  }

  @Override
  public InputBitStream getInputBitStream( final int bufferSize ) throws FileNotFoundException {
    return new InputBitStream( indexFile, bufferSize );
  }
View Full Code Here

    return this.getClass().getSimpleName() + "(" + field + ")";
  }

  @Override
  public InputBitStream getInputBitStream( int bufferSizeUnused ) {
    return new InputBitStream( index );
  }
View Full Code Here

    return new FastByteArrayInputStream( index );
  }

  @Override
  public InputBitStream getPositionsInputBitStream( int bufferSizeUnused ) throws IOException {
    return new InputBitStream( positions );
  }
View Full Code Here

TOP

Related Classes of it.unimi.dsi.io.InputBitStream

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.