Package joshua.util.io

Examples of joshua.util.io.BinaryOut


    File alignmentsFile = new File(alignmentsFileName);
    Scanner scanner = new Scanner(alignmentsFile);
   
    AlignmentGrids grids = new AlignmentGrids(scanner, null, null, 10);
   
    BinaryOut out = new BinaryOut(binaryAlignmentsFileName);
    grids.writeExternal(out);
    out.flush();
    out.close();
  }
View Full Code Here


    Vocabulary vocab = Vocabulary.getVocabFromSRILM(scanner);
//    vocab.fixVocabulary();
   
   
    // Write the vocabulary to disk in binary format
    ObjectOutput out = new BinaryOut(outVocabFile);
    vocab.writeExternal(out);
   
  }
View Full Code Here

    }

    public void write(String corpusFilename, String vocabFilename, String charset) throws IOException {
   
      ObjectOutput vocabOut =
        new BinaryOut(new FileOutputStream(vocabFilename), true);
//        new ObjectOutputStream(new FileOutputStream(vocabFilename));
      symbolTable.setExternalizableEncoding(charset);
      symbolTable.writeExternal(vocabOut);
      vocabOut.flush();
     
      BinaryOut corpusOut = new BinaryOut(new FileOutputStream(corpusFilename), false);
      this.writeExternal(corpusOut)
      corpusOut.flush();
     
    }
View Full Code Here

    {
      String binaryVocabFilename = outputDirName + File.separator + "common.vocab";
      if (logger.isLoggable(Level.INFO)) logger.info("Writing binary common vocabulary to disk at " + binaryVocabFilename);
     
      ObjectOutput vocabOut =
          new BinaryOut(new FileOutputStream(binaryVocabFilename), true);
      symbolTable.setExternalizableEncoding(charset);
        symbolTable.writeExternal(vocabOut);
        vocabOut.flush();
       
      out.println("Common symbol table for source and target language: " + binaryVocabFilename);
    }
   
   
   
    // Construct source language corpus
    if (logger.isLoggable(Level.INFO)) logger.info("Constructing corpus array from file " + sourceCorpusFileName);
    CorpusArray sourceCorpusArray = SuffixArrayFactory.createCorpusArray(sourceCorpusFileName, symbolTable, sourceLengths[0], sourceLengths[1]);
   
    // Write source corpus to disk
    {
      String binarySourceCorpusFilename = outputDirName + File.separator + "source.corpus";
      if (logger.isLoggable(Level.INFO)) logger.info("Writing binary source corpus to disk at " + binarySourceCorpusFilename);
     
        BinaryOut corpusOut = new BinaryOut(new FileOutputStream(binarySourceCorpusFilename), false);
        sourceCorpusArray.writeExternal(corpusOut)
        corpusOut.flush();
       
      out.println("Source language corpus: " + binarySourceCorpusFilename);
    }
   
    // Construct target language corpus
    if (logger.isLoggable(Level.INFO)) logger.info("Constructing corpus array from file " + targetCorpusFileName);
    CorpusArray targetCorpusArray = SuffixArrayFactory.createCorpusArray(targetCorpusFileName, symbolTable, targetLengths[0], targetLengths[1]);
   
   
    // Write target language corpus to disk
    {
      String binaryTargetCorpusFilename = outputDirName + File.separator + "target.corpus";
      if (logger.isLoggable(Level.INFO)) logger.info("Writing binary target corpus to disk at " + binaryTargetCorpusFilename);
     
        BinaryOut corpusOut = new BinaryOut(new FileOutputStream(binaryTargetCorpusFilename), false);
        targetCorpusArray.writeExternal(corpusOut)
        corpusOut.flush();
       
        out.println("Target language corpus: " + binaryTargetCorpusFilename);
    }
   
    {
      // Construct alignments data structure
      AlignmentGrids grids = new AlignmentGrids(
          new Scanner(new File(alignmentsFileName)),
          sourceCorpusArray,
          targetCorpusArray,
          numberOfSentences);

      // Write alignments to disk
      {
        String binaryAlignmentsFilename = outputDirName + File.separator + "alignment.grids";
        if (logger.isLoggable(Level.INFO)) logger.info("Writing binary alignment grids to disk at " + binaryAlignmentsFilename);

        BinaryOut alignmentsOut = new BinaryOut(binaryAlignmentsFilename);
        grids.writeExternal(alignmentsOut);
        alignmentsOut.flush();
        alignmentsOut.close();

        out.println("Source-target alignment grids: " + binaryAlignmentsFilename);
      }

      // Write lexprobs to disk
      {
        ParallelCorpus parallelCorpus = new AlignedParallelCorpus(sourceCorpusArray, targetCorpusArray, grids);

        if (logger.isLoggable(Level.INFO)) logger.info("Constructing lexprob table");
        LexicalProbabilities lexProbs =
          new LexProbs(parallelCorpus, Float.MIN_VALUE);

        String lexprobsFilename = outputDirName + File.separator + "lexprobs.txt";
        FileOutputStream stream = new FileOutputStream(lexprobsFilename);
        OutputStreamWriter lexprobsOut = new OutputStreamWriter(stream, charset);

        String binaryLexCountFilename = outputDirName + File.separator + "lexicon.counts";
        if (logger.isLoggable(Level.INFO)) logger.info("Writing binary lexicon counts to disk at " + binaryLexCountFilename);

        //      BinaryOut lexCountOut = new BinaryOut(binaryLexCountFilename);
        ObjectOutput lexCountOut = new ObjectOutputStream(new FileOutputStream(binaryLexCountFilename));
        lexProbs.writeExternal(lexCountOut);
        lexCountOut.close();

        String s = lexProbs.toString();

        if (logger.isLoggable(Level.INFO)) logger.info("Writing lexprobs at " + lexprobsFilename);
        lexprobsOut.write(s)
        lexprobsOut.flush();
        lexprobsOut.close();
        out.println("Lexprobs at " + lexprobsFilename);

      }
    }
   
    // Write target language suffix array to disk
    {
      // Construct target language suffix array
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing suffix array from file " + targetCorpusFileName);
      SuffixArray targetSuffixArray = SuffixArrayFactory.createSuffixArray(targetCorpusArray, Cache.DEFAULT_CAPACITY);
     
      String binaryTargetSuffixesFilename = outputDirName + File.separator + "target.suffixes";
      if (logger.isLoggable(Level.INFO)) logger.info("Writing binary target corpus to disk at " + binaryTargetSuffixesFilename);
     
      BinaryOut suffixesOut = new BinaryOut(new FileOutputStream(binaryTargetSuffixesFilename), false);
      targetSuffixArray.writeExternal(suffixesOut)
        suffixesOut.flush();
       
      out.println("Target language suffix array: " + binaryTargetSuffixesFilename);
    }
   
   
    {
      // Construct source language suffix array
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing suffix array from file " + sourceCorpusFileName);
      SuffixArray sourceSuffixArray = SuffixArrayFactory.createSuffixArray(sourceCorpusArray, Cache.DEFAULT_CAPACITY);

      // Write source language suffix array to disk
      {
        String binarySourceSuffixesFilename = outputDirName + File.separator + "source.suffixes";
        if (logger.isLoggable(Level.INFO)) logger.info("Writing binary source corpus to disk at " + binarySourceSuffixesFilename);

        BinaryOut suffixesOut = new BinaryOut(new FileOutputStream(binarySourceSuffixesFilename), false);
        sourceSuffixArray.writeExternal(suffixesOut)
        suffixesOut.flush();

        out.println("Source language suffix array: " + binarySourceSuffixesFilename);
      }

      // Precompute and write frequent phrase locations to disk
      {
        if (logger.isLoggable(Level.INFO)) logger.info("Precomputing indices for most frequent phrases");
        FrequentPhrases frequentPhrases =
          new FrequentPhrases(sourceSuffixArray, minFrequency, maxPhrases, maxPhraseLength, maxPhraseLength, maxPhraseSpan, minNonterminalSpan);

        String frequentPhrasesFilename = outputDirName + File.separator + "frequentPhrases";
        if (logger.isLoggable(Level.INFO)) logger.info("Writing precomputing indices for most frequent phrases at " + frequentPhrasesFilename);
        BinaryOut frequentPhrasesOut = new BinaryOut(frequentPhrasesFilename);
        frequentPhrases.writeExternal(frequentPhrasesOut);
        frequentPhrasesOut.close();
      }
    }
   
    out.flush();
    out.close();
View Full Code Here

      alignments = new AlignmentGrids(new Scanner(new File(alignmentFileName)), sourceCorpusArray, targetCorpusArray, 3);     
    } else if (alignmentsType.equals("MemoryMappedAlignmentGrids")) {
      AlignmentGrids grids = new AlignmentGrids(new Scanner(new File(alignmentFileName)), sourceCorpusArray, targetCorpusArray, 3);
     
      File mmAlignmentFile = File.createTempFile("memoryMappedAlignment", new Date().toString());
      ObjectOutput out = new BinaryOut(mmAlignmentFile);
      grids.writeExternal(out);
      out.flush();
      out.close();
     
      alignments = new MemoryMappedAlignmentGrids(mmAlignmentFile.getAbsolutePath(), sourceCorpusArray, targetCorpusArray);
    } else {
      Assert.fail(alignmentsType + " is not a known alignment type.");
    }
View Full Code Here

TOP

Related Classes of joshua.util.io.BinaryOut

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.