Package org.apache.hadoop.util

Examples of org.apache.hadoop.util.LineReader
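
LineReader wraps any InputStream and reads it one line at a time into a Text buffer. readLine returns the number of bytes consumed, including the line terminator, so a return value of 0 means the stream is exhausted. A minimal, self-contained sketch of that pattern (the class name LineReaderDemo and the sample input are made up for illustration):

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;

public class LineReaderDemo {
  public static void main(String[] args) throws IOException {
    LineReader reader = new LineReader(
        new ByteArrayInputStream("first\nsecond\r\nthird".getBytes("UTF-8")));
    try {
      Text line = new Text();
      // readLine returns the bytes consumed, so 0 signals end of stream.
      while (reader.readLine(line) > 0) {
        System.out.println(line); // the \n, \r or \r\n terminator is not part of the Text
      }
    } finally {
      reader.close();
    }
  }
}

The snippets below show the same pattern against different kinds of streams.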



  public void testNewLines() throws Exception {
    LineReader in = null;
    try {
      in = makeStream("a\nbb\n\nccc\rdddd\r\neeeee");
      Text out = new Text();
      in.readLine(out);
      assertEquals("line1 length", 1, out.getLength());
      in.readLine(out);
      assertEquals("line2 length", 2, out.getLength());
      in.readLine(out);
      assertEquals("line3 length", 0, out.getLength());
      in.readLine(out);
      assertEquals("line4 length", 3, out.getLength());
      in.readLine(out);
      assertEquals("line5 length", 4, out.getLength());
      in.readLine(out);
      assertEquals("line5 length", 5, out.getLength());
      assertEquals("end of file", 0, in.readLine(out));
    } finally {
      if (in != null) {
        in.close();
      }
    }
  }
View Full Code Here
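
In the test above, the input mixes all three terminators readLine recognizes (\n, \r and \r\n). The terminator bytes are not copied into the Text, so the asserted lengths are just the payload sizes (1, 2, 0, 3, 4 and 5), and the final call returns 0 once the stream is exhausted.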


      assertEquals("Some keys in no partition.", length, bits.cardinality());

    }
  }
  private LineReader makeStream(String str) throws IOException {
    return new LineReader(new ByteArrayInputStream
                                           (str.getBytes("UTF-8")),
                                           defaultConf);
  }
View Full Code Here
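
The makeStream helper uses the LineReader(InputStream, Configuration) constructor, where the Configuration only supplies the internal read-buffer size (the io.file.buffer.size setting in current releases), so the test's defaultConf needs no special tuning.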

 
  public void testUTF8() throws Exception {
    LineReader in = null;
    try {
      in = makeStream("abcd\u20acbdcd\u20ac");
      Text line = new Text();
      in.readLine(line);
      assertEquals("readLine changed utf8 characters",
                   "abcd\u20acbdcd\u20ac", line.toString());
      in = makeStream("abc\u200axyz");
      in.readLine(line);
      assertEquals("split on fake newline", "abc\u200axyz", line.toString());
    } finally {
      if (in != null) {
        in.close();
      }
    }
  }
View Full Code Here
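
testUTF8 confirms that multi-byte UTF-8 characters (the euro sign, \u20ac) pass through readLine unchanged and that \u200a, a Unicode space character rather than an ASCII newline, does not split the line.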


   *           We will deem a stream to be a good 0.20 job history stream if the
   *           first line is exactly "Meta VERSION=\"1\" ."
   */
  public static boolean canParse(InputStream input) throws IOException {
    try {
      LineReader reader = new LineReader(input);

      Text buffer = new Text();

      return reader.readLine(buffer) != 0
          && buffer.toString().equals("Meta VERSION=\"1\" .");
    } catch (EOFException e) {
      return false;
    }
  }
View Full Code Here
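
canParse only inspects the first line: a non-zero return from readLine proves the stream was not empty, and the exact string comparison enforces the Meta VERSION header described in the comment; an EOFException on a truncated stream simply yields false.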

  }

  public Hadoop20JHParser(InputStream input) throws IOException {
    super();

    reader = new LineReader(input);
  }
View Full Code Here

        if (fileStats[i].getPath().getName().startsWith("_")) {
          continue;
        }

        LOG.info("processing " + fileStats[i].getPath());
        LineReader reader = new LineReader(fs.open(fileStats[i].getPath()));

        Text line = new Text();
        while (reader.readLine(line) > 0) {
          String[] arr = line.toString().split("\\t+", 2);

          int docno = Integer.parseInt(arr[0]);
          int len = Integer.parseInt(arr[1]);

          // Note that because of speculative execution there may be
          // multiple copies of doclength data. Therefore, we can't
          // just count number of doclengths read. Instead, keep track
          // of largest docno encountered.
          if (docno < docnoOffset) {
            throw new RuntimeException(
                "Error: docno " + docno + " < docnoOffset " + docnoOffset + "!");
          }

          doclengths[docno - docnoOffset] = len;

          if (docno > maxDocno) {
            maxDocno = docno;
          }
          if (docno < minDocno) {
            minDocno = docno;
          }
        }
        reader.close();
        context.getCounter(DocLengths.Files).increment(1);
      }

      LOG.info("min docno: " + minDocno);
      LOG.info("max docno: " + maxDocno);
View Full Code Here
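
This loader shows the usual end-of-stream idiom: while (reader.readLine(line) > 0) keeps consuming lines until the stream is exhausted, each line is split on tabs into a docno and a length, and the reader is closed once the file has been processed.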

              pos, flen, SplittableCompressionCodec.READ_MODE.BYBLOCK);
        if (in.getAdjustedStart() >= flen) {
          break;
        }
        LOG.info("SAMPLE " + in.getAdjustedStart() + "," + in.getAdjustedEnd());
        final LineReader lreader = new LineReader(in);
        lreader.readLine(line); // ignore; likely partial
        if (in.getPos() >= flen) {
          break;
        }
        lreader.readLine(line);
        final int seq1 = readLeadingInt(line);
        lreader.readLine(line);
        if (in.getPos() >= flen) {
          break;
        }
        final int seq2 = readLeadingInt(line);
        assertEquals("Mismatched lines", seq1 + 1, seq2);
View Full Code Here
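
Here the LineReader wraps a SplitCompressionInputStream opened in BYBLOCK mode: the first line read after seeking into the split is discarded because it is probably partial, and the next two lines are checked to carry consecutive leading sequence numbers.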

        event = parser.nextEvent();
      }

      printStream.close();

      LineReader goldLines = new LineReader(inputGoldStream);
      LineReader resultLines =
          new LineReader(new PossiblyDecompressedInputStream(resultPath, conf));

      int lineNumber = 1;

      try {
        Text goldLine = new Text();
        Text resultLine = new Text();

        int goldRead = goldLines.readLine(goldLine);
        int resultRead = resultLines.readLine(resultLine);

        while (goldRead * resultRead != 0) {
          if (!goldLine.equals(resultLine)) {
            assertEquals("Type mismatch detected", goldLine, resultLine);
            break;
          }

          goldRead = goldLines.readLine(goldLine);
          resultRead = resultLines.readLine(resultLine);

          ++lineNumber;
        }

        if (goldRead != resultRead) {
          assertEquals("the " + (goldRead > resultRead ? "gold" : resultRead)
              + " file contains more text at line " + lineNumber, goldRead,
              resultRead);
        }

        success = true;
      } finally {
        goldLines.close();
        resultLines.close();

        if (success) {
          lfs.delete(resultPath, false);
        }
      }
View Full Code Here
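
The comparison harness reads the gold file and the generated result in lockstep; because readLine returns 0 only at end of stream, the condition goldRead * resultRead != 0 keeps the loop running until either file is exhausted, and the final check reports which file still had unread text.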

  // get the version of the filesystem from the masterindex file
  // the version is currently not useful since it's the first version
  // of archives
  public int getHarVersion() throws IOException {
    FSDataInputStream masterIn = fs.open(masterIndex);
    LineReader lmaster = new LineReader(masterIn, getConf());
    Text line = new Text();
    lmaster.readLine(line);
    try {
      masterIn.close();
    } catch(IOException e){
      // disregard it.
      // it's a read.
View Full Code Here
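
getHarVersion opens the archive's masterindex, reads its first line with LineReader(FSDataInputStream, Configuration), and then closes the input stream, deliberately ignoring close failures because the data has already been read.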
