Examples of org.apache.hadoop.util.LineReader

org.apache.hadoop.util.LineReader
A class that provides a line reader from an input stream. Depending on the constructor used, lines will either be terminated by:
- one of the following: '\n' (LF), '\r' (CR), or '\r\n' (CR+LF);
- or a custom byte-sequence delimiter.
In both cases, EOF also terminates an otherwise unterminated line.


      for (FileStatus file: listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if(lineLen <= 0) {
          break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if((m != null) && m.matches()) {
            int startRow = Integer.parseInt(m.group(1));
            int rows = Integer.parseInt(m.group(2));
            int totalRows = Integer.parseInt(m.group(3));
            float sampleRate = Float.parseFloat(m.group(4));
            int clients = Integer.parseInt(m.group(5));
            boolean flushCommits = Boolean.parseBoolean(m.group(6));
            boolean writeToWAL = Boolean.parseBoolean(m.group(7));
            boolean reportLatency = Boolean.parseBoolean(m.group(8));


            LOG.debug("split["+ splitList.size() + "] " +
                     " startRow=" + startRow +
                     " rows=" + rows +
                     " totalRows=" + totalRows +
                     " sampleRate=" + sampleRate +
                     " clients=" + clients +
                     " flushCommits=" + flushCommits +
                     " writeToWAL=" + writeToWAL +
                     " reportLatency=" + reportLatency);


            PeInputSplit newSplit =
              new PeInputSplit(startRow, rows, totalRows, sampleRate, clients,
                flushCommits, writeToWAL, reportLatency);
            splitList.add(newSplit);
          }
        }
        in.close();
      }


      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }

View Full Code Here

      this.output = output;
      this.reporter = reporter;
    }


    public void run() {
      LineReader lineReader = null;
      try {
        Text key = new Text();
        Text val = new Text();
        Text line = new Text();
        lineReader = new LineReader((InputStream)clientIn_, job_);
        // 3/4 Tool to Hadoop
        while (lineReader.readLine(line) > 0) {
          answer = line.getBytes();
          splitKeyVal(answer, line.getLength(), key, val);
          output.collect(key, val);
          line.clear();
          numRecWritten_++;
          long now = System.currentTimeMillis();
          if (now-lastStdoutReport > reporterOutDelay_) {
            lastStdoutReport = now;
            String hline = "Records R/W=" + numRecRead_ + "/" + numRecWritten_;
            reporter.setStatus(hline);
            logprintln(hline);
            logflush();
          }
        }
        if (lineReader != null) {
          lineReader.close();
        }
        if (clientIn_ != null) {
          clientIn_.close();
          clientIn_ = null;
          LOG.info("MROutputThread done");
        }
      } catch (Throwable th) {
        outerrThreadsThrowable = th;
        LOG.warn(StringUtils.stringifyException(th));
        try {
          if (lineReader != null) {
            lineReader.close();
          }
          if (clientIn_ != null) {
            clientIn_.close();
            clientIn_ = null;
          }

View Full Code Here

      this.reporter = reporter;
    }
      
    public void run() {
      Text line = new Text();
      LineReader lineReader = null;
      try {
        lineReader = new LineReader((InputStream)clientErr_, job_);
        while (lineReader.readLine(line) > 0) {
          String lineStr = line.toString();
          if (matchesReporter(lineStr)) {
            if (matchesCounter(lineStr)) {
              incrCounter(lineStr);
            } else if (matchesStatus(lineStr)) {
              setStatus(lineStr);
            } else {
              LOG.warn("Cannot parse reporter line: " + lineStr);
            }
          } else {
            System.err.println(lineStr);
          }
          long now = System.currentTimeMillis(); 
          if (reporter != null && now-lastStderrReport > reporterErrDelay_) {
            lastStderrReport = now;
            reporter.progress();
          }
          line.clear();
        }
        if (lineReader != null) {
          lineReader.close();
        }
        if (clientErr_ != null) {
          clientErr_.close();
          clientErr_ = null;
          LOG.info("MRErrorThread done");
        }
      } catch (Throwable th) {
        outerrThreadsThrowable = th;
        LOG.warn(StringUtils.stringifyException(th));
        try {
          if (lineReader != null) {
            lineReader.close();
          }
          if (clientErr_ != null) {
            clientErr_.close();
            clientErr_ = null;
          }

View Full Code Here

  // get the version of the filesystem from the masterindex file
  // the version is currently not useful since its the first version
  // of archives
  public int getHarVersion() throws IOException {
    FSDataInputStream masterIn = fs.open(masterIndex);
    LineReader lmaster = new LineReader(masterIn, getConf());
    Text line = new Text();
    lmaster.readLine(line);
    try {
      masterIn.close();
    } catch(IOException e){
      //disregard it.
      // its a read.

View Full Code Here

      List<String> children, FileStatus archiveIndexStat) throws IOException {
    // read the index file
    FSDataInputStream aIn = null;
    try {
      aIn = fs.open(archiveIndex);
      LineReader aLin;
      long read = 0;
      aLin = new LineReader(aIn, getConf());
      String parentString = parent.getName();
      if (!parentString.endsWith(Path.SEPARATOR)){
          parentString += Path.SEPARATOR;
      }
      Path harPath = new Path(parentString);
      int harlen = harPath.depth();
      Text line = new Text();
      while (read < archiveIndexStat.getLen()) {
        int tmp = aLin.readLine(line);
        read += tmp;
        String lineFeed = line.toString();
        String child = decodeFileName(lineFeed.substring(0, lineFeed.indexOf(" ")));
        if ((child.startsWith(parentString))) {
          Path thisPath = new Path(child);

View Full Code Here

    int hashCode = getHarHash(harPath);
    // get the master index to find the pos
    // in the index file
    FSDataInputStream in = fs.open(masterIndex);
    FileStatus masterStat = fs.getFileStatus(masterIndex);
    LineReader lin = new LineReader(in, getConf());
    Text line = new Text();
    long read = lin.readLine(line);
   //ignore the first line. this is the header of the index files
    String[] readStr = null;
    List<Store> stores = new ArrayList<Store>();
    while(read < masterStat.getLen()) {
      int b = lin.readLine(line);
      read += b;
      readStr = line.toString().split(" ");
      int startHash = Integer.parseInt(readStr[0]);
      int endHash  = Integer.parseInt(readStr[1]);
      if (startHash <= hashCode && hashCode <= endHash) {
        stores.add(new Store(Long.parseLong(readStr[2]),
            Long.parseLong(readStr[3]), startHash,
            endHash));
      }
      line.clear();
    }
    try {
      lin.close();
    } catch(IOException io){
      // do nothing just a read.
    }
    FSDataInputStream aIn = fs.open(archiveIndex);
    LineReader aLin;
    String retStr = null;
    // now start reading the real index file
    for (Store s: stores) {
      read = 0;
      aIn.seek(s.begin);
      aLin = new LineReader(aIn, getConf());
      while (read + s.begin < s.end) {
        int tmp = aLin.readLine(line);
        read += tmp;
        String lineFeed = line.toString();
        String[] parsed = lineFeed.split(" ");
        parsed[0] = decodeFileName(parsed[0]);
        if (harPath.compareTo(new Path(parsed[0])) == 0) {

View Full Code Here


      Text key = new Text();
      for (FileStatus file: files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        LineReader reader = new LineReader(fs.open(path));
        long pos = 0;
        int n;
        try {
          while ((n = reader.readLine(key)) > 0) {
            String[] hosts = getStoreDirHosts(fs, path);
            splits.add(new FileSplit(path, pos, n, hosts));
            pos += n;
          }
        } finally {
          reader.close();
        }
      }


      return splits;
    }

View Full Code Here

      
      for (FileStatus file: listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if(lineLen <= 0) {
          break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if((m != null) && m.matches()) {
            int startRow = Integer.parseInt(m.group(1));
            int rows = Integer.parseInt(m.group(2));
            int totalRows = Integer.parseInt(m.group(3));
            int clients = Integer.parseInt(m.group(4));
            int rowsPerPut = Integer.parseInt(m.group(5));


            LOG.debug("split["+ splitList.size() + "] " + 
                     " startRow=" + startRow +
                     " rows=" + rows +
                     " totalRows=" + totalRows +
                     " clients=" + clients +
                     " rowsPerPut=" + rowsPerPut);


            PeInputSplit newSplit =
              new PeInputSplit(startRow, rows, totalRows, clients, rowsPerPut);
            splitList.add(newSplit);
          }
        }
        in.close();
      }
      
      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }

View Full Code Here


      for (FileStatus file: listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if(lineLen <= 0) {
          break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if((m != null) && m.matches()) {
            int startRow = Integer.parseInt(m.group(1));
            int rows = Integer.parseInt(m.group(2));
            int totalRows = Integer.parseInt(m.group(3));
            int clients = Integer.parseInt(m.group(4));
            boolean flushCommits = Boolean.parseBoolean(m.group(5));
            boolean writeToWAL = Boolean.parseBoolean(m.group(6));


            LOG.debug("split["+ splitList.size() + "] " +
                     " startRow=" + startRow +
                     " rows=" + rows +
                     " totalRows=" + totalRows +
                     " clients=" + clients +
                     " flushCommits=" + flushCommits +
                     " writeToWAL=" + writeToWAL);


            PeInputSplit newSplit =
              new PeInputSplit(startRow, rows, totalRows, clients,
                flushCommits, writeToWAL);
            splitList.add(newSplit);
          }
        }
        in.close();
      }


      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }

View Full Code Here

  // Working directory for this test: the "TestConcatenatedCompressedInput"
  // subdirectory of the path named by the "test.build.data" system property
  // (falling back to "/tmp" when the property is unset), qualified against
  // localFs.
  private static Path workDir =
    new Path(new Path(System.getProperty("test.build.data", "/tmp")),
             "TestConcatenatedCompressedInput").makeQualified(localFs);


  private static LineReader makeStream(String str) throws IOException {
    return new LineReader(new ByteArrayInputStream(str.getBytes("UTF-8")),
                          defaultConf);
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.util.LineReader

co.nubetech.hiho.dedup.DelimitedLineRecordReader

com.cloudera.iterativereduce.io.HDFSLineParser

com.datasalt.pangool.tuplemr.mapred.lib.input.TupleTextInputFormat$TupleTextInputReader

com.hadoop.mapred.DeprecatedLzoLineRecordReader

com.hadoop.mapreduce.LzoLineRecordReader

com.twitter.elephantbird.mapreduce.input.LzoBinaryB64LineRecordReader

com.twitter.elephantbird.mapreduce.input.LzoJsonRecordReader

com.twitter.elephantbird.mapreduce.input.LzoLineRecordReader

com.twitter.elephantbird.mapreduce.input.LzoW3CLogRecordReader

edu.umd.cloud9.collection.aquaint2.Aquaint2DocnoMapping

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.