Package org.apache.sqoop.mapreduce.db

Source Code of org.apache.sqoop.mapreduce.db.DataDrivenDBRecordReader

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.mapreduce.db;


import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.sqoop.mapreduce.DBWritable;

import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
import com.cloudera.sqoop.mapreduce.db.DBRecordReader;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

/**
* A RecordReader that reads records from a SQL table,
* using data-driven WHERE clause splits.
* Emits LongWritables containing the record number as
* key and DBWritables as value.
*/
public class DataDrivenDBRecordReader<T extends DBWritable>
    extends DBRecordReader<T> {

  private static final Log LOG =
      LogFactory.getLog(DataDrivenDBRecordReader.class);

  private String dbProductName; // database manufacturer string.

  // CHECKSTYLE:OFF
  // TODO(aaron): Refactor constructor to use fewer arguments.
  /**
   * @param split The InputSplit to read data for
   * @throws SQLException
   */
  public DataDrivenDBRecordReader(DBInputFormat.DBInputSplit split,
      Class<T> inputClass, Configuration conf, Connection conn,
      DBConfiguration dbConfig, String cond, String [] fields, String table,
      String dbProduct) throws SQLException {
    super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
    this.dbProductName = dbProduct;
  }
  // CHECKSTYLE:ON

  @Override
  /** {@inheritDoc} */
  public float getProgress() throws IOException {
    return isDone() ? 1.0f : 0.0f;
  }

  /** Returns the query for selecting the records,
   * subclasses can override this for custom behaviour.*/
  protected String getSelectQuery() {
    DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit =
        (DataDrivenDBInputFormat.DataDrivenDBInputSplit) getSplit();
    return getSelectQuery(dataSplit.getLowerClause(),
        dataSplit.getUpperClause());
  }

  /** Returns the query for selecting the records, with lower and upper
   * clause consitions provided as parameters
   * This is needed for recovering from connection failures after some data
   * in the split have been already processed */
  protected String getSelectQuery(String lowerClause, String upperClause) {
    StringBuilder query = new StringBuilder();
    DBConfiguration dbConf = getDBConf();
    String [] fieldNames = getFieldNames();
    String tableName = getTableName();
    String conditions = getConditions();

    // Build the WHERE clauses associated with the data split first.
    // We need them in both branches of this function.
    StringBuilder conditionClauses = new StringBuilder();
    conditionClauses.append("( ").append(lowerClause);
    conditionClauses.append(" ) AND ( ").append(upperClause);
    conditionClauses.append(" )");

    if (dbConf.getInputQuery() == null) {
      // We need to generate the entire query.
      query.append("SELECT ");

      for (int i = 0; i < fieldNames.length; i++) {
        query.append(fieldNames[i]);
        if (i != fieldNames.length -1) {
          query.append(", ");
        }
      }

      query.append(" FROM ").append(tableName);
      if (!dbProductName.startsWith("ORACLE")
          && !dbProductName.startsWith("DB2")
          && !dbProductName.startsWith("MICROSOFT SQL SERVER")
          && !dbProductName.startsWith("POSTGRESQL")) {
        // The AS clause is required for hsqldb. Some other databases might have
        // issues with it, so we're skipping some of them.
        query.append(" AS ").append(tableName);
      }
      query.append(" WHERE ");
      if (conditions != null && conditions.length() > 0) {
        // Put the user's conditions first.
        query.append("( ").append(conditions).append(" ) AND ");
      }

      // Now append the conditions associated with our split.
      query.append(conditionClauses.toString());

    } else {
      // User provided the query. We replace the special token with
      // our WHERE clause.
      String inputQuery = dbConf.getInputQuery();
      if (inputQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) {
        LOG.error("Could not find the clause substitution token "
            + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN + " in the query: ["
            + inputQuery + "]. Parallel splits may not work correctly.");
      }

      query.append(inputQuery.replace(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN,
          conditionClauses.toString()));
    }

    LOG.debug("Using query: " + query.toString());

    return query.toString();
  }
}
TOP

Related Classes of org.apache.sqoop.mapreduce.db.DataDrivenDBRecordReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.