Package org.apache.pig.ResourceSchema

Examples of org.apache.pig.ResourceSchema.ResourceFieldSchema


            System.err.println("Complex data: ");
            System.err.println(line);
            String[] flds = line.split(":",-1);
            Tuple t = new DefaultTuple();
           
            ResourceFieldSchema stringfs = new ResourceFieldSchema();
            stringfs.setType(DataType.CHARARRAY);
            ResourceFieldSchema intfs = new ResourceFieldSchema();
            intfs.setType(DataType.INTEGER);
           
            ResourceSchema tupleSchema = new ResourceSchema();
            tupleSchema.setFields(new ResourceFieldSchema[]{stringfs, intfs});
            ResourceFieldSchema tuplefs = new ResourceFieldSchema();
            tuplefs.setSchema(tupleSchema);
            tuplefs.setType(DataType.TUPLE);
           
            ResourceSchema bagSchema = new ResourceSchema();
            bagSchema.setFields(new ResourceFieldSchema[]{tuplefs});
            ResourceFieldSchema bagfs = new ResourceFieldSchema();
            bagfs.setSchema(bagSchema);
            bagfs.setType(DataType.BAG);
           
            t.append(flds[0].compareTo("")!=0 ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
            t.append(flds[1].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[1].getBytes()) : null);
            t.append(flds[2].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
            t.append(flds[3].compareTo("")!=0 ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
View Full Code Here


  public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException {

    List<ResourceFieldSchema> rfSchemaList = new ArrayList<ResourceFieldSchema>();
    for (HCatFieldSchema hfs : hcatSchema.getFields()) {
      ResourceFieldSchema rfSchema;
      rfSchema = getResourceSchemaFromFieldSchema(hfs);
      rfSchemaList.add(rfSchema);
    }
    ResourceSchema rSchema = new ResourceSchema();
    rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0]));
View Full Code Here

  }

  private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs)
    throws IOException {
    ResourceFieldSchema rfSchema;
    // if we are dealing with a bag or tuple column - need to worry about subschema
    if (hfs.getType() == Type.STRUCT) {
      rfSchema = new ResourceFieldSchema()
        .setName(hfs.getName())
        .setDescription(hfs.getComment())
        .setType(getPigType(hfs))
        .setSchema(getTupleSubSchema(hfs));
    } else if (hfs.getType() == Type.ARRAY) {
      rfSchema = new ResourceFieldSchema()
        .setName(hfs.getName())
        .setDescription(hfs.getComment())
        .setType(getPigType(hfs))
        .setSchema(getBagSubSchema(hfs));
    } else {
      rfSchema = new ResourceFieldSchema()
        .setName(hfs.getName())
        .setDescription(hfs.getComment())
        .setType(getPigType(hfs))
        .setSchema(null); // no munging inner-schemas
    }
View Full Code Here

      innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)
        .replaceAll("FIELDNAME", hfs.getName());
    }

    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName)
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if (arrayElementFieldSchema.getType() == Type.STRUCT) {
      bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
      ResourceSchema s = new ResourceSchema();
      List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
      s.setFields(lrfs.toArray(new ResourceFieldSchema[0]));
      bagSubFieldSchemas[0].setSchema(s);
    } else {
      ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
      innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName)
        .setDescription("The inner field in the tuple in the bag")
        .setType(getPigType(arrayElementFieldSchema))
        .setSchema(null); // the element type is not a tuple - so no subschema
      bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    }
View Full Code Here

  public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException {

    List<ResourceFieldSchema> rfSchemaList = new ArrayList<ResourceFieldSchema>();
    for (HCatFieldSchema hfs : hcatSchema.getFields()) {
      ResourceFieldSchema rfSchema;
      rfSchema = getResourceSchemaFromFieldSchema(hfs);
      rfSchemaList.add(rfSchema);
    }
    ResourceSchema rSchema = new ResourceSchema();
    rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0]));
View Full Code Here

  }

  private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs)
    throws IOException {
    ResourceFieldSchema rfSchema;
    // if we are dealing with a bag or tuple column - need to worry about subschema
    if (hfs.getType() == Type.STRUCT) {
      rfSchema = new ResourceFieldSchema()
        .setName(hfs.getName())
        .setDescription(hfs.getComment())
        .setType(getPigType(hfs))
        .setSchema(getTupleSubSchema(hfs));
    } else if (hfs.getType() == Type.ARRAY) {
      rfSchema = new ResourceFieldSchema()
        .setName(hfs.getName())
        .setDescription(hfs.getComment())
        .setType(getPigType(hfs))
        .setSchema(getBagSubSchema(hfs));
    } else {
      rfSchema = new ResourceFieldSchema()
        .setName(hfs.getName())
        .setDescription(hfs.getComment())
        .setType(getPigType(hfs))
        .setSchema(null); // no munging inner-schemas
    }
View Full Code Here

      innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)
        .replaceAll("FIELDNAME", hfs.getName());
    }

    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName)
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if (arrayElementFieldSchema.getType() == Type.STRUCT) {
      bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
      ResourceSchema s = new ResourceSchema();
      List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
      s.setFields(lrfs.toArray(new ResourceFieldSchema[0]));
      bagSubFieldSchemas[0].setSchema(s);
    } else {
      ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
      innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName)
        .setDescription("The inner field in the tuple in the bag")
        .setType(getPigType(arrayElementFieldSchema))
        .setSchema(null); // the element type is not a tuple - so no subschema
      bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    }
View Full Code Here

        // get default marshallers and validators
        List<AbstractType> marshallers = getDefaultMarshallers(cfDef);
        Map<ByteBuffer,AbstractType> validators = getValidatorMap(cfDef);

        // add key
        ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
        keyFieldSchema.setName("key");
        keyFieldSchema.setType(getPigType(marshallers.get(2)));

        ResourceSchema bagSchema = new ResourceSchema();
        ResourceFieldSchema bagField = new ResourceFieldSchema();
        bagField.setType(DataType.BAG);
        bagField.setName("columns");
        // inside the bag, place one tuple with the default comparator/validator schema
        ResourceSchema bagTupleSchema = new ResourceSchema();
        ResourceFieldSchema bagTupleField = new ResourceFieldSchema();
        bagTupleField.setType(DataType.TUPLE);
        ResourceFieldSchema bagcolSchema = new ResourceFieldSchema();
        ResourceFieldSchema bagvalSchema = new ResourceFieldSchema();
        bagcolSchema.setName("name");
        bagvalSchema.setName("value");
        bagcolSchema.setType(getPigType(marshallers.get(0)));
        bagvalSchema.setType(getPigType(marshallers.get(1)));
        bagTupleSchema.setFields(new ResourceFieldSchema[] { bagcolSchema, bagvalSchema });
        bagTupleField.setSchema(bagTupleSchema);
        bagSchema.setFields(new ResourceFieldSchema[] { bagTupleField });
        bagField.setSchema(bagSchema);

        // will contain all fields for this schema
        List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>();
        // add the key first, then the indexed columns, and finally the bag
        allSchemaFields.add(keyFieldSchema);

        // defined validators/indexes
        for (ColumnDef cdef : cfDef.column_metadata)
        {
            // make a new tuple for each col/val pair
            ResourceSchema innerTupleSchema = new ResourceSchema();
            ResourceFieldSchema innerTupleField = new ResourceFieldSchema();
            innerTupleField.setType(DataType.TUPLE);
            innerTupleField.setSchema(innerTupleSchema);
            innerTupleField.setName(new String(cdef.getName()));

            ResourceFieldSchema idxColSchema = new ResourceFieldSchema();
            idxColSchema.setName("name");
            idxColSchema.setType(getPigType(marshallers.get(0)));

            ResourceFieldSchema valSchema = new ResourceFieldSchema();
            AbstractType validator = validators.get(cdef.name);
            if (validator == null)
                validator = marshallers.get(1);
            valSchema.setName("value");
            valSchema.setType(getPigType(validator));

            innerTupleSchema.setFields(new ResourceFieldSchema[] { idxColSchema, valSchema });
            allSchemaFields.add(innerTupleField);
        }
        // bag at the end for unknown columns
        allSchemaFields.add(bagField);

        // add top-level index elements if needed
        if (usePartitionFilter)
        {
            for (ColumnDef cdef : getIndexes())
            {
                ResourceFieldSchema idxSchema = new ResourceFieldSchema();
                idxSchema.setName("index_" + new String(cdef.getName()));
                AbstractType validator = validators.get(cdef.name);
                if (validator == null)
                    validator = marshallers.get(1);
                idxSchema.setType(getPigType(validator));
                allSchemaFields.add(idxSchema);
            }
        }
        // top level schema contains everything
        schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()]));
View Full Code Here

                throw new IOException("Unexpect end of bag");
            }
        }
        if (fss.length!=1)
            throw new IOException("Only tuple is allowed inside bag schema");
        ResourceFieldSchema fs = fss[0];
        DataBag db = DefaultBagFactory.getInstance().newDefaultBag();
        while (true) {
            t = consumeTuple(in, fs);
            if (t!=null)
                db.add(t);
View Full Code Here

        if (fieldSchema.getSchema()!=null && fieldSchema.getSchema().getFields().length!=0) {
            ResourceFieldSchema[] fss = fieldSchema.getSchema().getFields();
            // Interpret item inside tuple one by one based on the inner schema
            for (int i=0;i<fss.length;i++) {
                Object field;
                ResourceFieldSchema fs = fss[i];
                int delimit = ',';
                if (i==fss.length-1)
                    delimit = ')';
               
                if (DataType.isComplex(fs.getType())) {
                    field = consumeComplexType(in, fs);
                    while ((buf=in.read())!=delimit) {
                        if (buf==-1) {
                            throw new IOException("Unexpect end of tuple");
                        }
View Full Code Here

TOP

Related Classes of org.apache.pig.ResourceSchema.ResourceFieldSchema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.