Package org.broadinstitute.gatk.tools.walkers.readutils

Source Code of org.broadinstitute.gatk.tools.walkers.readutils.SplitSamFile

/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package org.broadinstitute.gatk.tools.walkers.readutils;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.engine.walkers.DataSource;
import org.broadinstitute.gatk.engine.walkers.ReadWalker;
import org.broadinstitute.gatk.engine.walkers.Requires;
import org.broadinstitute.gatk.engine.walkers.WalkerName;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.sam.ReadUtils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* Divides the input data set into separate BAM files, one for each sample in the input data set.  The split
* files are named concatenating the sample name to the end of the provided outputRoot command-line argument.
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_DATA, extraDocs = {CommandLineGATK.class} )
@WalkerName("SplitSamFile")
@Requires({DataSource.READS})
public class SplitSamFile extends ReadWalker<SAMRecord, Map<String, SAMFileWriter>> {
    @Argument(fullName="outputRoot", doc="output BAM file", required=false)
    public String outputRoot = "";

    private static final Logger logger = Logger.getLogger(SplitSamFile.class);
    private static final String VERSION = "0.0.1";

    @Override
    public void initialize() {
        logger.info("SplitSamFile version: " + VERSION);
    }

    @Override
    public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) {
        return read;
    }

    // --------------------------------------------------------------------------------------------------------------
    //
    // Standard I/O routines
    //
    // --------------------------------------------------------------------------------------------------------------
    @Override
    public void onTraversalDone(Map<String, SAMFileWriter> outputs) {
        for ( SAMFileWriter output : outputs.values() ) {
            output.close();
        }
    }

    @Override
    public Map<String, SAMFileWriter> reduceInit() {
        HashMap<String, SAMFileHeader> headers = new HashMap<>();
        for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) {
            final String sample = readGroup.getSample();
            if ( ! headers.containsKey(sample) ) {
                SAMFileHeader header = duplicateSAMFileHeader(this.getToolkit().getSAMFileHeader());
                logger.debug(String.format("Creating BAM header for sample %s", sample));
                ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<>();
                header.setReadGroups(readGroups);
                headers.put(sample, header);
            }

            SAMFileHeader header = headers.get(sample);
            List<SAMReadGroupRecord> newReadGroups = new ArrayList<>(header.getReadGroups());
            newReadGroups.add(readGroup);
            header.setReadGroups(newReadGroups);
        }

        HashMap<String, SAMFileWriter> outputs = new HashMap<>();
        for ( Map.Entry<String, SAMFileHeader> elt : headers.entrySet() ) {
            final String sample = elt.getKey();
            final String filename = outputRoot + sample + ".bam";
            logger.info(String.format("Creating BAM output file %s for sample %s", filename, sample));

            final SAMFileWriter output = ReadUtils.createSAMFileWriter(filename, getToolkit(), elt.getValue());
            outputs.put(sample, output);
        }

        return outputs;
    }

    /**
     * Write out the read
     */
    @Override
    public Map<String, SAMFileWriter> reduce(SAMRecord read, Map<String, SAMFileWriter> outputs) {
        final String sample = read.getReadGroup().getSample();
        SAMFileWriter output = outputs.get(sample);

        if ( output != null ) {
            output.addAlignment(read);
        } else {
            throw new RuntimeException(String.format("Read group %s not present in header but found in read %s", read.getReadGroup().getReadGroupId(), read.getReadName()));
        }

        return outputs;
    }

    public static SAMFileHeader duplicateSAMFileHeader(SAMFileHeader toCopy) {
        SAMFileHeader copy = new SAMFileHeader();

        copy.setSortOrder(toCopy.getSortOrder());
        copy.setGroupOrder(toCopy.getGroupOrder());
        copy.setProgramRecords(toCopy.getProgramRecords());
        copy.setReadGroups(toCopy.getReadGroups());
        copy.setSequenceDictionary(toCopy.getSequenceDictionary());

        for (Map.Entry<String, String> e : toCopy.getAttributes())
            copy.setAttribute(e.getKey(), e.getValue());

        return copy;
    }

}
TOP

Related Classes of org.broadinstitute.gatk.tools.walkers.readutils.SplitSamFile

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.