Package abra.utils

Source Code of abra.utils.BaseQualityByRegion

/* Copyright 2013 University of North Carolina at Chapel Hill.  All rights reserved. */
package abra.utils;

import java.io.File;
import java.io.IOException;
import java.util.List;

import abra.Feature;
import abra.RegionLoader;
import abra.ReAligner;

import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;

/**
* Utility class for exploring base quality by region
*
* @author Lisle E. Mose (lmose at unc dot edu)
*/
public class BaseQualityByRegion {

  //TODO: Move to utils package
  public void run(String input, String regionsGtf) throws IOException {
   
    SAMFileReader reader = new SAMFileReader(new File(input));
   
    RegionLoader loader = new RegionLoader();
    List<Feature> regions = loader.load(regionsGtf);
   
    regions = ReAligner.splitRegions(regions);

    for (Feature region : regions) {
     
      long numReads = 0;
      long numBases = 0;
      long totalQuality = 0;
      long numBasesLt5 = 0;
      long numBasesLt10 = 0;
      long numBasesLt20 = 0;
      long numReadsLt5 = 0;
      long numReadsLt10 = 0;
      long numReadsLt20 = 0;
      long numReadsLt30 = 0;
      long numReads5X10 = 0;
      long numInternalReadsLt20 = 0;
      long numReadsWithAmbiguousBases = 0;
      long numReadsIntersectLt20Ambiguous = 0;
      long minMapq = 1000;
      long totalMapq = 0;
     
      CloseableIterator<SAMRecord> iter = reader.queryOverlapping(region.getSeqname(), (int) region.getStart(), (int) region.getEnd());
     
      while (iter.hasNext()) {
        SAMRecord read = iter.next();
       
        if (read.getMappingQuality() >= 10) {
          String qualStr = read.getBaseQualityString();
         
          boolean readLt5 = false;
          boolean readLt10 = false;
          boolean readLt20 = false;
          boolean readLt30 = false;
          boolean internalReadLt20 = false;
         
          numReads++;
          numBases += qualStr.length();
          int readBasesLt5 = 0;
 
          for (int i=0; i<qualStr.length(); i++) {
           
            // Assuming phred33
            int qual = qualStr.charAt(i) - '!';
            totalQuality += qual;
           
            if (qual < 5) {
              numBasesLt5++;
              readBasesLt5++;
              readLt5 = true;
            }
           
            if (qual < 10) {
              numBasesLt10++;
              readLt10 = true;
            }
           
            if (qual < 20) {
              numBasesLt20++;
              readLt20 = true;
             
              if ((i>=10) && (i<90)) {
                internalReadLt20 = true;
              }
            }
           
            if (qual < 30) {
              readLt30 = true;
            }
          }
         
          if (readLt5) {
            numReadsLt5++;
          }
         
          if (readLt10) {
            numReadsLt10++;
          }
         
          if (readLt20) {
            numReadsLt20++;
          }
         
          if (readLt30) {
            numReadsLt30++;
          }
         
          if (readBasesLt5 >= 10) {
            numReads5X10++;
          }
         
          if (internalReadLt20 == true) {
            numInternalReadsLt20++;
          }
         
          if (read.getReadString().contains("N")) {
            numReadsWithAmbiguousBases++;
          }
         
          if (readLt20 || read.getReadString().contains("N")) {
            numReadsIntersectLt20Ambiguous++;
          }
         
          if (read.getMappingQuality() < minMapq) {
            minMapq = read.getMappingQuality();
          }
         
          totalMapq += read.getMappingQuality();
        }
      }
     
      iter.close();
     
      System.out.println(region.getDescriptor() + "\t" +
          numBases + "\t" +
          numReads + "\t" +
          avg(numBasesLt5, numBases) + "\t" +
          avg(numBasesLt10, numBases) + "\t" +
          avg(numBasesLt20, numBases) + "\t" +
          avg(totalQuality, numBases) + "\t" +
          avg(numReadsLt5, numReads) + "\t" +
          avg(numReadsLt10, numReads) + "\t" +
          avg(numReadsLt20, numReads) + "\t" +
          avg(numReadsLt30, numReads) + "\t" +
          avg(numReads5X10, numReads) + "\t" +
          avg(numInternalReadsLt20, numReads) + "\t" +
          avg(numReadsWithAmbiguousBases, numReads) + "\t" +
          avg(numReadsIntersectLt20Ambiguous, numReads) + "\t" +
          avg(totalMapq, numReads) + "\t" +
          minMapq);
    }

    reader.close();
  }
 
  private double avg(long num, long dem) {
    return (double) num / (double) dem;
  }
 
  public static void main(String[] args) throws Exception {
    BaseQualityByRegion bq = new BaseQualityByRegion();
    bq.run(args[0],  args[1]);
  }
}
TOP

Related Classes of abra.utils.BaseQualityByRegion

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.