package eu.isas.peptideshaker.fileimport;
import com.compomics.mascotdatfile.util.io.MascotIdfileReader;
import com.compomics.util.db.ObjectsCache;
import com.compomics.util.exceptions.ExceptionHandler;
import com.compomics.util.experiment.biology.PTM;
import com.compomics.util.experiment.biology.PTMFactory;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.identification.Advocate;
import com.compomics.util.experiment.identification.Identification;
import com.compomics.util.experiment.identification.IdentificationAlgorithmParameter;
import com.compomics.util.experiment.identification.PeptideAssumption;
import com.compomics.util.experiment.identification.SearchParameters;
import com.compomics.util.experiment.identification.SequenceFactory;
import com.compomics.util.experiment.identification.SpectrumIdentificationAssumption;
import com.compomics.util.experiment.identification.TagAssumption;
import com.compomics.util.experiment.identification.identification_parameters.XtandemParameters;
import com.compomics.util.experiment.identification.matches.ModificationMatch;
import com.compomics.util.experiment.identification.matches.SpectrumMatch;
import com.compomics.util.experiment.identification.protein_inference.proteintree.ProteinTreeComponentsFactory;
import com.compomics.util.experiment.identification.ptm.PtmSiteMapping;
import com.compomics.util.experiment.io.identifications.IdfileReader;
import com.compomics.util.experiment.io.identifications.idfilereaders.DirecTagIdfileReader;
import com.compomics.util.experiment.io.identifications.idfilereaders.MsAmandaIdfileReader;
import com.compomics.util.experiment.io.identifications.idfilereaders.MzIdentMLIdfileReader;
import com.compomics.util.experiment.io.identifications.idfilereaders.PepxmlIdfileReader;
import com.compomics.util.experiment.massspectrometry.Spectrum;
import com.compomics.util.experiment.massspectrometry.SpectrumFactory;
import com.compomics.util.memory.MemoryConsumptionStatus;
import com.compomics.util.preferences.IdFilter;
import com.compomics.util.preferences.ModificationProfile;
import com.compomics.util.preferences.ProcessingPreferences;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import com.compomics.util.waiting.WaitingHandler;
import de.proteinms.omxparser.util.OMSSAIdfileReader;
import de.proteinms.xtandemparser.parser.XTandemIdfileReader;
import static eu.isas.peptideshaker.fileimport.FileImporter.ptmMassTolerance;
import eu.isas.peptideshaker.scoring.InputMap;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import uk.ac.ebi.jmzml.xml.io.MzMLUnmarshallerException;
/**
* This class can be used to import PSMs from search engine results.
*
* @author Marc Vaudel
*/
public class PsmImporter {
/**
* The protein sequence factory.
*/
private SequenceFactory sequenceFactory = SequenceFactory.getInstance();
/**
* The PTM factory.
*/
private PTMFactory ptmFactory = PTMFactory.getInstance();
/**
* The spectrum factory.
*/
private SpectrumFactory spectrumFactory = SpectrumFactory.getInstance();
/**
* The object cache to use when encountering memory issues.
*/
private ObjectsCache peptideShakerCache;
/**
* Indicates whether the check for X!Tandem modifications was done. Set to
* true by verifyXTandemPtms() so that the check runs only once.
*/
private boolean xTandemPtmsCheck = false;
/**
* The number of first hits, incremented once per advocate of every imported
* spectrum match.
*/
private long nPSMs = 0;
/**
* The number of secondary hits, i.e. for every counted first hit the number
* of assumptions of the spectrum match minus one.
*/
private long nSecondary = 0;
/**
* The identification filter used to validate peptides, modifications,
* precursors and proteins upon import.
*/
private IdFilter idFilter;
/**
* The sequence matching preferences.
*/
private SequenceMatchingPreferences sequenceMatchingPreferences;
/**
* The search parameters.
*/
private SearchParameters searchParameters;
/**
* The processing preferences.
*/
private ProcessingPreferences processingPreferences;
/**
* The progress of the import, reported to the waiting handler as secondary
* progress counter.
*/
int progress = 0;
/**
* The number of PSMs which did not pass the import filters, i.e. where no
* assumption was left for an advocate.
*/
int psmsRejected = 0;
/**
* The number of assumptions which were rejected due to a protein issue.
*/
int proteinIssue = 0;
/**
* The number of assumptions which were rejected due to a peptide issue.
*/
int peptideIssue = 0;
/**
* The number of assumptions which were rejected due to a precursor issue.
*/
int precursorIssue = 0;
/**
* The number of assumptions which were rejected due to a PTM issue.
*/
int ptmIssue = 0;
/**
* The number of retained first hits, incremented every time a spectrum
* match is added to the identification.
*/
private int nRetained = 0;
/**
* The id file reader where the PSMs are from. Its type decides how the
* search engine modifications are interpreted.
*/
private IdfileReader fileReader;
/**
* The identification file where the PSMs are from.
*/
private File idFile;
/**
* List of the OMSSA modification indexes which could not be found in the
* modification profile and were already reported to the user.
*/
private ArrayList<Integer> ignoredOMSSAModifications = new ArrayList<Integer>();
/**
* The maximal peptide mass error found among the first peptide hits in ppm.
*/
double maxPeptideErrorPpm = 0;
/**
* The maximal peptide mass error found among the first peptide hits in Da.
*/
double maxPeptideErrorDa = 0;
/**
* The maximal tag mass error found among the first tag hits in ppm.
*/
double maxTagErrorPpm = 0;
/**
* The maximal tag mass error found among the first tag hits in Da.
*/
double maxTagErrorDa = 0;
/**
* Set of the charges found among the first hits.
*/
HashSet<Integer> charges = new HashSet<Integer>();
/**
* Set of one hit wonders: proteins which appeared exactly once as parent
* protein of a first peptide hit. A protein found a second time is moved to
* the protein count map.
*/
private HashSet<String> singleProteinList;
/**
* Map of proteins found several times with the number of times they
* appeared as first hit.
*/
private HashMap<String, Integer> proteinCount;
/**
* The identification object where the imported spectrum matches are
* stored.
*/
private Identification identification;
/**
* The input map receiving the score of the first peptide hits.
*/
private InputMap inputMap;
/**
* The exception handler.
*/
private ExceptionHandler exceptionHandler;
/**
 * Creates an importer for the PSMs of one identification file.
 *
 * @param peptideShakerCache the cache to shrink when memory runs short
 * @param idFilter the filter applied to the matches upon import
 * @param sequenceMatchingPreferences the sequence matching preferences
 * @param searchParameters the search parameters
 * @param processingPreferences the processing preferences
 * @param fileReader the reader which produced the matches to import
 * @param idFile the identification file the matches originate from
 * @param identification the identification object receiving the imported
 * matches
 * @param inputMap the input map used for scoring
 * @param proteinCount the protein count of this project
 * @param singleProteinList the one hit wonders of this project
 * @param exceptionHandler the handler for exceptions
 */
public PsmImporter(ObjectsCache peptideShakerCache, IdFilter idFilter, SequenceMatchingPreferences sequenceMatchingPreferences,
        SearchParameters searchParameters, ProcessingPreferences processingPreferences, IdfileReader fileReader, File idFile,
        Identification identification, InputMap inputMap, HashMap<String, Integer> proteinCount, HashSet<String> singleProteinList,
        ExceptionHandler exceptionHandler) {

    // origin of the matches
    this.idFile = idFile;
    this.fileReader = fileReader;

    // import settings
    this.searchParameters = searchParameters;
    this.processingPreferences = processingPreferences;
    this.sequenceMatchingPreferences = sequenceMatchingPreferences;
    this.idFilter = idFilter;

    // destinations of the imported information
    this.identification = identification;
    this.inputMap = inputMap;
    this.singleProteinList = singleProteinList;
    this.proteinCount = proteinCount;

    // infrastructure
    this.peptideShakerCache = peptideShakerCache;
    this.exceptionHandler = exceptionHandler;
}
/**
 * Imports PSMs. The matches are imported on the calling thread when at most
 * one thread is requested, and using a thread pool otherwise.
 *
 * @param idFileSpectrumMatches the PSMs to import, the list is emptied as
 * the matches are processed
 * @param nThreads the number of threads to use; values below one are
 * treated as one
 * @param waitingHandler waiting handler to display progress and allow
 * canceling the import
 *
 * @throws IOException propagated from the PSM import
 * @throws SQLException propagated from the PSM import
 * @throws FileNotFoundException propagated from the PSM import
 * @throws InterruptedException propagated from the PSM import
 * @throws ClassNotFoundException propagated from the PSM import
 * @throws MzMLUnmarshallerException propagated from the PSM import
 */
public void importPsms(LinkedList<SpectrumMatch> idFileSpectrumMatches, int nThreads, WaitingHandler waitingHandler)
        throws IOException, SQLException, FileNotFoundException, InterruptedException, ClassNotFoundException, MzMLUnmarshallerException {
    // A thread pool cannot be created with zero or a negative number of
    // threads, so anything below two is handled on the calling thread.
    if (nThreads <= 1) {
        importPsmsSingleThread(idFileSpectrumMatches, waitingHandler);
    } else {
        importPsmsMultipleThreads(idFileSpectrumMatches, nThreads, waitingHandler);
    }
}
/**
 * Imports PSMs using multiple threads. Every spectrum match is wrapped in a
 * PsmImporterRunnable and submitted to a fixed size thread pool. The pool is
 * stopped whenever this method does not end normally (cancellation, timeout,
 * interruption or any other exception) so that no worker thread is left
 * behind.
 *
 * @param idFileSpectrumMatches the PSMs to import, the list is emptied as
 * the matches are submitted
 * @param nThreads the number of threads to use, must be positive
 * @param waitingHandler waiting handler to display progress and allow
 * canceling the import
 *
 * @throws IOException declared by the method signature
 * @throws SQLException declared by the method signature
 * @throws FileNotFoundException declared by the method signature
 * @throws InterruptedException if the import did not finish within 12
 * hours or if the thread was interrupted while waiting for the pool
 * @throws ClassNotFoundException declared by the method signature
 * @throws MzMLUnmarshallerException declared by the method signature
 */
public void importPsmsMultipleThreads(LinkedList<SpectrumMatch> idFileSpectrumMatches, int nThreads, WaitingHandler waitingHandler)
        throws IOException, SQLException, FileNotFoundException, InterruptedException, ClassNotFoundException, MzMLUnmarshallerException {

    ExecutorService pool = Executors.newFixedThreadPool(nThreads);
    boolean completed = false;

    try {
        while (!idFileSpectrumMatches.isEmpty()) {

            SpectrumMatch match = idFileSpectrumMatches.pollLast();
            PsmImporterRunnable psmImporterRunnable = new PsmImporterRunnable(match, waitingHandler);
            pool.submit(psmImporterRunnable);

            if (waitingHandler.isRunCanceled()) {
                // the pool is stopped in the finally block
                return;
            }
        }

        pool.shutdown();

        if (!pool.awaitTermination(12, TimeUnit.HOURS)) {
            throw new InterruptedException("PSM import timed out. Please contact the developers.");
        }

        completed = true;

    } finally {
        // Cancel the pending and running tasks on every abnormal exit,
        // otherwise the worker threads of the pool would stay alive.
        if (!completed) {
            pool.shutdownNow();
        }
    }
}
/**
 * Imports PSMs using a single thread. The matches are taken from the end of
 * the list and imported one after the other; the import stops as soon as
 * the waiting handler reports that the run was canceled.
 *
 * @param idFileSpectrumMatches the PSMs to import, the list is emptied as
 * the matches are processed
 * @param waitingHandler waiting handler to display progress and allow
 * canceling the import
 *
 * @throws IOException propagated from the PSM import
 * @throws SQLException propagated from the PSM import
 * @throws FileNotFoundException propagated from the PSM import
 * @throws InterruptedException propagated from the PSM import
 * @throws ClassNotFoundException propagated from the PSM import
 * @throws MzMLUnmarshallerException propagated from the PSM import
 */
private void importPsmsSingleThread(LinkedList<SpectrumMatch> idFileSpectrumMatches, WaitingHandler waitingHandler)
        throws IOException, SQLException, FileNotFoundException, InterruptedException, ClassNotFoundException, MzMLUnmarshallerException {
    while (!idFileSpectrumMatches.isEmpty()) {

        SpectrumMatch match = idFileSpectrumMatches.pollLast();
        importPsm(match, waitingHandler);

        // Same cancel check as in the multiple threads import: without it
        // all remaining matches would still be processed after a cancel.
        if (waitingHandler.isRunCanceled()) {
            return;
        }
    }
}
/**
* Imports a PSM. For every advocate of the spectrum match: the peptide
* assumptions not passing the peptide filter are removed, the modifications
* reported by the search engine are mapped onto the modifications of the
* search parameters, the assumptions are validated using the import filter
* (modifications, precursor, proteins), and the first remaining peptide
* assumption (or tag assumption when no peptide assumption is left), taken
* by ascending score key, is set as first hit before the match is added to
* the identification.
*
* NOTE(review): the counters updated here (nPSMs, nSecondary, psmsRejected,
* peptideIssue, ptmIssue, precursorIssue, proteinIssue, nRetained, progress)
* are modified without synchronization. Presumably this method is also run
* by the threads started in importPsmsMultipleThreads - confirm whether the
* counters need protection.
*
* @param spectrumMatch the spectrum match to import
* @param waitingHandler waiting handler to display progress and allow
* canceling the import
*
* @throws IOException
* @throws SQLException
* @throws FileNotFoundException
* @throws InterruptedException
* @throws ClassNotFoundException
* @throws MzMLUnmarshallerException
*/
private void importPsm(SpectrumMatch spectrumMatch, WaitingHandler waitingHandler)
throws IOException, SQLException, FileNotFoundException, InterruptedException, ClassNotFoundException, MzMLUnmarshallerException {
// free memory if needed (PeptideShaker object cache)
if (MemoryConsumptionStatus.memoryUsed() > 0.9 && !peptideShakerCache.isEmpty()) {
peptideShakerCache.reduceMemoryConsumption(0.5, null);
}
// free memory if needed (protein tree components cache)
if (MemoryConsumptionStatus.memoryUsed() > 0.9 && !ProteinTreeComponentsFactory.getInstance().getCache().isEmpty()) {
ProteinTreeComponentsFactory.getInstance().getCache().reduceMemoryConsumption(0.5, null);
}
// reduce the node cache of the sequence factory when halfGbFree() reports false
if (!MemoryConsumptionStatus.halfGbFree() && sequenceFactory.getNodesInCache() > 0) {
sequenceFactory.reduceNodeCacheSize(0.5);
}
// the assumptions of every advocate (identification algorithm) are processed separately
for (int advocateId : spectrumMatch.getAdvocates()) {
if (advocateId == Advocate.xtandem.getIndex()) {
verifyXTandemPtms();
}
// NOTE(review): counted once per advocate, and the secondary hits are taken over the assumptions of all advocates
nPSMs++;
nSecondary += spectrumMatch.getAllAssumptions().size() - 1;
String spectrumKey = spectrumMatch.getKey();
String fileName = Spectrum.getSpectrumFile(spectrumKey);
String spectrumTitle = Spectrum.getSpectrumTitle(spectrumKey);
// remove the peptide assumptions which do not pass the peptide filter
// NOTE(review): assumptions are removed from the match while iterating getAllAssumptions() - assumes this method returns a copy, TODO confirm
for (SpectrumIdentificationAssumption assumption : spectrumMatch.getAllAssumptions()) {
if (assumption instanceof PeptideAssumption) {
PeptideAssumption peptideAssumption = (PeptideAssumption) assumption;
if (!idFilter.validatePeptide(peptideAssumption.getPeptide(), sequenceMatchingPreferences)) {
spectrumMatch.removeAssumption(assumption);
peptideIssue++;
}
}
}
if (!spectrumMatch.hasAssumption(advocateId)) {
// no assumption left for this advocate
psmsRejected++;
} else {
// NOTE(review): this check repeats the condition of the enclosing else branch
if (spectrumMatch.hasAssumption(advocateId)) {
// Check whether there is a potential first hit which does not belong to the target and the decoy database
// The assumptions are visited by ascending score key
ArrayList<Double> eValues = new ArrayList<Double>(spectrumMatch.getAllAssumptions(advocateId).keySet());
Collections.sort(eValues);
for (Double eValue : eValues) {
// iterate a copy since assumptions can be removed from the match in the loop
ArrayList<SpectrumIdentificationAssumption> tempAssumptions
= new ArrayList<SpectrumIdentificationAssumption>(spectrumMatch.getAllAssumptions(advocateId).get(eValue));
for (SpectrumIdentificationAssumption assumption : tempAssumptions) {
if (assumption instanceof PeptideAssumption) {
PeptideAssumption peptideAssumption = (PeptideAssumption) assumption;
Peptide peptide = peptideAssumption.getPeptide();
String peptideSequence = peptide.getSequence();
// map the algorithm specific modifications on utilities modifications
// If there are not enough sites to put them all on the sequence, add an unknown modification
// Note: this needs to be done for tag based assumptions as well since the protein mapping can return erroneous modifications for some pattern based PTMs
ModificationProfile modificationProfile = searchParameters.getModificationProfile();
boolean fixedPtmIssue = false;
try {
ptmFactory.checkFixedModifications(modificationProfile, peptide, sequenceMatchingPreferences);
} catch (IllegalArgumentException e) {
if (idFilter.removeUnknownPTMs()) {
// Exclude peptides with aberrant PTM mapping
System.out.println(e.getMessage());
spectrumMatch.removeAssumption(assumption);
ptmIssue++;
fixedPtmIssue = true;
} else {
throw e;
}
}
if (!fixedPtmIssue) {
// expectedNames: site -> names of the PTMs expected at this site over all variable modifications
// modNames: modification match -> names of the PTMs this match can correspond to
HashMap<Integer, ArrayList<String>> expectedNames = new HashMap<Integer, ArrayList<String>>();
HashMap<ModificationMatch, ArrayList<String>> modNames = new HashMap<ModificationMatch, ArrayList<String>>();
for (ModificationMatch modMatch : peptide.getModificationMatches()) {
HashMap<Integer, ArrayList<String>> tempNames = new HashMap<Integer, ArrayList<String>>();
if (modMatch.isVariable()) {
String sePTM = modMatch.getTheoreticPtm();
if (fileReader instanceof OMSSAIdfileReader) {
// the modification is given as an index which is looked up in the modification profile
Integer omssaIndex = null;
try {
omssaIndex = new Integer(sePTM);
} catch (Exception e) {
waitingHandler.appendReport("Impossible to parse OMSSA modification " + sePTM + ".", true, true);
}
if (omssaIndex != null) {
String omssaName = modificationProfile.getModification(omssaIndex);
if (omssaName == null) {
// report every missing index only once
if (!ignoredOMSSAModifications.contains(omssaIndex)) {
waitingHandler.appendReport("Impossible to find OMSSA modification of index "
+ omssaIndex + ". The corresponding peptides will be ignored.", true, true);
ignoredOMSSAModifications.add(omssaIndex);
}
omssaName = PTMFactory.unknownPTM.getName();
}
tempNames = ptmFactory.getExpectedPTMs(modificationProfile, peptide, omssaName, ptmMassTolerance, sequenceMatchingPreferences);
}
} else if (fileReader instanceof MascotIdfileReader
|| fileReader instanceof XTandemIdfileReader
|| fileReader instanceof MsAmandaIdfileReader
|| fileReader instanceof MzIdentMLIdfileReader
|| fileReader instanceof PepxmlIdfileReader) {
// the modification is given as a tag where the part before the '@' is the modification mass
String[] parsedName = sePTM.split("@");
double seMass = 0;
try {
seMass = new Double(parsedName[0]);
} catch (Exception e) {
throw new IllegalArgumentException("Impossible to parse \'" + sePTM + "\' as a tagged modification.\n"
+ "Error encountered in peptide " + peptideSequence + " spectrum " + spectrumTitle + " in spectrum file " + fileName + ".\n"
+ "Identification file: " + idFile.getName());
}
tempNames = ptmFactory.getExpectedPTMs(modificationProfile, peptide, seMass, ptmMassTolerance, sequenceMatchingPreferences);
} else if (fileReader instanceof DirecTagIdfileReader) {
// the modification is looked up by name in the PTM factory and mapped using its mass
PTM ptm = ptmFactory.getPTM(sePTM);
if (ptm == PTMFactory.unknownPTM) {
throw new IllegalArgumentException("PTM not recognized spectrum " + spectrumTitle + " of file " + fileName + ".");
}
tempNames = ptmFactory.getExpectedPTMs(modificationProfile, peptide, ptm.getMass(), ptmMassTolerance, sequenceMatchingPreferences);
} else {
throw new IllegalArgumentException("PTM mapping not implemented for the parsing of " + idFile.getName() + ".");
}
// gather the distinct PTM names found over all sites for this modification match
ArrayList<String> allNames = new ArrayList<String>();
for (ArrayList<String> namesAtAA : tempNames.values()) {
for (String name : namesAtAA) {
if (!allNames.contains(name)) {
allNames.add(name);
}
}
}
modNames.put(modMatch, allNames);
// merge the names found for this modification match into the site map
for (int pos : tempNames.keySet()) {
ArrayList<String> namesAtPosition = expectedNames.get(pos);
if (namesAtPosition == null) {
namesAtPosition = new ArrayList<String>(2);
expectedNames.put(pos, namesAtPosition);
}
for (String ptmName : tempNames.get(pos)) {
if (!namesAtPosition.contains(ptmName)) {
namesAtPosition.add(ptmName);
}
}
}
}
}
// If a terminal modification cannot be elsewhere lock the terminus
ModificationMatch nTermModification = null;
for (ModificationMatch modMatch : peptide.getModificationMatches()) {
if (modMatch.isVariable() && !modMatch.getTheoreticPtm().equals(PTMFactory.unknownPTM.getName())) {
double refMass = getRefMass(modMatch.getTheoreticPtm(), modificationProfile);
int modSite = modMatch.getModificationSite();
if (modSite == 1) {
ArrayList<String> expectedNamesAtSite = expectedNames.get(modSite);
if (expectedNamesAtSite != null) {
// keep only the names whose mass differs from the reference mass by less than the fragment ion accuracy
ArrayList<String> filteredNamesAtSite = new ArrayList<String>(expectedNamesAtSite.size());
for (String ptmName : expectedNamesAtSite) {
PTM ptm = ptmFactory.getPTM(ptmName);
if (Math.abs(ptm.getMass() - refMass) < searchParameters.getFragmentIonAccuracy()) {
filteredNamesAtSite.add(ptmName);
}
}
for (String modName : filteredNamesAtSite) {
PTM ptm = ptmFactory.getPTM(modName);
if (ptm.isNTerm()) {
// look for another non fixed modification of exactly the same mass which is not N-terminal
boolean otherPossibleMod = false;
for (String tempName : modificationProfile.getAllNotFixedModifications()) {
if (!tempName.equals(modName)) {
PTM tempPTM = ptmFactory.getPTM(tempName);
if (tempPTM.getMass() == ptm.getMass() && !tempPTM.isNTerm()) {
otherPossibleMod = true;
break;
}
}
}
if (!otherPossibleMod) {
nTermModification = modMatch;
modMatch.setTheoreticPtm(modName);
break;
}
}
}
if (nTermModification != null) {
break;
}
}
}
}
}
// Same for the C-terminus, skipping the modification locked on the N-terminus
ModificationMatch cTermModification = null;
for (ModificationMatch modMatch : peptide.getModificationMatches()) {
if (modMatch.isVariable() && !modMatch.getTheoreticPtm().equals(PTMFactory.unknownPTM.getName()) && modMatch != nTermModification) {
double refMass = getRefMass(modMatch.getTheoreticPtm(), modificationProfile);
int modSite = modMatch.getModificationSite();
if (modSite == peptideSequence.length()) {
ArrayList<String> expectedNamesAtSite = expectedNames.get(modSite);
if (expectedNamesAtSite != null) {
ArrayList<String> filteredNamesAtSite = new ArrayList<String>(expectedNamesAtSite.size());
for (String ptmName : expectedNamesAtSite) {
PTM ptm = ptmFactory.getPTM(ptmName);
if (Math.abs(ptm.getMass() - refMass) < searchParameters.getFragmentIonAccuracy()) {
filteredNamesAtSite.add(ptmName);
}
}
for (String modName : filteredNamesAtSite) {
PTM ptm = ptmFactory.getPTM(modName);
if (ptm.isCTerm()) {
boolean otherPossibleMod = false;
for (String tempName : modificationProfile.getAllNotFixedModifications()) {
if (!tempName.equals(modName)) {
PTM tempPTM = ptmFactory.getPTM(tempName);
if (tempPTM.getMass() == ptm.getMass() && !tempPTM.isCTerm()) {
otherPossibleMod = true;
break;
}
}
}
if (!otherPossibleMod) {
cTermModification = modMatch;
modMatch.setTheoreticPtm(modName);
break;
}
}
}
if (cTermModification != null) {
break;
}
}
}
}
}
// Map the modifications according to search engine localization
HashMap<Integer, ArrayList<String>> siteToPtmMap = new HashMap<Integer, ArrayList<String>>(); // Site to ptm name including termini
HashMap<Integer, ModificationMatch> siteToMatchMap = new HashMap<Integer, ModificationMatch>(); // Site to Modification match excluding termini
HashMap<ModificationMatch, Integer> matchToSiteMap = new HashMap<ModificationMatch, Integer>(); // Modification match to site excluding termini
boolean allMapped = true;
for (ModificationMatch modMatch : peptide.getModificationMatches()) {
boolean mapped = false;
if (modMatch.isVariable() && modMatch != nTermModification && modMatch != cTermModification && !modMatch.getTheoreticPtm().equals(PTMFactory.unknownPTM.getName())) {
double refMass = getRefMass(modMatch.getTheoreticPtm(), modificationProfile);
int modSite = modMatch.getModificationSite();
boolean terminal = false;
ArrayList<String> expectedNamesAtSite = expectedNames.get(modSite);
if (expectedNamesAtSite != null) {
// candidates: names of matching mass which are not already used at this site
ArrayList<String> filteredNamesAtSite = new ArrayList<String>(expectedNamesAtSite.size());
ArrayList<String> modificationAtSite = siteToPtmMap.get(modSite);
for (String ptmName : expectedNamesAtSite) {
PTM ptm = ptmFactory.getPTM(ptmName);
if (Math.abs(ptm.getMass() - refMass) < searchParameters.getFragmentIonAccuracy()
&& (modificationAtSite == null || !modificationAtSite.contains(ptmName))) {
filteredNamesAtSite.add(ptmName);
}
}
// a single candidate: use it if the corresponding terminus or site is still available
if (filteredNamesAtSite.size() == 1) {
String ptmName = filteredNamesAtSite.get(0);
PTM ptm = ptmFactory.getPTM(ptmName);
if (ptm.isNTerm() && nTermModification == null) {
nTermModification = modMatch;
mapped = true;
} else if (ptm.isCTerm() && cTermModification == null) {
cTermModification = modMatch;
mapped = true;
} else if (!ptm.isNTerm() && !ptm.isCTerm()) {
matchToSiteMap.put(modMatch, modSite);
siteToMatchMap.put(modSite, modMatch);
mapped = true;
}
if (mapped) {
modMatch.setTheoreticPtm(ptmName);
if (modificationAtSite == null) {
modificationAtSite = new ArrayList<String>(2);
siteToPtmMap.put(modSite, modificationAtSite);
}
modificationAtSite.add(ptmName);
}
}
if (!mapped) {
// fall back on all expected names when the filtering left no candidate
if (filteredNamesAtSite.isEmpty()) {
filteredNamesAtSite = expectedNamesAtSite;
}
// at a terminal residue, try first the free terminus using the candidate of smallest mass error
if (modSite == 1) {
Double minDiff = null;
String bestPtmName = null;
for (String modName : filteredNamesAtSite) {
PTM ptm = ptmFactory.getPTM(modName);
if (ptm.isNTerm() && nTermModification == null) {
double massError = Math.abs(refMass - ptm.getMass());
if (massError <= searchParameters.getFragmentIonAccuracy()
&& (minDiff == null || massError < minDiff)) {
bestPtmName = modName;
minDiff = massError;
}
}
}
if (bestPtmName != null) {
nTermModification = modMatch;
modMatch.setTheoreticPtm(bestPtmName);
terminal = true;
if (modificationAtSite == null) {
modificationAtSite = new ArrayList<String>(2);
siteToPtmMap.put(modSite, modificationAtSite);
}
modificationAtSite.add(bestPtmName);
mapped = true;
}
} else if (modSite == peptideSequence.length()) {
Double minDiff = null;
String bestPtmName = null;
for (String modName : filteredNamesAtSite) {
PTM ptm = ptmFactory.getPTM(modName);
if (ptm.isCTerm() && cTermModification == null) {
double massError = Math.abs(refMass - ptm.getMass());
if (massError <= searchParameters.getFragmentIonAccuracy()
&& (minDiff == null || massError < minDiff)) {
bestPtmName = modName;
minDiff = massError;
}
}
}
if (bestPtmName != null) {
cTermModification = modMatch;
modMatch.setTheoreticPtm(bestPtmName);
terminal = true;
if (modificationAtSite == null) {
modificationAtSite = new ArrayList<String>(2);
siteToPtmMap.put(modSite, modificationAtSite);
}
modificationAtSite.add(bestPtmName);
mapped = true;
}
}
// not mapped to a terminus: try the non terminal candidates, provided no match already occupies the site
if (!terminal) {
Double minDiff = null;
String bestPtmName = null;
for (String modName : filteredNamesAtSite) {
PTM ptm = ptmFactory.getPTM(modName);
if (!ptm.isCTerm() && !ptm.isNTerm() && modNames.get(modMatch).contains(modName) && !siteToMatchMap.containsKey(modSite)) {
double massError = Math.abs(refMass - ptm.getMass());
if (massError <= searchParameters.getFragmentIonAccuracy()
&& (minDiff == null || massError < minDiff)) {
bestPtmName = modName;
minDiff = massError;
}
}
}
if (bestPtmName != null) {
modMatch.setTheoreticPtm(bestPtmName);
if (modificationAtSite == null) {
modificationAtSite = new ArrayList<String>(2);
siteToPtmMap.put(modSite, modificationAtSite);
}
modificationAtSite.add(bestPtmName);
matchToSiteMap.put(modMatch, modSite);
siteToMatchMap.put(modSite, modMatch);
mapped = true;
}
}
}
}
}
// NOTE(review): fixed, unknown and already locked terminal modifications also end up here with mapped == false
if (!mapped) {
allMapped = false;
}
}
if (!allMapped) {
// Try to correct incompatible localizations
// remap: reported site of an unmapped modification -> free sites where one of its possible PTMs is expected
HashMap<Integer, ArrayList<Integer>> remap = new HashMap<Integer, ArrayList<Integer>>();
for (ModificationMatch modMatch : peptide.getModificationMatches()) {
if (modMatch.isVariable() && modMatch != nTermModification && modMatch != cTermModification && !matchToSiteMap.containsKey(modMatch) && !modMatch.getTheoreticPtm().equals(PTMFactory.unknownPTM.getName())) {
int modSite = modMatch.getModificationSite();
for (int candidateSite : expectedNames.keySet()) {
if (!siteToMatchMap.containsKey(candidateSite)) {
for (String modName : expectedNames.get(candidateSite)) {
if (modNames.get(modMatch).contains(modName)) {
PTM ptm = ptmFactory.getPTM(modName);
if ((!ptm.isCTerm() || cTermModification == null)
&& (!ptm.isNTerm() || nTermModification == null)) {
ArrayList<Integer> ptmSites = remap.get(modSite);
if (ptmSites == null) {
ptmSites = new ArrayList<Integer>(4);
remap.put(modSite, ptmSites);
}
if (!ptmSites.contains(candidateSite)) {
ptmSites.add(candidateSite);
}
}
}
}
}
}
}
}
// presumably returns reported site -> corrected site - TODO confirm against PtmSiteMapping
HashMap<Integer, Integer> correctedIndexes = PtmSiteMapping.alignAll(remap);
for (ModificationMatch modMatch : peptide.getModificationMatches()) {
if (modMatch.isVariable() && modMatch != nTermModification && modMatch != cTermModification && !matchToSiteMap.containsKey(modMatch) && !modMatch.getTheoreticPtm().equals(PTMFactory.unknownPTM.getName())) {
Integer modSite = correctedIndexes.get(modMatch.getModificationSite());
if (modSite != null) {
if (expectedNames.containsKey(modSite)) {
// use the first possible PTM of this match which is not already taken at the corrected site
for (String modName : expectedNames.get(modSite)) {
if (modNames.get(modMatch).contains(modName)) {
ArrayList<String> taken = siteToPtmMap.get(modSite);
if (taken == null || !taken.contains(modName)) {
matchToSiteMap.put(modMatch, modSite);
modMatch.setTheoreticPtm(modName);
modMatch.setModificationSite(modSite);
if (taken == null) {
taken = new ArrayList<String>(2);
siteToPtmMap.put(modSite, taken);
}
taken.add(modName);
break;
}
}
}
}
} else {
// NOTE(review): modSite is null here, the match is registered with a null site and set to the unknown PTM
matchToSiteMap.put(modMatch, modSite);
modMatch.setTheoreticPtm(PTMFactory.unknownPTM.getName());
}
// no compatible PTM could be placed at the corrected site
if (!matchToSiteMap.containsKey(modMatch)) {
modMatch.setTheoreticPtm(PTMFactory.unknownPTM.getName());
}
}
}
}
// validate the assumption with the mapped modifications, rejected assumptions are removed and counted by cause
if (idFilter.validateModifications(peptide, sequenceMatchingPreferences, searchParameters.getModificationProfile())) {
// Estimate the theoretic mass with the new modifications
peptide.estimateTheoreticMass();
if (!idFilter.validatePrecursor(peptideAssumption, spectrumKey, spectrumFactory)) {
spectrumMatch.removeAssumption(assumption);
precursorIssue++;
} else if (!idFilter.validateProteins(peptideAssumption.getPeptide(), sequenceMatchingPreferences)) {
// Check whether there is a potential first hit which does not belong to both the target and the decoy database
spectrumMatch.removeAssumption(assumption);
proteinIssue++;
}
} else {
spectrumMatch.removeAssumption(assumption);
ptmIssue++;
}
}
}
}
}
}
// the filtering above may have removed all assumptions of this advocate
if (spectrumMatch.hasAssumption(advocateId)) {
// try to find the best peptide hit
PeptideAssumption firstPeptideHit = null;
ArrayList<Double> eValues = new ArrayList<Double>(spectrumMatch.getAllAssumptions(advocateId).keySet());
Collections.sort(eValues);
for (Double eValue : eValues) {
for (SpectrumIdentificationAssumption assumption : spectrumMatch.getAllAssumptions(advocateId).get(eValue)) {
if (assumption instanceof PeptideAssumption) {
PeptideAssumption peptideAssumption = (PeptideAssumption) assumption;
firstPeptideHit = peptideAssumption;
spectrumMatch.setFirstHit(advocateId, assumption);
checkPeptidesMassErrorsAndCharges(spectrumKey, peptideAssumption);
// the input map is only fed when no scoring is needed for this advocate
if (!processingPreferences.isScoringNeeded(advocateId)) {
inputMap.addEntry(advocateId, fileName, firstPeptideHit.getScore(), firstPeptideHit.getPeptide().isDecoy(sequenceMatchingPreferences));
}
identification.addSpectrumMatch(spectrumMatch, false); //@TODO: adapt to the different scores
nRetained++;
break;
}
}
if (firstPeptideHit != null) {
break;
}
}
if (firstPeptideHit == null) {
// Try to find the best tag hit
TagAssumption firstTagHit = null;
for (Double eValue : eValues) {
for (SpectrumIdentificationAssumption assumption : spectrumMatch.getAllAssumptions(advocateId).get(eValue)) {
if (assumption instanceof TagAssumption) {
TagAssumption tagAssumption = (TagAssumption) assumption;
firstTagHit = tagAssumption;
spectrumMatch.setFirstHit(advocateId, assumption);
checkTagMassErrorsAndCharge(spectrumKey, tagAssumption);
identification.addSpectrumMatch(spectrumMatch, false); //@TODO: adapt to the different scores
nRetained++;
break;
}
}
if (firstTagHit != null) {
break;
}
}
}
} else {
psmsRejected++;
}
}
// cancellation is checked and progress is reported once per advocate
if (waitingHandler.isRunCanceled()) {
return;
}
waitingHandler.setSecondaryProgressCounter(++progress);
}
}
/**
 * Updates the import statistics with a first peptide hit: the maximal
 * precursor mass errors (ppm and Da), the set of charges found, and the
 * occurrence of the parent proteins of the peptide (one hit wonders and
 * protein count).
 *
 * @param spectrumKey the key of the spectrum match
 * @param peptideAssumption the peptide assumption
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws SQLException
 * @throws ClassNotFoundException
 * @throws MzMLUnmarshallerException
 */
private synchronized void checkPeptidesMassErrorsAndCharges(String spectrumKey, PeptideAssumption peptideAssumption)
        throws IOException, InterruptedException, SQLException, ClassNotFoundException, MzMLUnmarshallerException {

    final double measuredMz = spectrumFactory.getPrecursor(spectrumKey).getMz();

    // mass error in ppm
    final double ppmError = Math.abs(peptideAssumption.getDeltaMass(measuredMz, true));
    if (ppmError > maxPeptideErrorPpm) {
        maxPeptideErrorPpm = ppmError;
    }

    // mass error in Da
    final double daError = Math.abs(peptideAssumption.getDeltaMass(measuredMz, false));
    if (daError > maxPeptideErrorDa) {
        maxPeptideErrorDa = daError;
    }

    // a set ignores values it already contains
    charges.add(peptideAssumption.getIdentificationCharge().value);

    for (String accession : peptideAssumption.getPeptide().getParentProteins(sequenceMatchingPreferences)) {
        Integer occurrence = proteinCount.get(accession);
        if (occurrence != null) {
            // protein already seen several times
            proteinCount.put(accession, occurrence + 1);
            continue;
        }
        if (singleProteinList.remove(accession)) {
            // second occurrence: not a one hit wonder anymore
            proteinCount.put(accession, 2);
        } else {
            // first occurrence
            singleProteinList.add(accession);
        }
    }
}
/**
 * Updates the import statistics with a first tag hit: the maximal precursor
 * mass errors (ppm and Da) and the set of charges found.
 *
 * @param spectrumKey the key of the spectrum match
 * @param tagAssumption the tag assumption
 *
 * @throws MzMLUnmarshallerException
 * @throws IOException
 */
private synchronized void checkTagMassErrorsAndCharge(String spectrumKey, TagAssumption tagAssumption) throws MzMLUnmarshallerException, IOException {

    final double measuredMz = spectrumFactory.getPrecursor(spectrumKey).getMz();

    // mass error in ppm
    final double ppmError = Math.abs(tagAssumption.getDeltaMass(measuredMz, true));
    if (ppmError > maxTagErrorPpm) {
        maxTagErrorPpm = ppmError;
    }

    // mass error in Da
    final double daError = Math.abs(tagAssumption.getDeltaMass(measuredMz, false));
    if (daError > maxTagErrorDa) {
        maxTagErrorDa = daError;
    }

    // a set ignores values it already contains
    charges.add(tagAssumption.getIdentificationCharge().value);
}
/**
 * Makes sure that the modifications targeted by the X!Tandem quick acetyl
 * and quick pyrolidone options are part of the variable modifications of
 * the search parameters. The verification is conducted only once per
 * importer, subsequent calls return immediately.
 */
private synchronized void verifyXTandemPtms() {

    if (xTandemPtmsCheck) {
        // already verified
        return;
    }

    ModificationProfile profile = searchParameters.getModificationProfile();
    IdentificationAlgorithmParameter rawParameters = searchParameters.getIdentificationAlgorithmParameter(Advocate.xtandem.getIndex());

    if (rawParameters != null) {

        XtandemParameters tandemParameters = (XtandemParameters) rawParameters;

        // quick acetyl
        if (tandemParameters.isProteinQuickAcetyl() && !profile.contains("acetylation of protein n-term")) {
            profile.addVariableModification(PTMFactory.getInstance().getPTM("acetylation of protein n-term"));
        }

        // quick pyrolidone
        if (tandemParameters.isQuickPyrolidone()) {
            for (String pyroName : new String[]{"pyro-cmc", "pyro-glu from n-term e", "pyro-glu from n-term q"}) {
                if (!profile.getVariableModifications().contains(pyroName)) {
                    profile.addVariableModification(PTMFactory.getInstance().getPTM(pyroName));
                }
            }
        }
    }

    xTandemPtmsCheck = true;
}
/**
 * Returns the mass indicated by the identification algorithm for the given
 * PTM. 0 if not found.
 *
 * The name is resolved in three steps: (1) as a modification known to the
 * PTM factory, (2) as a "mass@AA" pattern, in which case the text before
 * the '@' is parsed as the mass, (3) as an OMSSA index looked up in the
 * modification profile.
 *
 * @param sePtmName the name according to the identification algorithm
 * @param modificationProfile the modification profile of the identification
 *
 * @return the mass of the PTM, 0 if the name could not be resolved
 *
 * @throws NumberFormatException if the name contains an '@' after its first
 * character and the text preceding it is not a valid number
 */
private double getRefMass(String sePtmName, ModificationProfile modificationProfile) {

    // Try utilities modifications
    PTM refPtm = ptmFactory.getPTM(sePtmName);
    if (refPtm != PTMFactory.unknownPTM) {
        return refPtm.getMass();
    }

    // Try mass@AA
    int atIndex = sePtmName.indexOf('@');
    if (atIndex > 0) {
        return Double.parseDouble(sePtmName.substring(0, atIndex));
    }

    // Try OMSSA indexes
    try {
        int omssaIndex = Integer.parseInt(sePtmName);
        String omssaName = modificationProfile.getModification(omssaIndex);
        if (omssaName != null) {
            refPtm = ptmFactory.getPTM(omssaName);
            if (refPtm != PTMFactory.unknownPTM) {
                return refPtm.getMass();
            }
        }
    } catch (Exception ignored) {
        // Best effort: the name is not an OMSSA index or the lookup failed,
        // the modification is reported as not found.
    }

    return 0.0;
}
/**
 * Gives the count of PSMs which were processed.
 *
 * @return the number of processed PSMs
 */
public long getnPSMs() {
    return this.nPSMs;
}
/**
 * Gives the count of secondary hits which were processed.
 *
 * @return the number of processed secondary hits
 */
public long getnSecondary() {
    return this.nSecondary;
}
/**
 * Gives the count of PSMs rejected by the import filters.
 *
 * @return the number of PSMs which failed the import filters
 */
public int getPsmsRejected() {
    return this.psmsRejected;
}
/**
 * Gives the count of PSMs rejected by the import filters because of a
 * protein issue.
 *
 * @return the number of PSMs which failed the import filters due to a
 * protein issue
 */
public int getProteinIssue() {
    return this.proteinIssue;
}
/**
 * Gives the count of PSMs rejected by the import filters because of a
 * peptide issue.
 *
 * @return the number of PSMs which failed the import filters due to a
 * peptide issue
 */
public int getPeptideIssue() {
    return this.peptideIssue;
}
/**
 * Gives the count of PSMs rejected by the import filters because of a
 * precursor issue.
 *
 * @return the number of PSMs which failed the import filters due to a
 * precursor issue
 */
public int getPrecursorIssue() {
    return this.precursorIssue;
}
/**
 * Gives the count of PSMs rejected by the import filters because of a PTM
 * issue.
 *
 * @return the number of PSMs which failed the import filters due to a PTM
 * issue
 */
public int getPtmIssue() {
    return this.ptmIssue;
}
/**
 * Gives the count of PSMs which were kept after filtering.
 *
 * @return the number of PSMs retained after filtering
 */
public int getnRetained() {
    return this.nRetained;
}
/**
 * Gives the distinct charges which were encountered. The set held by this
 * object is returned as is, not a copy.
 *
 * @return the different charges found
 */
public HashSet<Integer> getCharges() {
    return this.charges;
}
/**
 * Gives the largest peptide mass error encountered, in ppm.
 *
 * @return the maximal peptide mass error found in ppm
 */
public double getMaxPeptideErrorPpm() {
    return this.maxPeptideErrorPpm;
}
/**
 * Gives the largest peptide mass error encountered, in Da.
 *
 * @return the maximal peptide mass error found in Da
 */
public double getMaxPeptideErrorDa() {
    return this.maxPeptideErrorDa;
}
/**
 * Gives the largest tag mass error encountered, in ppm.
 *
 * @return the maximal tag mass error found in ppm
 */
public double getMaxTagErrorPpm() {
    return this.maxTagErrorPpm;
}
/**
 * Gives the largest tag mass error encountered, in Da.
 *
 * @return the maximal tag mass error found in Da
 */
public double getMaxTagErrorDa() {
    return this.maxTagErrorDa;
}
/**
 * Private runnable importing a single spectrum match via importPsm.
 * Nothing is imported when the run was canceled, and exceptions are only
 * forwarded to the exception handler when the run was not canceled.
 */
private class PsmImporterRunnable implements Runnable {

    /**
     * The spectrum match handled by this runnable.
     */
    private SpectrumMatch spectrumMatch;
    /**
     * The waiting handler used to display progress and detect cancellation.
     */
    private WaitingHandler waitingHandler;

    /**
     * Creates a runnable for the given spectrum match.
     *
     * @param spectrumMatch the match to import
     * @param waitingHandler a waiting handler to display progress and allow
     * canceling the process
     */
    public PsmImporterRunnable(SpectrumMatch spectrumMatch, WaitingHandler waitingHandler) {
        this.spectrumMatch = spectrumMatch;
        this.waitingHandler = waitingHandler;
    }

    @Override
    public void run() {
        try {
            if (waitingHandler.isRunCanceled()) {
                // canceled before the import started
                return;
            }
            importPsm(spectrumMatch, waitingHandler);
        } catch (Exception e) {
            // exceptions raised while the run is being canceled are dropped
            boolean canceled = waitingHandler.isRunCanceled();
            if (canceled) {
                return;
            }
            exceptionHandler.catchException(e);
        }
    }
}
}