Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spectral decon #675

Open
wants to merge 48 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
6a4571d
Moved Proteomics to Mass Spectrometry, adjusted namespace
Alexander-Sol Nov 6, 2022
1695531
Changed using directives and issues with proteases.tsv path
Alexander-Sol Nov 6, 2022
f4c0c14
Merged the proteomics refactor
Alexander-Sol Nov 6, 2022
98103c2
Fixed accidental deletino in ProteaseDictionary
Alexander-Sol Nov 6, 2022
db1c7a8
Added fields to SpectralDeconParams
Alexander-Sol Nov 6, 2022
36fabb1
Further changes to spectralDeconAlgorithm
Alexander-Sol Nov 7, 2022
3866579
Restructured checks for DeconParameters
Alexander-Sol Nov 7, 2022
6937520
Changed Masses and Intesities field in IsotopicDistribution to return…
Alexander-Sol Nov 7, 2022
18bd83f
Changed syntax
Alexander-Sol Nov 7, 2022
c04dff3
minor commit
Alexander-Sol Nov 9, 2022
70fcbf0
minor commit
Alexander-Sol Nov 10, 2022
0ed3103
Change CompareTo method within DoubleRange to adhere to conventions
Alexander-Sol Nov 10, 2022
e0dc36c
Amended comment
Alexander-Sol Nov 10, 2022
7ac1c9b
Fixed merge conflicts
Alexander-Sol Nov 10, 2022
e1a6fb2
minor commit
Alexander-Sol Nov 10, 2022
bc5a19a
Fixed merge conflicts
Alexander-Sol Nov 10, 2022
db7bacb
Created MinimalSpectrum class
Alexander-Sol Nov 10, 2022
2f5befb
Minor commit
Alexander-Sol Nov 10, 2022
ce6cb0d
Tenatively finished IndexEnvelopes method in SpectralDeconAlgo
Alexander-Sol Nov 10, 2022
f9fba02
Started testing
Alexander-Sol Nov 10, 2022
a665eb6
Minor commit
Alexander-Sol Nov 10, 2022
82895d2
Fleshed out tests
Alexander-Sol Nov 11, 2022
902bc46
Broke some tests
Alexander-Sol Nov 14, 2022
8e34eaa
Fixed broken tests
Alexander-Sol Nov 14, 2022
9ff659b
Minor commit
Alexander-Sol Nov 14, 2022
543482a
Fixed errors in envelope binning/indexing
Alexander-Sol Nov 14, 2022
377d9a3
Changed MostAbundantMonoisotopicMass to MostAbundantMass in PeptideWi…
Alexander-Sol Nov 14, 2022
0bdfa43
Changed MostAbundantMonoisotopicMass to MostAbundantMass in PeptideWi…
Alexander-Sol Nov 14, 2022
b175da7
Finished first test for Spectral Deconvolution
Alexander-Sol Nov 14, 2022
f7751d7
Fixed typo
Alexander-Sol Nov 14, 2022
41cbf07
minor commit
Alexander-Sol Nov 16, 2022
6bdd270
First stab at spectral Deconvolute function
Alexander-Sol Nov 16, 2022
efdb317
minor commit before 2D Array Refactor
Alexander-Sol Nov 16, 2022
15918db
2D array refactor
Alexander-Sol Nov 16, 2022
37233a5
Started Scorer and ScoringAlgo classes
Alexander-Sol Nov 17, 2022
53c47ad
Edited normalization methods within SpectralSimilarity to remove side…
Alexander-Sol Nov 17, 2022
0f14018
Fixed Merge Conflicts
Alexander-Sol Nov 17, 2022
096d62d
Minor commit
Alexander-Sol Nov 17, 2022
d6fe050
Finished score strategy
Alexander-Sol Nov 17, 2022
3bfa606
Commit before testing begins
Alexander-Sol Nov 18, 2022
a974000
SpectralDecon passing first test
Alexander-Sol Nov 18, 2022
83fb03d
minor refactoring
Alexander-Sol Nov 30, 2022
54caec7
Merge branch 'master' into SpectralDecon
Alexander-Sol Nov 30, 2022
c6961db
Added dummy test
Alexander-Sol Dec 1, 2022
1193d17
Updated nuspec file to reflect refactor
Alexander-Sol Dec 12, 2022
c9174ea
Changed MassSpectrometry.Proteomics* namespace back to Proteomics*
Alexander-Sol Dec 12, 2022
b744f49
Added ScorerTest
Alexander-Sol Dec 12, 2022
f1a4f28
Merge branch 'master' into SpectralDecon
Alexander-Sol Jan 24, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion mzLib/Chemistry/IsotopicDistribution.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,22 @@ private IsotopicDistribution(int count)
intensities = new double[count];
}

// Clone() produces shallow copies, but because double is a primitive type, this is acceptable
public double MostAbundantMass
{
get
{
double maxIntensity = intensities.Max();
for (int i = 0; i < masses.Length; i++)
{
if (Math.Abs(intensities[i] - maxIntensity) < 0.0001)
{
return (masses[i]);
}
}
return Double.NaN;
}
}
public double MonoIsotopicMass => masses[0];
public double[] Masses => (double[]) masses.Clone();
public double[] Intensities => (double[]) intensities.Clone();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@ public ClassicDeconvolutionAlgorithm(DeconvolutionParameters deconParameters) :
/// <returns></returns>
public override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumToDeconvolute, MzRange range)
{
var deconParams = DeconvolutionParameters as ClassicDeconvolutionParameters ?? throw new MzLibException("Deconvolution params and algorithm do not match");
var deconParams = DeconvolutionParameters as ClassicDeconvolutionParameters;
if (deconParams == null)
{
throw new MzLibException("Deconvolution params and algorithm do not match");
}

spectrum = spectrumToDeconvolute;
//if no peaks, stop
if (spectrum.Size == 0)
Expand Down Expand Up @@ -175,7 +180,9 @@ public override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumToD
}
}

private IsotopicEnvelope FindIsotopicEnvelope(int massIndex, double candidateForMostIntensePeakMz, double candidateForMostIntensePeakIntensity, double testMostIntenseMass, int chargeState, double deconvolutionTolerancePpm, double intensityRatioLimit, List<double> monoisotopicMassPredictions)
private IsotopicEnvelope FindIsotopicEnvelope(int massIndex, double candidateForMostIntensePeakMz, double candidateForMostIntensePeakIntensity,
double testMostIntenseMass, int chargeState, double deconvolutionTolerancePpm, double intensityRatioLimit,
List<double> monoisotopicMassPredictions)
{
double[] theoreticalMasses = allMasses[massIndex];
double[] theoreticalIntensities = allIntensities[massIndex];
Expand Down Expand Up @@ -216,7 +223,9 @@ private IsotopicEnvelope FindIsotopicEnvelope(int massIndex, double candidateFor
return new IsotopicEnvelope(listOfObservedPeaks, monoisotopicMass, chargeState, totalIntensity, Statistics.StandardDeviation(listOfRatios), massIndex);
}

private int ObserveAdjacentChargeStates(IsotopicEnvelope originalEnvelope, double mostIntensePeakMz, int massIndex, double deconvolutionTolerancePpm, double intensityRatioLimit, double minChargeToLookFor, double maxChargeToLookFor, List<double> monoisotopicMassPredictions)
private int ObserveAdjacentChargeStates(IsotopicEnvelope originalEnvelope, double mostIntensePeakMz, int massIndex,
double deconvolutionTolerancePpm, double intensityRatioLimit, double minChargeToLookFor, double maxChargeToLookFor,
List<double> monoisotopicMassPredictions)
{
//look for the higher and lower charge states using the proposed mass
int numAdjacentChargeStatesObserved = 0;
Expand Down Expand Up @@ -251,7 +260,8 @@ private int ObserveAdjacentChargeStates(IsotopicEnvelope originalEnvelope, doubl
return numAdjacentChargeStatesObserved;
}

private bool FindChargeStateOfMass(IsotopicEnvelope originalEnvelope, int zToInvestigate, double mostAbundantNeutralIsotopeToInvestigate, int massIndex, double deconvolutionTolerancePpm, double intensityRatioLimit, List<double> monoisotopicMassPredictions)
private bool FindChargeStateOfMass(IsotopicEnvelope originalEnvelope, int zToInvestigate, double mostAbundantNeutralIsotopeToInvestigate, int massIndex,
double deconvolutionTolerancePpm, double intensityRatioLimit, List<double> monoisotopicMassPredictions)
{
//we know the mass and the charge that we're looking for, just see if the expected m/z and its isotopes are there or not
double mostAbundantIsotopeMzForThisZTheoretical = mostAbundantNeutralIsotopeToInvestigate.ToMz(zToInvestigate);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Chemistry;
using Easy.Common.Extensions;
using MassSpectrometry.Deconvolution;
using MassSpectrometry.Deconvolution.Scoring;
using Proteomics;
using Proteomics.ProteolyticDigestion;
using MzLibUtil;

namespace MassSpectrometry.Deconvolution.Algorithms
{
public class SpectralDeconvolutionAlgorithm : DeconvolutionAlgorithm
{
// TODO: Make a charge state envelope class, complete with "MostAbundantChargeState"
public Dictionary<PeptideWithSetModifications, List<IsotopicEnvelope>> EnvelopeDictionary { get; private set; }

// Consider defining this as a jagged array to increase performance
public List<MinimalSpectrum>[,] IndexedLibrarySpectra { get; private set; }
// SpectrumIndexToPwsmMap maps the location of each spectrum within IndexedLibrarySpectra to its respective PeptideWithSetMods and charge
public Dictionary<(int, int, int), PeptideWithSetModifications> SpectrumIndexToPwsmMap { get; private set; }
public int MaxThreads; // This should be in the Parameters abstract
public SpectralDeconvolutionParameters SpectralParams { get; }
public PpmTolerance PpmTolerance { get; }
public Scorer Scorer { get; }

public SpectralDeconvolutionAlgorithm(DeconvolutionParameters parameters) : base(parameters)
{
var deconvolutionParameters = DeconvolutionParameters as SpectralDeconvolutionParameters;
if (deconvolutionParameters == null)
{
throw new MzLibException(
"Improper Deconvolution Parameters were pass to the SpectralDeconvolutionAlgorithm");
}
else
{
SpectralParams = deconvolutionParameters;
}

PpmTolerance = new PpmTolerance(parameters.DeconvolutionTolerancePpm);

FindLibraryEnvelopes();
IndexEnvelopes();
Scorer = new Scorer(Scorer.ScoringMethods.SpectralContrastAngle, PpmTolerance);

}

public override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrum)
{

if (spectrum == null || spectrum.Size == 0)
{
yield break;
}

// For each charge state (key) store the indices corresponding to every potential isotopic envelope (value)
Dictionary<int, List<MinimalSpectrum>> potentialEnvelopes = FindPotentialEnvelopes(spectrum);

// iterate through charge states (potentially not necessary/performant. Could flatten)
foreach (var keyValuePair in potentialEnvelopes)
{
int chargeBinIndex = keyValuePair.Key - SpectralParams.MinAssumedChargeState;
// iterate through potential envelopes
foreach (var experimentalSpectrum in keyValuePair.Value)
{
double mostAbundantMz = experimentalSpectrum.MostAbundantMz;
int massBinIndex = (int)Math.Floor((mostAbundantMz - SpectralParams.ScanRange.Minimum) *
SpectralParams.BinsPerDalton);
if (!IndexedLibrarySpectra[massBinIndex, chargeBinIndex].IsNotNullOrEmpty()) continue; // continue if there are no corresponding library spectra

int? bestMatchListPosition = null;
int currentListPosition = 0;
double bestFoundScore = Scorer.PoorScore;
// Score against matching theoretical envelopes
foreach (MinimalSpectrum theoreticalSpectrum in IndexedLibrarySpectra[massBinIndex, chargeBinIndex])
{
// TODO: Rename to FindBestScore
if (Scorer.TestForScoreImprovement(
Scorer.Score(experimentalSpectrum,theoreticalSpectrum),
bestFoundScore,
out double betterScore)
)
{
bestMatchListPosition = currentListPosition;
bestFoundScore = betterScore;
}
currentListPosition++;
}

if (bestMatchListPosition.HasValue &&
SpectrumIndexToPwsmMap.TryGetValue((massBinIndex, chargeBinIndex, (int)bestMatchListPosition), out var pwsmMatch))
{
yield return new IsotopicEnvelope(experimentalSpectrum, pwsmMatch, bestFoundScore);
}
else
{
//TODO: Add some averagine bullshit here
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Haha this one's almost worth framing

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you still need to add something here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not on this PR. That would come in subsequent iterations

}

}
}
}

/// <summary>
/// Populates the EnvelopeDictionary by digesting each protein in the parameters into PeptideWithSetMods,
/// then calculating an isotopic envelope for each charge state from min to max assumed charge state
/// </summary>
private void FindLibraryEnvelopes()
{
EnvelopeDictionary = new();


//TODO: Parallelize this section of the code
foreach (Protein protein in SpectralParams.Proteins)
{
// I'm not sure if calling protein.Digest within the foreach statement would call the method anew for every loop
IEnumerable<PeptideWithSetModifications> uniquePeptides = protein.Digest(
SpectralParams.DigestionParams, SpectralParams.FixedModifications,
SpectralParams.VariableModifications, SpectralParams.SilacLabels,
topDownTruncationSearch: SpectralParams.FindTopDownTruncationProducts);

foreach (PeptideWithSetModifications pwsm in uniquePeptides)
{
EnvelopeDictionary.Add(pwsm, new List<IsotopicEnvelope>());
IsotopicDistribution pwsmDistribution = IsotopicDistribution.GetDistribution(pwsm.FullChemicalFormula,
fineResolution: SpectralParams.FineResolutionForIsotopicDistribution,
minProbability: SpectralParams.MinProbabilityForIsotopicDistribution);

// iterates through all possible charge states, from largest to smallest.
// Any isotopic envelope whose most abundant peak would fall within the scan range is written to the envelope dictionary
// Once the mass to charge ratio of the most abundant peak is greater than the scan range maximum, the loop breaks
for (int charge = SpectralParams.MaxAssumedChargeState;
charge >= SpectralParams.MinAssumedChargeState;
charge--)
{
double theoreticalMz = pwsm.MostAbundantMass.ToMz(charge);
if (SpectralParams.ScanRange.Contains(theoreticalMz))
{
EnvelopeDictionary[pwsm].Add(new
IsotopicEnvelope(pwsmDistribution, charge, SpectralParams.AmbiguityThresholdForIsotopicDistribution));
}
else if (SpectralParams.ScanRange.CompareTo(theoreticalMz) < 0)
{
break;
}
}
}

}
}

/// <summary>
/// For each envelope in Envelope Dictionary, indexes it according to mass and charge,
/// resulting in a 2D array of lists of minimal spectra
/// </summary>
private void IndexEnvelopes()
{

int numberOfBinsForIndexing = (int) (SpectralParams.ScanRange.Width * SpectralParams.BinsPerDalton).Ceiling(0);
IndexedLibrarySpectra = new List<MinimalSpectrum>[numberOfBinsForIndexing,
SpectralParams.MaxAssumedChargeState + 1 - SpectralParams.MinAssumedChargeState];
SpectrumIndexToPwsmMap = new();

foreach (var keyValuePair in EnvelopeDictionary)
{
foreach (IsotopicEnvelope envelope in keyValuePair.Value)
{
int massBinIndex = (int)Math.Floor((envelope.MostAbundantObservedIsotopicMz - SpectralParams.ScanRange.Minimum) *
SpectralParams.BinsPerDalton);
int chargeBinIndex = envelope.Charge - SpectralParams.MinAssumedChargeState;
if (IndexedLibrarySpectra[massBinIndex, chargeBinIndex] == null)
{
IndexedLibrarySpectra[massBinIndex, chargeBinIndex] = new();
}
MinimalSpectrum envelopeMinimalSpectrum = new MinimalSpectrum(envelope.MzArray, envelope.IntensityArray, envelope.Charge);
IndexedLibrarySpectra[massBinIndex, chargeBinIndex].Add(envelopeMinimalSpectrum);
SpectrumIndexToPwsmMap.Add(
(massBinIndex, chargeBinIndex, IndexedLibrarySpectra[massBinIndex, chargeBinIndex].Count - 1), // tuple consisting of bin index (mass, charge) and list position of MinimalSpectrum object
keyValuePair.Key // tuple consisting of PeptideWithSetMods and charge state
);

// In situations where the most abundant isotope frequency is close to the second most abundant isotope's frequency
// ( ratio >= IsotopicEnvelope.AmbiguityRatioMinimum),
// The Spectrum is stored in the index of the second most abundant isotope as well
if(envelope.SecondMostAbundantObservedIsotopicMz > 0 )
{
// Ceiling or floor????
int secondBinIndex = (int)Math.Floor(
((double)envelope.SecondMostAbundantObservedIsotopicMz - SpectralParams.ScanRange.Minimum ) * SpectralParams.BinsPerDalton);
if (secondBinIndex != massBinIndex)
{
if (IndexedLibrarySpectra[secondBinIndex, chargeBinIndex] == null) IndexedLibrarySpectra[secondBinIndex, chargeBinIndex] = new();
IndexedLibrarySpectra[secondBinIndex, chargeBinIndex].Add(envelopeMinimalSpectrum);
SpectrumIndexToPwsmMap.Add(
(secondBinIndex, chargeBinIndex, IndexedLibrarySpectra[secondBinIndex, chargeBinIndex].Count - 1),
keyValuePair.Key
);
}
}
}
}
}

/// <summary>
/// Iterates through all peaks in a spectrum to find all potential isotopic envelopes.
/// It does this by examining the spacing of peaks in the m/z domain
/// e.g. for charge of 2, a peak at 200 m/z would result in a search for a peak at 200.5 and 201 m/z
/// if either is found, the process continues until SpectralParams.MaxConsecutiveMissedIsotopicPeaks number of consecutive
/// isotope peaks are missed
/// Anything consistent with an isotopic envelope in a given charge state is stored in the dictionary
/// </summary>
/// <param name="spectrum"></param>
/// <returns></returns>
private Dictionary<int, List<MinimalSpectrum>> FindPotentialEnvelopes(MzSpectrum spectrum)
{

// For each charge state (key) store the indices corresponding to every potential isotopic envelope (value)
Dictionary<int, List<MinimalSpectrum>> potentialEnvelopes = new();

for (int charge = SpectralParams.MinAssumedChargeState; charge <= SpectralParams.MaxAssumedChargeState; charge++)
{
List<int> indicesOfKnownPeaks = new();

// Spectrum Search Loop
for (int i = 0; i < spectrum.Size; i++)
{
if (indicesOfKnownPeaks.Contains(i))
{
continue;
}
List<int> envelopeIndices = new();
envelopeIndices.Add(i);

// Envelope Search Loop
for (int j = i + 1; j < spectrum.Size; j++)
{
if (PpmTolerance.Within(spectrum.XArray[j],
spectrum.XArray[envelopeIndices.Last()] + Constants.C13MinusC12 / charge))
{
envelopeIndices.Add(j);
}
else if (spectrum.XArray[j] > PpmTolerance.GetMaximumValue(spectrum.XArray[envelopeIndices.Last()] +
(1 + SpectralParams.MaxConsecutiveMissedIsotopicPeaks) * Constants.C13MinusC12 / charge))
{
// exit the Envelope loop if we missed more consecutive isotopic peaks than were allowed
break;
}
}

// Convert to MinimalSpectrum here? Write helper function to do so?
if (envelopeIndices.Count > 1)
{
if (!potentialEnvelopes.ContainsKey(charge)) potentialEnvelopes.Add(charge, new());
potentialEnvelopes[charge].Add(GetMinimalSpectrumFromIndices(spectrum, envelopeIndices, charge));
indicesOfKnownPeaks.AddRange(envelopeIndices);
}
}
}

return potentialEnvelopes;
}


private static MinimalSpectrum GetMinimalSpectrumFromIndices(MzSpectrum spectrum, List<int> indices, int charge = 0)
{
double[] mzArray = new double[indices.Count];
double[] intensityArray = new double[indices.Count];
for (int i = 0; i < indices.Count; i++)
{
mzArray[i] = spectrum.XArray[indices[i]];
intensityArray[i] = spectrum.YArray[indices[i]];
}

return new MinimalSpectrum(mzArray, intensityArray, charge);
}

}
}
Loading