Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Micro optimizations #813

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT
yield break;
}

var isolatedMassesAndCharges = new List<IsotopicEnvelope>();
var isolatedMassesAndCharges = new List<IsotopicEnvelope>(10);

(int start, int end) indexes = ExtractIndices(range.Minimum, range.Maximum);

Expand All @@ -48,6 +48,7 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT

//go through each peak in the selected range and assume it is the most intense peak of its isotopic envelope (if it's not, it will hopefully get a low score)
//cycle through possible charge states and select the one that has the best score (fit) with the averagine model
HashSet<int> allPossibleChargeStates = new HashSet<int>();
for (int candidateForMostIntensePeakIndex = indexes.start;
candidateForMostIntensePeakIndex < indexes.end;
candidateForMostIntensePeakIndex++)
Expand All @@ -61,7 +62,7 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT
double candidateForMostIntensePeakMz = spectrum.XArray[candidateForMostIntensePeakIndex];

//Find what charge states this peak might be based on the spacing of nearby peaks (assumes isotopic resolution)
HashSet<int> allPossibleChargeStates = new HashSet<int>();
allPossibleChargeStates.Clear();
for (int i = candidateForMostIntensePeakIndex + 1;
i < spectrum.XArray.Length;
i++) //look at peaks of higher m/z
Expand Down Expand Up @@ -169,8 +170,9 @@ private IsotopicEnvelope FindIsotopicEnvelope(int massIndex, double candidateFor
double[] theoreticalMasses = allMasses[massIndex];
double[] theoreticalIntensities = allIntensities[massIndex];
//add "most intense peak"
var listOfObservedPeaks = new List<(double, double)> { (candidateForMostIntensePeakMz, candidateForMostIntensePeakIntensity) };
var listOfRatios = new List<double> { theoreticalIntensities[0] / candidateForMostIntensePeakIntensity }; // theoreticalIntensities and theoreticalMasses are sorted by intensity, so first is most intense
int estimatedSize = theoreticalIntensities.Length;
var listOfObservedPeaks = new List<(double, double)>(estimatedSize) { (candidateForMostIntensePeakMz, candidateForMostIntensePeakIntensity) };
var listOfRatios = new List<double>(estimatedSize) { theoreticalIntensities[0] / candidateForMostIntensePeakIntensity }; // theoreticalIntensities and theoreticalMasses are sorted by intensity, so first is most intense
// Assuming the test peak is most intense...
// Try to find the rest of the isotopes!
double differenceBetweenTheorAndActualMass = testMostIntenseMass - theoreticalMasses[0]; //mass difference actual-theoretical for the tallest peak (not necessarily the monoisotopic)
Expand Down
31 changes: 13 additions & 18 deletions mzLib/MzLibUtil/PpmTolerance.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,40 +25,35 @@ namespace MzLibUtil
/// </summary>
public class PpmTolerance : Tolerance
{
    /// <summary>
    /// The tolerance expressed as a plain fraction (<c>Value / 1e6</c>), cached so the
    /// division is not repeated on every range/containment query.
    /// NOTE(review): caching assumes <c>Value</c> does not change after construction — confirm against <c>Tolerance</c>.
    /// </summary>
    private readonly double _factor;

    /// <summary>
    /// Creates a new parts-per-million tolerance from the given value.
    /// </summary>
    /// <param name="value">The numerical value of the tolerance, in PPM</param>
    public PpmTolerance(double value)
        : base(value)
    {
        // Derive the factor from Value (what the base class actually stored) rather than from the
        // raw argument, so the cached factor always agrees with the Value-based formulas it replaces.
        _factor = Value / 1e6;
    }

    /// <summary>
    /// Formats the tolerance as "±{value} PPM" with four decimal places, culture-invariant.
    /// </summary>
    public override string ToString() => $"\u00b1{Value.ToString("f4", System.Globalization.CultureInfo.InvariantCulture)} PPM";

    /// <summary>
    /// Returns the range [mean - tol, mean + tol] where tol is the PPM fraction of <paramref name="mean"/>.
    /// </summary>
    /// <param name="mean">The value the range is centered on</param>
    public override DoubleRange GetRange(double mean)
    {
        double tol = _factor * mean;
        return new DoubleRange(mean - tol, mean + tol);
    }

    /// <summary>
    /// Returns <paramref name="mean"/> scaled down by the PPM fraction: mean * (1 - Value / 1e6).
    /// </summary>
    public override double GetMinimumValue(double mean) => mean * (1 - _factor);

    /// <summary>
    /// Returns <paramref name="mean"/> scaled up by the PPM fraction: mean * (1 + Value / 1e6).
    /// </summary>
    public override double GetMaximumValue(double mean) => mean * (1 + _factor);

    /// <summary>
    /// Determines whether <paramref name="experimental"/> lies within this PPM tolerance of <paramref name="theoretical"/>.
    /// </summary>
    /// <param name="experimental">The observed value</param>
    /// <param name="theoretical">The reference value the tolerance is scaled by</param>
    /// <returns>True when |experimental - theoretical| does not exceed |theoretical| * Value / 1e6</returns>
    public override bool Within(double experimental, double theoretical)
    {
        // Multiplication form of |(experimental - theoretical) / theoretical * 1e6| <= Value.
        // The allowed deviation must be taken as an absolute value: scaling by a negative
        // theoretical would otherwise yield a negative bound that no difference can satisfy.
        // Unlike the division form, experimental == theoretical == 0 is reported as within tolerance.
        double allowedDeviation = Math.Abs(theoretical) * _factor;
        return Math.Abs(experimental - theoretical) <= allowedDeviation;
    }
}
}
}
3 changes: 2 additions & 1 deletion mzLib/Omics/BioPolymerWithSetModsExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods,
/// <returns></returns>
public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMods)
{
var subSequence = new StringBuilder();
// start string builder with initial capacity to avoid resizing costs.
var subSequence = new StringBuilder(withSetMods.BaseSequence.Length + withSetMods.AllModsOneIsNterminus.Count * 20);

// modification on peptide N-terminus
if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
Expand Down
44 changes: 24 additions & 20 deletions mzLib/Omics/Digestion/DigestionProduct.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,17 @@ protected static IEnumerable<Dictionary<int, Modification>> GetVariableModificat
var possible_variable_modifications = new Dictionary<int, List<Modification>>(possibleVariableModifications);

int[] base_variable_modification_pattern = new int[peptideLength + 4];
var totalAvailableMods = possible_variable_modifications.Sum(b => b.Value == null ? 0 : b.Value.Count);
for (int variable_modifications = 0; variable_modifications <= Math.Min(totalAvailableMods, maxModsForPeptide); variable_modifications++)
int totalAvailableMods = 0;
foreach (var kvp in possible_variable_modifications)
{
if (kvp.Value != null)
{
totalAvailableMods += kvp.Value.Count;
}
}

int maxVariableMods = Math.Min(totalAvailableMods, maxModsForPeptide);
for (int variable_modifications = 0; variable_modifications <= maxVariableMods; variable_modifications++)
{
foreach (int[] variable_modification_pattern in GetVariableModificationPatterns(new List<KeyValuePair<int, List<Modification>>>(possible_variable_modifications),
possible_variable_modifications.Count - variable_modifications, base_variable_modification_pattern, 0))
Expand All @@ -76,18 +85,17 @@ protected Dictionary<int, Modification> GetFixedModsOneIsNorFivePrimeTerminus(in
case "Oligo 5'-terminal.":
case "N-terminal.":
case "Peptide N-terminal.":

//the modification is protease associated and is applied to the N-terminal cleaved residue, not at the beginning of the protein
if (mod.ModificationType == "Protease" && ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue))
if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue))
{
if (OneBasedStartResidue != 1)
if (mod.ModificationType == "Protease")
{
fixedModsOneIsNterminus[2] = mod;
if (OneBasedStartResidue != 1)
fixedModsOneIsNterminus[2] = mod;
}
}
//Normal N-terminal peptide modification
else if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue))
{
fixedModsOneIsNterminus[1] = mod;
else //Normal N-terminal peptide modification
fixedModsOneIsNterminus[1] = mod;
}
break;

Expand All @@ -106,17 +114,15 @@ protected Dictionary<int, Modification> GetFixedModsOneIsNorFivePrimeTerminus(in
case "C-terminal.":
case "Peptide C-terminal.":
//the modification is protease associated and is applied to the C-terminal cleaved residue, but not when that residue is the end of the protein
if (mod.ModificationType == "Protease" && ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1))
if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1))
{
if (OneBasedEndResidue != Parent.Length)
if (mod.ModificationType == "Protease")
{
fixedModsOneIsNterminus[length + 1] = mod;
if (OneBasedEndResidue != Parent.Length)
fixedModsOneIsNterminus[length + 1] = mod;
}
}
//Normal C-terminal peptide modification
else if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1))
{
fixedModsOneIsNterminus[length + 2] = mod;
else //Normal C-terminal peptide modification
fixedModsOneIsNterminus[length + 2] = mod;
}
break;

Expand Down Expand Up @@ -188,7 +194,5 @@ private static Dictionary<int, Modification> GetNewVariableModificationPattern(i

return modification_pattern;
}


}
}
67 changes: 36 additions & 31 deletions mzLib/Omics/Modifications/ModificationLocalization.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,42 @@
{
public static class ModificationLocalization
{
// This method is called a ton in MetaMorpheus. If changes are made, ensure they are efficient.
/// <summary>
/// Checks whether a modification's target motif matches the sequence at the given position
/// and whether the position satisfies the modification's location restriction.
/// </summary>
/// <param name="attemptToLocalize">The modification to test; its Target motif and LocationRestriction are read</param>
/// <param name="sequence">The full sequence the motif is matched against</param>
/// <param name="digestionProductOneBasedIndex">One-based position within the digestion product, used for the "Peptide ..." and "Oligo ..." restrictions</param>
/// <param name="digestionProductLength">Length of the digestion product, used for the C-terminal / 3'-terminal product restrictions</param>
/// <param name="bioPolymerOneBasedIndex">One-based position in <paramref name="sequence"/> that is aligned with the motif's capital letter</param>
/// <returns>True if the motif matches and the location restriction does not exclude the position</returns>
/// <exception cref="System.InvalidOperationException">The motif contains no upper-case character</exception>
public static bool ModFits(Modification attemptToLocalize, string sequence, int digestionProductOneBasedIndex, int digestionProductLength, int bioPolymerOneBasedIndex)
{
    // Materialize the motif string once; it is indexed repeatedly below.
    string motif = attemptToLocalize.Target.ToString();

    // First find the capital letter: it marks the motif position that carries the modification.
    int motifStartLocation = -1;
    for (int i = 0; i < motif.Length; i++)
    {
        if (char.IsUpper(motif[i]))
        {
            motifStartLocation = i;
            break;
        }
    }

    if (motifStartLocation < 0)
    {
        throw new System.InvalidOperationException("Modification motif contains no upper-case character.");
    }

    // Offset that maps a motif index onto the corresponding zero-based sequence index.
    int proteinToMotifOffset = bioPolymerOneBasedIndex - motifStartLocation - 1;

    // Every motif character must land inside the sequence and match the residue found there.
    for (int motifIndex = 0; motifIndex < motif.Length; motifIndex++)
    {
        int sequenceIndex = motifIndex + proteinToMotifOffset;
        if (sequenceIndex < 0 || sequenceIndex >= sequence.Length || !MotifMatches(motif[motifIndex], sequence[sequenceIndex]))
        {
            return false;
        }
    }

    // The motif matched; reject the position only if the location restriction excludes it.
    return attemptToLocalize.LocationRestriction switch
    {
        // NOTE(review): index 2 is tolerated for the polymer-start restrictions — presumably to allow a removed first residue; confirm.
        "N-terminal." when bioPolymerOneBasedIndex > 2 => false,
        "Peptide N-terminal." when digestionProductOneBasedIndex > 1 => false,
        "C-terminal." when bioPolymerOneBasedIndex < sequence.Length => false,
        "Peptide C-terminal." when digestionProductOneBasedIndex < digestionProductLength => false,
        "5'-terminal." when bioPolymerOneBasedIndex > 2 => false,
        // must be the first residue in the oligo but not the first in the nucleic acid
        "Oligo 5'-terminal." when digestionProductOneBasedIndex > 1 || bioPolymerOneBasedIndex == 1 => false,
        "3'-terminal." when bioPolymerOneBasedIndex < sequence.Length => false,
        // must be the last residue in the oligo but not the last in the nucleic acid
        "Oligo 3'-terminal." when digestionProductOneBasedIndex < digestionProductLength || bioPolymerOneBasedIndex == sequence.Length => false,
        // Any other restriction (e.g. "Anywhere." or "Unassigned.") imposes no positional constraint.
        _ => true,
    };
}

public static bool UniprotModExists(IBioPolymer bioPolymer, int i, Modification attemptToLocalize)
Expand All @@ -56,11 +53,19 @@ public static bool UniprotModExists(IBioPolymer bioPolymer, int i, Modification
/// <summary>
/// Tests a single motif character against a single sequence character.
/// The motif character is upper-cased before comparison; the sequence character is used as given.
/// </summary>
/// <param name="motifChar">Character from the modification motif (any case)</param>
/// <param name="sequenceChar">Character from the sequence being tested</param>
/// <returns>
/// True when the motif character is the wildcard 'X', equals the sequence character,
/// or is an ambiguity code covering it: B = D/N, J = I/L, Z = E/Q.
/// </returns>
private static bool MotifMatches(char motifChar, char sequenceChar)
{
    // Invariant casing keeps the result independent of the current culture (e.g. Turkish 'i').
    char upperMotifChar = char.ToUpperInvariant(motifChar);

    // The wildcard and the literal match are checked before the ambiguity codes so that
    // a B, J or Z appearing verbatim in the sequence still matches its own motif character.
    if (upperMotifChar == 'X' || upperMotifChar == sequenceChar)
    {
        return true;
    }

    // Ambiguity codes, matched with constant patterns instead of a per-call array allocation.
    return upperMotifChar switch
    {
        'B' => sequenceChar is 'D' or 'N',
        'J' => sequenceChar is 'I' or 'L',
        'Z' => sequenceChar is 'E' or 'Q',
        _ => false,
    };
}
}
}
Loading