Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scattered Optimizations #815

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT
yield break;
}

var isolatedMassesAndCharges = new List<IsotopicEnvelope>();
// start with different sizes for ms1 and ms2 deconvolution
var isolatedMassesAndCharges = range.Width > 10
? new List<IsotopicEnvelope>(16)
: new List<IsotopicEnvelope>(128);

(int start, int end) indexes = ExtractIndices(range.Minimum, range.Maximum);

Expand All @@ -48,6 +51,7 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT

//go through each peak in the selected range and assume it is the most intense peak of its isotopic envelope (if it's not, it will hopefully get a low score)
//cycle through possible charge states and select the one that has the best score (fit) with the averagine model
HashSet<int> allPossibleChargeStates = IntegerHashSetPool.Get();
for (int candidateForMostIntensePeakIndex = indexes.start;
candidateForMostIntensePeakIndex < indexes.end;
candidateForMostIntensePeakIndex++)
Expand All @@ -61,7 +65,7 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT
double candidateForMostIntensePeakMz = spectrum.XArray[candidateForMostIntensePeakIndex];

//Find what charge states this peak might be based on the spacing of nearby peaks (assumes isotopic resolution)
HashSet<int> allPossibleChargeStates = new HashSet<int>();
allPossibleChargeStates.Clear();
for (int i = candidateForMostIntensePeakIndex + 1;
i < spectrum.XArray.Length;
i++) //look at peaks of higher m/z
Expand Down Expand Up @@ -149,7 +153,9 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT
}
}

HashSet<double> seen = new HashSet<double>();
IntegerHashSetPool.Return(allPossibleChargeStates);

HashSet<double> seen = DoubleHashSetPool.Get();
foreach (var ok in isolatedMassesAndCharges.OrderByDescending(b => b.Score))
{
if (seen.Overlaps(ok.Peaks.Select(b => b.mz)))
Expand All @@ -162,15 +168,17 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT
}
yield return ok;
}
DoubleHashSetPool.Return(seen);
}

private IsotopicEnvelope FindIsotopicEnvelope(int massIndex, double candidateForMostIntensePeakMz, double candidateForMostIntensePeakIntensity, double testMostIntenseMass, int chargeState, double deconvolutionTolerancePpm, double intensityRatioLimit, List<double> monoisotopicMassPredictions)
{
double[] theoreticalMasses = allMasses[massIndex];
double[] theoreticalIntensities = allIntensities[massIndex];
//add "most intense peak"
var listOfObservedPeaks = new List<(double, double)> { (candidateForMostIntensePeakMz, candidateForMostIntensePeakIntensity) };
var listOfRatios = new List<double> { theoreticalIntensities[0] / candidateForMostIntensePeakIntensity }; // theoreticalIntensities and theoreticalMasses are sorted by intensity, so first is most intense
var listOfObservedPeaks = new List<(double, double)>(32) { (candidateForMostIntensePeakMz, candidateForMostIntensePeakIntensity) };
var listOfRatios = DoubleHashSetPool.Get();
listOfRatios.Add(theoreticalIntensities[0] / candidateForMostIntensePeakIntensity); // theoreticalIntensities and theoreticalMasses are sorted by intensity, so first is most intense
// Assuming the test peak is most intense...
// Try to find the rest of the isotopes!
double differenceBetweenTheorAndActualMass = testMostIntenseMass - theoreticalMasses[0]; //mass difference actual-theoretical for the tallest peak (not necessarily the monoisotopic)
Expand Down Expand Up @@ -202,7 +210,9 @@ private IsotopicEnvelope FindIsotopicEnvelope(int massIndex, double candidateFor
}
}

return new IsotopicEnvelope(listOfObservedPeaks, monoisotopicMass, chargeState, totalIntensity, listOfRatios.StandardDeviation());
var stDev = listOfRatios.StandardDeviation();
DoubleHashSetPool.Return(listOfRatios);
return new IsotopicEnvelope(listOfObservedPeaks, monoisotopicMass, chargeState, totalIntensity, stDev);
}

private int ObserveAdjacentChargeStates(IsotopicEnvelope originalEnvelope, double mostIntensePeakMz, int massIndex, double deconvolutionTolerancePpm, double intensityRatioLimit, double minChargeToLookFor, double maxChargeToLookFor, List<double> monoisotopicMassPredictions)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Chemistry;
using MzLibUtil;

Expand Down Expand Up @@ -65,6 +63,9 @@ static DeconvolutionAlgorithm()
#endregion

protected readonly DeconvolutionParameters DeconvolutionParameters;
protected readonly HashSetPool<int> IntegerHashSetPool = new(32);
protected readonly HashSetPool<double> DoubleHashSetPool = new(32);


/// <summary>
/// Constructor for deconvolution algorithms, nothing should be added to child constructors
Expand Down
59 changes: 59 additions & 0 deletions mzLib/MzLibUtil/DictionaryPool.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
using Microsoft.Extensions.ObjectPool;
using System.Collections.Generic;
using System;

namespace MzLibUtil;

// var dictionaryPool = new DictionaryPool<string, int>();
// var dictionary = dictionaryPool.Get();
// dictionary["key"] = 42;
// do work with dictionary
// dictionaryPool.Return(dictionary);

/// <summary>
/// Pools <see cref="Dictionary{TKey, TValue}"/> instances so that frequently executed code
/// can reuse them instead of allocating a new dictionary each time.
/// </summary>
/// <remarks>
/// A dictionary is emptied when it is handed back through <see cref="Return"/>.
/// Callers should not touch an instance after returning it, because a later
/// <see cref="Get"/> may hand the same instance to other code.
/// </remarks>
public class DictionaryPool<TKey, TValue>
{
    private readonly ObjectPool<Dictionary<TKey, TValue>> _pool;

    /// <summary>
    /// Initializes a new instance of the <see cref="DictionaryPool{TKey, TValue}"/> class.
    /// </summary>
    /// <param name="initialCapacity">Initial capacity for Dictionary instances created by the pool.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="initialCapacity"/> is negative.</exception>
    public DictionaryPool(int initialCapacity = 16)
    {
        // Fail fast: without this check a bad capacity only surfaces later,
        // on the first Get() that has to create a new Dictionary.
        if (initialCapacity < 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(initialCapacity), initialCapacity, "Initial capacity cannot be negative.");
        }

        _pool = new DefaultObjectPool<Dictionary<TKey, TValue>>(new ClearingPolicy(initialCapacity));
    }

    /// <summary>
    /// Retrieves a Dictionary instance from the pool.
    /// </summary>
    /// <returns>A Dictionary instance obtained from the underlying object pool.</returns>
    public Dictionary<TKey, TValue> Get() => _pool.Get();

    /// <summary>
    /// Returns a Dictionary instance back to the pool. The instance is cleared as part of the return.
    /// </summary>
    /// <param name="dictionary">The Dictionary instance to return.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
    public void Return(Dictionary<TKey, TValue> dictionary)
    {
        ArgumentNullException.ThrowIfNull(dictionary);

        // ClearingPolicy.Return empties the dictionary when the pool takes it back,
        // so clearing it here as well would just repeat the same work.
        _pool.Return(dictionary);
    }

    /// <summary>
    /// Pool policy that creates pre-sized dictionaries and empties them when they are returned.
    /// </summary>
    private sealed class ClearingPolicy(int initialCapacity) : PooledObjectPolicy<Dictionary<TKey, TValue>>
    {
        public override Dictionary<TKey, TValue> Create()
        {
            return new Dictionary<TKey, TValue>(capacity: initialCapacity);
        }

        public override bool Return(Dictionary<TKey, TValue> obj)
        {
            // Remove all entries so the next consumer starts from an empty dictionary.
            obj.Clear();
            return true;
        }
    }
}
64 changes: 64 additions & 0 deletions mzLib/MzLibUtil/HashSetPool.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
using System;
using System.Collections.Generic;
using Microsoft.Extensions.ObjectPool;

namespace MzLibUtil;

// Example Usage:
// var pool = new HashSetPool<int>();
// var hashSet = pool.Get();
// hashSet.Add(1);
// Do Work
// pool.Return(hashSet);

// Used to pool HashSet instances to reduce memory allocations
/// <summary>
/// Pools <see cref="HashSet{T}"/> instances so that frequently executed code
/// can reuse them instead of allocating a new set each time.
/// </summary>
/// <remarks>
/// A set is emptied when it is handed back through <see cref="Return"/>.
/// Callers should not touch an instance after returning it, because a later
/// <see cref="Get"/> may hand the same instance to other code.
/// </remarks>
public class HashSetPool<T>
{
    private readonly ObjectPool<HashSet<T>> _pool;

    /// <summary>
    /// Initializes a new instance of the <see cref="HashSetPool{T}"/> class.
    /// </summary>
    /// <param name="initialCapacity">Initial capacity for HashSet instances created by the pool.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="initialCapacity"/> is negative.</exception>
    public HashSetPool(int initialCapacity = 16)
    {
        // Fail fast: without this check a bad capacity only surfaces later,
        // on the first Get() that has to create a new HashSet.
        if (initialCapacity < 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(initialCapacity), initialCapacity, "Initial capacity cannot be negative.");
        }

        _pool = new DefaultObjectPool<HashSet<T>>(new ClearingPolicy(initialCapacity));
    }

    /// <summary>
    /// Retrieves a HashSet instance from the pool.
    /// </summary>
    /// <returns>A HashSet instance obtained from the underlying object pool.</returns>
    public HashSet<T> Get() => _pool.Get();

    /// <summary>
    /// Returns a HashSet instance back to the pool. The instance is cleared as part of the return.
    /// </summary>
    /// <param name="hashSet">The HashSet instance to return.</param>
    /// <exception cref="ArgumentNullException"><paramref name="hashSet"/> is null.</exception>
    public void Return(HashSet<T> hashSet)
    {
        ArgumentNullException.ThrowIfNull(hashSet);

        // ClearingPolicy.Return empties the set when the pool takes it back,
        // so clearing it here as well would just repeat the same work.
        _pool.Return(hashSet);
    }

    /// <summary>
    /// Pool policy that creates pre-sized sets and empties them when they are returned.
    /// </summary>
    private sealed class ClearingPolicy(int initialCapacity) : PooledObjectPolicy<HashSet<T>>
    {
        public override HashSet<T> Create()
        {
            return new HashSet<T>(capacity: initialCapacity);
        }

        public override bool Return(HashSet<T> obj)
        {
            // Remove all items so the next consumer starts from an empty set.
            obj.Clear();
            return true;
        }
    }
}




1 change: 1 addition & 0 deletions mzLib/MzLibUtil/MzLibUtil.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
<PackageReference Include="CsvHelper" Version="32.0.3" />
<PackageReference Include="Easy.Common" Version="6.7.0" />
<PackageReference Include="MathNet.Numerics" Version="5.0.0" />
<PackageReference Include="Microsoft.Extensions.ObjectPool" Version="9.0.0" />
<PackageReference Include="Microsoft.Win32.Registry" Version="5.0.0" />
</ItemGroup>

Expand Down
29 changes: 12 additions & 17 deletions mzLib/MzLibUtil/PpmTolerance.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,40 +25,35 @@ namespace MzLibUtil
/// </summary>
public class PpmTolerance : Tolerance
{
private readonly double _factor;

/// <summary>
/// Creates a new tolerance given a unit, value, and whether the tolerance is ±
/// Creates a new tolerance given value
/// </summary>
/// <param name="unit">The units for this tolerance</param>
/// <param name="value">The numerical value of the tolerance</param>
public PpmTolerance(double value)
public PpmTolerance(double value)
: base(value)
{
_factor = value / 1e6;
}

public override string ToString()
{
return $"{"±"}{Value.ToString("f4", System.Globalization.CultureInfo.InvariantCulture)} PPM";
}
public override string ToString() => $"\u00b1{Value.ToString("f4", System.Globalization.CultureInfo.InvariantCulture)} PPM";

public override DoubleRange GetRange(double mean)
{
double tol = Value * mean / 1e6;
double tol = _factor * mean;
return new DoubleRange(mean - tol, mean + tol);
}

public override double GetMinimumValue(double mean)
{
return mean * (1 - (Value / 1e6));
}
public override double GetMinimumValue(double mean) => mean * (1 - _factor);

public override double GetMaximumValue(double mean)
{
return mean * (1 + (Value / 1e6));
}
public override double GetMaximumValue(double mean) => mean * (1 + _factor);

public override bool Within(double experimental, double theoretical)
{
return Math.Abs((experimental - theoretical) / theoretical * 1e6) <= Value;
double diff = experimental - theoretical;
double scaledTolerance = theoretical * _factor;
return -scaledTolerance <= diff && diff <= scaledTolerance;
}
}
}
34 changes: 19 additions & 15 deletions mzLib/Omics/BioPolymerWithSetModsExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
var subsequence = new StringBuilder();

// modification on peptide N-terminus
if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification? mod))
{
subsequence.Append('[' + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
subsequence.Append($"[{mod.MonoisotopicMass.RoundedDouble(6)}]");
}

for (int r = 0; r < withSetMods.Length; r++)
Expand All @@ -32,11 +32,11 @@
{
if (mod.MonoisotopicMass > 0)
{
subsequence.Append("[+" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
subsequence.Append($"[+{mod.MonoisotopicMass.RoundedDouble(6)}]");
}
else
{
subsequence.Append("[" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
subsequence.Append($"[{mod.MonoisotopicMass.RoundedDouble(6)}]");

Check warning on line 39 in mzLib/Omics/BioPolymerWithSetModsExtensions.cs

View check run for this annotation

Codecov / codecov/patch

mzLib/Omics/BioPolymerWithSetModsExtensions.cs#L39

Added line #L39 was not covered by tests
}
}
}
Expand All @@ -46,11 +46,11 @@
{
if (mod.MonoisotopicMass > 0)
{
subsequence.Append("[+" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
subsequence.Append($"[+{mod.MonoisotopicMass.RoundedDouble(6)}]");
}
else
{
subsequence.Append("[" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
subsequence.Append($"[{mod.MonoisotopicMass.RoundedDouble(6)}]");
}
}
return subsequence.ToString();
Expand All @@ -68,14 +68,15 @@
string essentialSequence = withSetMods.BaseSequence;
if (modstoWritePruned != null)
{
var sbsequence = new StringBuilder();
var sbsequence = new StringBuilder(withSetMods.FullSequence.Length);

// variable modification on peptide N-terminus
if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification pep_n_term_variable_mod))
{
if (modstoWritePruned.ContainsKey(pep_n_term_variable_mod.ModificationType))
{
sbsequence.Append('[' + pep_n_term_variable_mod.ModificationType + ":" + pep_n_term_variable_mod.IdWithMotif + ']');
sbsequence.Append(
$"[{pep_n_term_variable_mod.ModificationType}:{pep_n_term_variable_mod.IdWithMotif}]");
}
}
for (int r = 0; r < withSetMods.Length; r++)
Expand All @@ -86,7 +87,8 @@
{
if (modstoWritePruned.ContainsKey(residue_variable_mod.ModificationType))
{
sbsequence.Append('[' + residue_variable_mod.ModificationType + ":" + residue_variable_mod.IdWithMotif + ']');
sbsequence.Append(
$"[{residue_variable_mod.ModificationType}:{residue_variable_mod.IdWithMotif}]");
}
}
}
Expand All @@ -96,7 +98,8 @@
{
if (modstoWritePruned.ContainsKey(pep_c_term_variable_mod.ModificationType))
{
sbsequence.Append('[' + pep_c_term_variable_mod.ModificationType + ":" + pep_c_term_variable_mod.IdWithMotif + ']');
sbsequence.Append(
$"[{pep_c_term_variable_mod.ModificationType}:{pep_c_term_variable_mod.IdWithMotif}]");
}
}

Expand All @@ -112,12 +115,13 @@
/// <returns></returns>
public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMods)
{
var subSequence = new StringBuilder();
// start string builder with initial capacity to avoid resizing costs.
var subSequence = new StringBuilder(withSetMods.BaseSequence.Length + withSetMods.AllModsOneIsNterminus.Count * 30);

// modification on peptide N-terminus
if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification? mod))
{
subSequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']');
subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
}

for (int r = 0; r < withSetMods.Length; r++)
Expand All @@ -127,14 +131,14 @@
// modification on this residue
if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod))
{
subSequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']');
subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
}
}

// modification on peptide C-terminus
if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod))
{
subSequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']');
subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
}

return subSequence.ToString();
Expand Down
Loading
Loading