From fed869ffecceb737c2a57cbd3de2772a7723fa56 Mon Sep 17 00:00:00 2001 From: Nic Bollis Date: Thu, 12 Dec 2024 13:19:43 -0600 Subject: [PATCH] Averaging slight adjustment for ensured thread safety (#810) * Refactor for thread safety and performance - Import `System.Collections.Concurrent` for concurrent collections. - Change `averagedPeaks` from `List` to `ConcurrentBag` for thread safety. - Move `binIncidences` array creation outside parallel loop to avoid redundancy. - Modify `Parallel.ForEach` loop to use array length instead of list count. - Remove `lock` around `averagedPeaks.Add` as `ConcurrentBag` is thread-safe. * Test Project cleanup * used a partitioner * Oopsie * Removed bag as it was an unnecessary structure --- .../Algorithms/SpectraAveraging.cs | 24 +++++++++---------- .../Util/SpectralAveragingParameters.cs | 4 ++-- mzLib/Test/AveragingTests/TestAveraging.cs | 2 +- .../AveragingTests/TestAveragingExtensions.cs | 2 +- .../TestAveragingSpectraWriteFile.cs | 2 +- .../TestData}/TDYeastFractionMMResult.psmtsv | 0 .../TestData}/TDYeastFractionMS1.mzML | 0 .../TestSpectraFileAveraging.cs | 2 +- mzLib/Test/Test.csproj | 4 ++-- 9 files changed, 19 insertions(+), 21 deletions(-) rename mzLib/Test/{AveragingTestData => AveragingTests/TestData}/TDYeastFractionMMResult.psmtsv (100%) rename mzLib/Test/{AveragingTestData => AveragingTests/TestData}/TDYeastFractionMS1.mzML (100%) diff --git a/mzLib/SpectralAveraging/Algorithms/SpectraAveraging.cs b/mzLib/SpectralAveraging/Algorithms/SpectraAveraging.cs index 0bf3f7230..b93d473ab 100644 --- a/mzLib/SpectralAveraging/Algorithms/SpectraAveraging.cs +++ b/mzLib/SpectralAveraging/Algorithms/SpectraAveraging.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; @@ -39,8 +40,7 @@ public static double[][] AverageSpectra(double[][] xArrays, double[][] yArrays, /// yArrays of spectra to be averaged /// how to perform the 
averaging /// - private static double[][] MzBinning(double[][] xArrays, double[][] yArrays, - SpectralAveragingParameters parameters) + private static double[][] MzBinning(double[][] xArrays, double[][] yArrays, SpectralAveragingParameters parameters) { // get tics var tics = yArrays.Select(p => p.Sum()).ToArray(); @@ -56,21 +56,20 @@ private static double[][] MzBinning(double[][] xArrays, double[][] yArrays, var weights = SpectralWeighting.CalculateSpectraWeights(xArrays, yArrays, parameters.SpectralWeightingType); // reject outliers and average bins - List<(double mz, double intensity)> averagedPeaks = new(); - Parallel.ForEach(Enumerable.Range(0, parameters.MaxThreadsToUsePerFile), (iterationIndex) => - { - // each bin index that contains peaks - var binIncidences = bins.Keys.ToList(); + var binIncidences = bins.Keys.ToList(); + (double mz, double intensity)[] averagedPeaks = new (double, double)[binIncidences.Count]; + var partitioner = Partitioner.Create(0, binIncidences.Count); - // iterate through each bin index which contains peaks - for (; iterationIndex < binIncidences.Count; iterationIndex += parameters.MaxThreadsToUsePerFile) + Parallel.ForEach(partitioner, new ParallelOptions { MaxDegreeOfParallelism = parameters.MaxThreadsToUsePerFile }, (range, state) => + { + for (int i = range.Item1; i < range.Item2; i++) { - var peaksFromBin = bins[binIncidences[iterationIndex]]; + var peaksFromBin = bins[binIncidences[i]]; peaksFromBin = OutlierRejection.RejectOutliers(peaksFromBin, parameters); if (!peaksFromBin.Any()) continue; - lock (averagedPeaks) - averagedPeaks.Add(AverageBin(peaksFromBin, weights)); + + averagedPeaks[i] = AverageBin(peaksFromBin, weights); } }); @@ -86,7 +85,6 @@ private static double[][] MzBinning(double[][] xArrays, double[][] yArrays, }; } - #region Helpers /// diff --git a/mzLib/SpectralAveraging/Util/SpectralAveragingParameters.cs b/mzLib/SpectralAveraging/Util/SpectralAveragingParameters.cs index 19f2b8be0..a18839461 100644 --- 
a/mzLib/SpectralAveraging/Util/SpectralAveragingParameters.cs +++ b/mzLib/SpectralAveraging/Util/SpectralAveragingParameters.cs @@ -37,7 +37,7 @@ public void SetValues(OutlierRejectionType outlierRejectionType = OutlierRejecti SpectralAveragingType spectralAveragingType = SpectralAveragingType.MzBinning, NormalizationType normalizationType = NormalizationType.RelativeToTics, SpectraFileAveragingType specAveragingType = SpectraFileAveragingType.AverageAll, - OutputType outputType = OutputType.MzML, int numToAverage = 5, int overlap = 2, + OutputType outputType = OutputType.MzML, int numToAverage = 5, int overlap = 4, double percentile = 0.1, double minSigma = 1.5, double maxSigma = 1.5, double binSize = 0.01, int maxThreads = 1) { @@ -67,7 +67,7 @@ public void SetDefaultValues() SpectraFileAveragingType = SpectraFileAveragingType.AverageAll; NormalizationType = NormalizationType.RelativeToTics; OutputType = OutputType.MzML; - ScanOverlap = 2; + ScanOverlap = 4; NumberOfScansToAverage = 5; Percentile = 0.1; MinSigmaValue = 1.5; diff --git a/mzLib/Test/AveragingTests/TestAveraging.cs b/mzLib/Test/AveragingTests/TestAveraging.cs index 9995af349..47eb8a729 100644 --- a/mzLib/Test/AveragingTests/TestAveraging.cs +++ b/mzLib/Test/AveragingTests/TestAveraging.cs @@ -40,7 +40,7 @@ public static List DummyMzCopy public static void OneTimeSetup() { ActualScans = MsDataFileReader.GetDataFile(Path.Combine(TestContext.CurrentContext.TestDirectory, - @"AveragingTestData\TDYeastFractionMS1.mzML")).GetAllScansList(); + @"AveragingTests\TestData\TDYeastFractionMS1.mzML")).GetAllScansList(); double[] xArray = new double[] { 100.1453781, 200, 300, 400, 500, 600, 700, 800, 900.4123745 }; double[] yArray1 = new double[] { 0, 5, 0, 0, 0, 0, 0, 10, 0, 0 }; double[] yArray2 = new double[] { 0, 5, 0, 0, 0, 0, 0, 10, 0, 0 }; diff --git a/mzLib/Test/AveragingTests/TestAveragingExtensions.cs b/mzLib/Test/AveragingTests/TestAveragingExtensions.cs index 82419db7e..6b9c27da4 100644 --- 
a/mzLib/Test/AveragingTests/TestAveragingExtensions.cs +++ b/mzLib/Test/AveragingTests/TestAveragingExtensions.cs @@ -37,7 +37,7 @@ public static void OneTimeSetup() { ActualScans = MsDataFileReader.GetDataFile(Path.Combine(TestContext.CurrentContext.TestDirectory, - @"AveragingTestData\TDYeastFractionMS1.mzML")).GetAllScansList().Take(25).ToList(); + @"AveragingTests\TestData\TDYeastFractionMS1.mzML")).GetAllScansList().Take(25).ToList(); double[] xArray = new double[] { 100.1453781, 200, 300, 400, 500, 600, 700, 800, 900.4123745 }; double[] yArray1 = new double[] { 0, 5, 0, 0, 0, 0, 0, 10, 0, 0 }; double[] yArray2 = new double[] { 0, 5, 0, 0, 0, 0, 0, 10, 0, 0 }; diff --git a/mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs b/mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs index 511eeb35c..d4ab463e4 100644 --- a/mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs +++ b/mzLib/Test/AveragingTests/TestAveragingSpectraWriteFile.cs @@ -25,7 +25,7 @@ public class TestAveragingSpectraWriteFile public static void OneTimeSetup() { Parameters = new SpectralAveragingParameters(); - OutputDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, @"AveragingTestData"); + OutputDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, "AveragingTests", "TestData"); SpectraPath = Path.Combine(OutputDirectory, "TDYeastFractionMS1.mzML"); Scans = MsDataFileReader.GetDataFile(SpectraPath).GetAllScansList().Take(50).ToList(); diff --git a/mzLib/Test/AveragingTestData/TDYeastFractionMMResult.psmtsv b/mzLib/Test/AveragingTests/TestData/TDYeastFractionMMResult.psmtsv similarity index 100% rename from mzLib/Test/AveragingTestData/TDYeastFractionMMResult.psmtsv rename to mzLib/Test/AveragingTests/TestData/TDYeastFractionMMResult.psmtsv diff --git a/mzLib/Test/AveragingTestData/TDYeastFractionMS1.mzML b/mzLib/Test/AveragingTests/TestData/TDYeastFractionMS1.mzML similarity index 100% rename from 
mzLib/Test/AveragingTestData/TDYeastFractionMS1.mzML rename to mzLib/Test/AveragingTests/TestData/TDYeastFractionMS1.mzML diff --git a/mzLib/Test/AveragingTests/TestSpectraFileAveraging.cs b/mzLib/Test/AveragingTests/TestSpectraFileAveraging.cs index f46db019d..771c35ced 100644 --- a/mzLib/Test/AveragingTests/TestSpectraFileAveraging.cs +++ b/mzLib/Test/AveragingTests/TestSpectraFileAveraging.cs @@ -244,7 +244,7 @@ public static List DummyDDAScansOutOfOrder #endregion public static List ActualScans => MsDataFileReader.GetDataFile(Path.Combine(TestContext.CurrentContext.TestDirectory, - @"AveragingTestData\TDYeastFractionMS1.mzML")).GetAllScansList().Take(50).ToList(); + @"AveragingTests\TestData\TDYeastFractionMS1.mzML")).GetAllScansList().Take(50).ToList(); public static string NativeId; diff --git a/mzLib/Test/Test.csproj b/mzLib/Test/Test.csproj index 13f09b29f..d87c4245b 100644 --- a/mzLib/Test/Test.csproj +++ b/mzLib/Test/Test.csproj @@ -432,10 +432,10 @@ Always - + Always - + Always