added downsampling step to feature extraction and feature learning process
QutEcoacoustics · Oct 31, 2018 · e9be1d4 · e9be1d4
1 parent 7c915e6
commit e9be1d4
Show file tree

Hide file tree

Showing 5 changed files with 108 additions and 59 deletions.
diff --git a/src/AnalysisConfigFiles/FeatureLearningConfig.yml b/src/AnalysisConfigFiles/FeatureLearningConfig.yml
@@ -18,7 +18,6 @@ FrequencyScaleType: Mel
 FrameSize: 1024
 FinalBinCount: 128
 
-
 # The default values for minFreqBin and maxFreqBin are 1 and FinalBinCount
 # For any other arbitrary frequency bin bounds, these two parameters need to be manually set.
 MinFreqBin: 24
@@ -44,9 +43,11 @@ NumRandomPatches: 1000
 # the number of clusters to be generated from the selected patch set
 NumClusters: 256
 
-
 # Applying noise reduction and whitening if these options are set to 'true'
 DoNoiseReduction: true
 DoWhitening: true
+
+# The downsampling factor applied along the temporal axis using max pooling (a factor of 1 leaves the data unchanged)
+MaxPoolingFactor: 6
 ...
 
diff --git a/src/AnalysisPrograms/MahnooshSandpit.cs b/src/AnalysisPrograms/MahnooshSandpit.cs
@@ -10,19 +10,15 @@ namespace AnalysisPrograms
     using System.Drawing.Imaging;
     using System.IO;
     using System.Linq;
-    using System.Text;
     using System.Threading.Tasks;
-    using Accord.MachineLearning;
     using Accord.Math;
-    using Accord.Statistics;
     using Acoustics.Shared;
     using Acoustics.Shared.ConfigFile;
     using Acoustics.Shared.Csv;
     using AudioAnalysisTools.DSP;
     using AudioAnalysisTools.StandardSpectrograms;
     using AudioAnalysisTools.WavTools;
     using McMaster.Extensions.CommandLineUtils;
-    using NeuralNets;
     using Production.Arguments;
     using TowseyLibrary;
 
@@ -34,17 +30,17 @@ public void Execute(Arguments arguments)
         {
             LoggedConsole.WriteLine("feature learning process...");
 
-            var inputDir = @"D:\Mahnoosh\Liz\"; //@"C:\Users\kholghim\Mahnoosh\Liz\"; // @"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; // @"M:\Postdoc\Liz\"; //
+            var inputDir = @"M:\Postdoc\Liz\"; //@"C:\Users\kholghim\Mahnoosh\Liz\"; //@"D:\Mahnoosh\Liz\"; // @"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; // 
             var resultDir = Path.Combine(inputDir, "FeatureLearning");
             var inputPath = Path.Combine(inputDir, "TrainSet"); //PatchSamplingSegments //PatchSampling
             var trainSetPath = Path.Combine(inputDir, "TrainSet");
-            var testSetPath = Path.Combine(inputDir, "TestSet");
-            var configPath = @"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; //@"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; //
-            var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png");
-            var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
-            var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");
-            var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png");
-            var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp");
+            // var testSetPath = Path.Combine(inputDir, "TestSet");
+            var configPath = @"C:\Users\kholghim\Mahnoosh\Liz\FeatureLearningConfig.yml"; //@"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; //@"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; //
+            // var outputMelImagePath = Path.Combine(resultDir, "MelScaleSpectrogram.png");
+            // var outputNormMelImagePath = Path.Combine(resultDir, "NormalizedMelScaleSpectrogram.png");
+            // var outputNoiseReducedMelImagePath = Path.Combine(resultDir, "NoiseReducedMelSpectrogram.png");
+            // var outputReSpecImagePath = Path.Combine(resultDir, "ReconstrcutedSpectrogram.png");
+            // var outputClusterImagePath = Path.Combine(resultDir, "Clusters.bmp");
 
             // +++++++++++++++++++++++++++++++++++++++++++++++++patch sampling from 1-min recordings
 
@@ -946,11 +942,11 @@ public override Task<int> Execute(CommandLineApplication app)
         public static void ExtractClusteringFeatures()
         {
             LoggedConsole.WriteLine("feature extraction process...");
-            var inputDir = @"D:\Mahnoosh\Liz\"; //@"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; //@"M:\Postdoc\Liz\"; //
+            var inputDir = @"M:\Postdoc\Liz\"; //@"D:\Mahnoosh\Liz\"; //@"C:\Users\kholghim\Mahnoosh\UnsupervisedFeatureLearning\"; //@"M:\Postdoc\Liz\"; //
             var resultDir = Path.Combine(inputDir, "FeatureLearning");
             //var trainSetPath = Path.Combine(inputDir, "TrainSet");
             var testSetPath = Path.Combine(inputDir, "TestSet");
-            var configPath = @"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; //@"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; //  
+            var configPath = @"C:\Users\kholghim\Mahnoosh\Liz\FeatureLearningConfig.yml"; //@"D:\Mahnoosh\Liz\AnalysisConfigFiles\FeatureLearningConfig.yml"; //@"C:\Work\GitHub\audio-analysis\src\AnalysisConfigFiles\FeatureLearningConfig.yml"; // 
             var centroidsPath = Path.Combine(resultDir, "ClusterCentroids0.csv");
 
             var configFile = configPath.ToFileInfo();
@@ -985,7 +981,7 @@ public static void GenerateSpectrograms()
 
             int frameSize = 1024;
             int finalBinCount = 256;
-            int hertzInterval = 1000;
+            // int hertzInterval = 1000;
             FreqScaleType scaleType = FreqScaleType.Mel;
             var settings = new SpectrogramSettings()
             {

diff --git a/src/AudioAnalysisTools/DSP/FeatureExtraction.cs b/src/AudioAnalysisTools/DSP/FeatureExtraction.cs
@@ -61,9 +61,12 @@ public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config,
             // the number of frames that their feature vectors will be concatenated in order to preserve temporal information.
             int frameWindowLength = config.FrameWindowLength;
 
-            // he step size to make a window of frames
+            // the step size to make a window of frames
             int stepSize = config.StepSize;
 
+            // the downsampling factor used for max pooling
+            int maxPoolingFactor = config.MaxPoolingFactor;
+
             // check whether there is any file in the folder/subfolders
             if (Directory.GetFiles(inputPath, "*", SearchOption.AllDirectories).Length == 0)
             {
@@ -88,7 +91,7 @@ public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config,
                 // process the wav file if it is not empty
                 if (fileInfo.Length != 0)
                 {
-                    string pathToSimilrityVectorsFile = Path.Combine(simVecDir.FullName, fileInfo.Name + ".csv");
+                    string pathToSimilarityVectorsFile = Path.Combine(simVecDir.FullName, fileInfo.Name + ".csv");
                     var recording = new AudioRecording(filePath);
                     settings.SourceFileName = recording.BaseName;
 
@@ -124,9 +127,13 @@ public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config,
                     List<double[,]> allSequentialPatchMatrix = new List<double[,]>();
                     for (int i = 0; i < matrices2.GetLength(0); i++)
                     {
-                        int rows = matrices2[i].GetLength(0);
-                        int columns = matrices2[i].GetLength(1);
-                        var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
+                        // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
+                        double[,] downsampledMatrix = FeatureLearning.MaxPooling(matrices2[i], config.MaxPoolingFactor);
+
+                        int rows = downsampledMatrix.GetLength(0); // matrices2[i].GetLength(0);
+                        int columns = downsampledMatrix.GetLength(1); // matrices2[i].GetLength(1);
+                        var sequentialPatches = PatchSampling.GetPatches(downsampledMatrix, patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
+                        //var sequentialPatches = PatchSampling.GetPatches(matrices2[i], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth), PatchSampling.SamplingMethod.Sequential);
                         allSequentialPatchMatrix.Add(sequentialPatches.ToMatrix());
                     }
 
@@ -186,16 +193,16 @@ public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config,
                             similarityVectors[j] = allNormCentroids.ToArray()[i].ToMatrix().Dot(normVector);
                         }
 
-                        Csv.WriteMatrixToCsv(pathToSimilrityVectorsFile.ToFileInfo(), similarityVectors.ToMatrix());
+                        Csv.WriteMatrixToCsv(pathToSimilarityVectorsFile.ToFileInfo(), similarityVectors.ToMatrix());
 
-                        // To preserve the temporal information, we can concatenate the similarity vectors of a group of frames using
-                        // FrameWindowLength
+                        // To preserve the temporal information, we can concatenate the similarity vectors of a group of frames
+                        // using FrameWindowLength
 
                         // patchId refers to the patch id that has been processed so far according to the step size.
                         // if we want no overlap between different frame windows, then stepSize = frameWindowLength
                         int patchId = 0;
 
-                        // patchCounter refers to the number of patches that has been processed so far accroding to FrameWindowLength.
+                        // patchCounter refers to the number of patches that has been processed so far according to FrameWindowLength.
                         //int patchCounter = 0;
 
                         while (patchId + frameWindowLength < similarityVectors.GetLength(0))
@@ -229,8 +236,12 @@ public static void UnsupervisedFeatureExtraction(FeatureLearningSettings config,
                     List<double[,]> allStdFeatureVectors = new List<double[,]>();
                     List<double[,]> allSkewnessFeatureVectors = new List<double[,]>();
 
-                    // number of frames needs to be concatenated to form 1 second. Each 24 frames make 1 second.
-                    int numFrames = (24 / patchHeight) * 60; //24 // patchHeight; //
+                    // Number of frames that need to be concatenated to form 1 minute (in the case of Black Rail).
+                    // Every 24 frames form 1 second.
+                    // Factors such as patchHeight, stepSize and maxPoolingFactor must be taken into account in temporal summarization.
+                    //int numFrames = (24 / patchHeight) * 60; //24 // patchHeight;
+
+                    int numFrames = (24 * 60) / (patchHeight * stepSize * maxPoolingFactor);
 
                     foreach (var freqBandFeature in allFeatureTransVectors)
                     {

diff --git a/src/AudioAnalysisTools/DSP/FeatureLearning.cs b/src/AudioAnalysisTools/DSP/FeatureLearning.cs
@@ -9,6 +9,7 @@ namespace AudioAnalysisTools.DSP
     using System.IO;
     using Accord.Math;
     using StandardSpectrograms;
+    using TowseyLibrary;
     using WavTools;
 
     /// <summary>
@@ -117,22 +118,28 @@ public static class FeatureLearning
 
                     while (count < allSubmatrices.Count)
                     {
+                        // downsampling the input matrix by a factor of n (MaxPoolingFactor) using max pooling
+                        double[,] downsampledMatrix = MaxPooling(allSubmatrices.ToArray()[count], config.MaxPoolingFactor);
+
                         randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling
-                            .GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, numRandomPatches,
+                            .GetPatches(downsampledMatrix, patchWidth, patchHeight, numRandomPatches,
+                                PatchSampling.SamplingMethod.Random).ToMatrix());
+
+                        /*
+                        randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling.
+                            GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, numRandomPatches, 
                                 PatchSampling.SamplingMethod.Random).ToMatrix());
-                         /*
-
-                        //  take the total number of frames out of each second minute paper
-                        if (no / 2 == 0)
-                        {
-                            int rows = allSubmatrices.ToArray()[count].GetLength(0);
-                            int columns = allSubmatrices.ToArray()[count].GetLength(1);
-                            randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling
-                                .GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth),
-                                    PatchSampling.SamplingMethod.Sequential).ToMatrix());
-                            no++;
-                        }
-                        */
+                       //  take the total number of frames out of each second minute paper
+                       if (no / 2 == 0)
+                       {
+                           int rows = allSubmatrices.ToArray()[count].GetLength(0);
+                           int columns = allSubmatrices.ToArray()[count].GetLength(1);
+                           randomPatchLists[$"randomPatch{count.ToString()}"].Add(PatchSampling
+                               .GetPatches(allSubmatrices.ToArray()[count], patchWidth, patchHeight, (rows / patchHeight) * (columns / patchWidth),
+                                   PatchSampling.SamplingMethod.Sequential).ToMatrix());
+                           no++;
+                       }
+                       */
                         count++;
                     }
                 }
@@ -163,6 +170,34 @@ public static class FeatureLearning
             return allClusteringOutput;
         }
 
+        /// <summary>
+        /// Downsamples the input matrix along its first dimension (rows, the temporal axis) using max pooling:
+        /// every block of <paramref name="factor"/> consecutive rows is replaced by a single row that holds
+        /// the column-wise maximum of that block.
+        /// </summary>
+        /// <param name="matrix">The matrix to downsample; rows are pooled, the number of columns is preserved.</param>
+        /// <param name="factor">The number of consecutive rows merged into one output row; must be at least 1.</param>
+        /// <returns>
+        /// A matrix with one row per complete block of <paramref name="factor"/> input rows.
+        /// Trailing rows that do not fill a complete block are discarded.
+        /// </returns>
+        /// <exception cref="System.ArgumentNullException">Thrown when <paramref name="matrix"/> is null.</exception>
+        /// <exception cref="System.ArgumentOutOfRangeException">Thrown when <paramref name="factor"/> is less than 1.</exception>
+        public static double[,] MaxPooling(double[,] matrix, int factor)
+        {
+            if (matrix == null)
+            {
+                throw new System.ArgumentNullException(nameof(matrix));
+            }
+
+            // a factor of 0 would never advance the row cursor below, so the loop would not terminate;
+            // a negative factor would move the cursor backwards
+            if (factor < 1)
+            {
+                throw new System.ArgumentOutOfRangeException(nameof(factor), factor, "The max pooling factor must be at least 1.");
+            }
+
+            int rowCount = matrix.GetLength(0);
+            int columnCount = matrix.GetLength(1);
+            List<double[]> downsampledMatrix = new List<double[]>();
+
+            // only complete blocks of 'factor' rows are pooled; the bound is written as a subtraction
+            // so that a very large factor cannot overflow the comparison
+            for (int count = 0; count <= rowCount - factor; count += factor)
+            {
+                double[] maxValues = new double[columnCount];
+                for (int j = 0; j < columnCount; j++)
+                {
+                    // collect the values of column j over the current block of rows
+                    double[] temp = new double[factor];
+                    for (int i = 0; i < factor; i++)
+                    {
+                        temp[i] = matrix[count + i, j];
+                    }
+
+                    maxValues[j] = temp.GetMaxValue();
+                }
+
+                downsampledMatrix.Add(maxValues);
+            }
+
+            return downsampledMatrix.ToArray().ToMatrix();
+        }
+
         /// <summary>
         /// This method is called supervised feature learning because the frames to form a cluster
         /// have been manually selected from 1-min recordings.

diff --git a/src/AudioAnalysisTools/DSP/FeatureLearningSettings.cs b/src/AudioAnalysisTools/DSP/FeatureLearningSettings.cs
@@ -38,55 +38,61 @@ public class FeatureLearningSettings : Config
 
         public const bool DefaultDoWhitening = true;
 
+        public const int DefaultMaxPoolingFactor = 1;
+
         /// <summary>
         /// Initializes a new instance of the <see cref="FeatureLearningSettings"/> class.
         /// CONSTRUCTOR
         /// </summary>
+
+        /*
         public FeatureLearningSettings()
         {
             this.FrequencyScaleType = DefaultFrequencyScaleType;
-            //this.HertzInterval = DefaultHertzInterval;
             this.FrameSize = DefaultFrameSize;
             this.FinalBinCount = DefaultFinalBinCount;
             this.MinFreqBin = DefaultMinFreqBin;
             this.MaxFreqBin = DefaultMaxFreqBin;
             this.NumFreqBand = DefaultNumFreqBand;
-            //this.PatchWidth = DefaultPatchWidth;
             this.PatchHeight = DefaultPatchHeight;
             this.NumRandomPatches = DefaultNumRandomPatches;
             this.NumClusters = DefaultNumClusters;
             this.DoNoiseReduction = DefaultDoNoiseReduction;
             this.DoWhitening = DefaultDoWhitening;
+            this.MaxPoolingFactor = DefaultMaxPoolingFactor;
         }
+        */
+
+        public FreqScaleType FrequencyScaleType { get; set; } = DefaultFrequencyScaleType;
 
-        public FreqScaleType FrequencyScaleType { get; set; }
+        //public int HertzInterval { get; set; } = DefaultHertzInterval;
 
-        //public int HertzInterval { get; set; }
+        public int FrameSize { get; set; } = DefaultFrameSize;
 
-        public int FrameSize { get; set; }
+        public int FinalBinCount { get; set; } = DefaultFinalBinCount;
 
-        public int FinalBinCount { get; set; }
+        public int MinFreqBin { get; set; } = DefaultMinFreqBin;
 
-        public int MinFreqBin { get; set; }
+        public int MaxFreqBin { get; set; } = DefaultMaxFreqBin;
 
-        public int MaxFreqBin { get; set; }
+        public int NumFreqBand { get; set; } = DefaultNumFreqBand;
 
-        public int NumFreqBand { get; set; }
+        //public int PatchWidth { get; set; } = DefaultPatchWidth;
 
-        //public int PatchWidth { get; set; }
+        public int PatchHeight { get; set; } = DefaultPatchHeight;
 
-        public int PatchHeight { get; set; }
+        public int FrameWindowLength { get; set; } = DefaultFrameWindowLength;
 
-        public int FrameWindowLength { get; set; }
+        public int StepSize { get; set; } = DefaultStepSize;
 
-        public int StepSize { get; set; }
+        public int NumRandomPatches { get; set; } = DefaultNumRandomPatches;
 
-        public int NumRandomPatches { get; set; }
+        public int NumClusters { get; set; } = DefaultNumClusters;
 
-        public int NumClusters { get; set; }
+        public bool DoNoiseReduction { get; set; } = DefaultDoNoiseReduction;
 
-        public bool DoNoiseReduction { get; set; }
+        public bool DoWhitening { get; set; } = DefaultDoWhitening;
 
-        public bool DoWhitening { get; set; }
+        public int MaxPoolingFactor { get; set; } = DefaultMaxPoolingFactor;
     }
 }