Skip to content

Commit

Permalink
Update HarmonicParameters.cs
Browse files Browse the repository at this point in the history
Issue #471 Shifted method DetectHarmonicsInSpectrogramData() to a location where it better belongs.
  • Loading branch information
towsey committed Jun 13, 2021
1 parent d873b10 commit f3bc0f8
Showing 1 changed file with 103 additions and 1 deletion.
104 changes: 103 additions & 1 deletion src/AudioAnalysisTools/Harmonics/HarmonicParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ namespace AnalysisPrograms.Recognizers.Base
using System.Linq;
using Acoustics.Shared;
using AudioAnalysisTools;
using AudioAnalysisTools.DSP;
using AudioAnalysisTools.Events;
using AudioAnalysisTools.Events.Types;
using AudioAnalysisTools.StandardSpectrograms;
Expand Down Expand Up @@ -95,7 +96,7 @@ public static (List<EventCommon> SpectralEvents, double[] AmplitudeArray, double

//ii: DETECT HARMONICS
// now look for harmonics in search band using the Xcorrelation-DCT method.
var results = CrossCorrelation.DetectHarmonicsInSpectrogramData(subMatrix, decibelThreshold);
var results = DetectHarmonicsInSpectrogramData(subMatrix, decibelThreshold);

// set up score arrays
double[] dBArray = results.Item1; // this is not used currently.
Expand Down Expand Up @@ -160,6 +161,107 @@ public static (List<EventCommon> SpectralEvents, double[] AmplitudeArray, double
return (harmonicEvents, dBArray, harmonicIntensityScores2);
}

/// <summary>
/// A METHOD TO DETECT a set of stacked HARMONICS/FORMANTS in the sub-band of a spectrogram.
/// Developed for GenericRecognizer of harmonics.
/// NOTE 1: This method assumes the matrix is derived from a spectrogram rotated so that the matrix rows are spectral columns of the spectrogram.
/// NOTE 2: As of March 2020, this method averages the values in five adjacent frames. This is to reduce noise.
/// But it requires that the frequency of any potential formants is not changing rapidly.
/// A side-effect is that the edges of harmonic events become blurred.
/// This may not be suitable for detecting human speech. However can reduce the frame step.
/// NOTE 3: This method assumes that the minimum number of formants in a stack = 3.
/// This means that the first 4 values in the returned array of DCT coefficients are set = 0 (see below).
/// </summary>
/// <param name="m">data matrix derived from the subband of a spectrogram.</param>
/// <param name="xThreshold">Minimum acceptable value to be considered part of a harmonic.</param>
/// <returns>three arrays: dBArray, intensity, maxIndexArray.</returns>
public static Tuple<double[], double[], int[]> DetectHarmonicsInSpectrogramData(double[,] m, double xThreshold)
{
int rowCount = m.GetLength(0);
int colCount = m.GetLength(1);
var binCount = m.GetLength(1);

//set up the cosine coefficients
double[,] cosines = MFCCStuff.Cosines(binCount, binCount);

// set up time-frame arrays to store decibels, formant intensity and max index.
var dBArray = new double[rowCount];
var intensity = new double[rowCount];
var maxIndexArray = new int[rowCount];

// for all time frames
for (int t = 2; t < rowCount - 2; t++)
{
// Smooth the frame values by taking the average of five adjacent frames
var frame1 = MatrixTools.GetRow(m, t - 2);
var frame2 = MatrixTools.GetRow(m, t - 1);
var frame3 = MatrixTools.GetRow(m, t);
var frame4 = MatrixTools.GetRow(m, t + 1);
var frame5 = MatrixTools.GetRow(m, t + 2);

// set up a frame of average db values.
var avFrame = new double[colCount];
for (int i = 0; i < colCount; i++)
{
//avFrame[i] = (frame2[i] + frame3[i] + frame4[i]) / 3;
avFrame[i] = (frame1[i] + frame2[i] + frame3[i] + frame4[i] + frame5[i]) / 5;
}

// ignore frame if its maximum decibel value is below the threshold.
double maxValue = avFrame.Max();
dBArray[t] = maxValue;
if (maxValue < xThreshold)
{
continue;
}

// do the autocross-correlation prior to doing the DCT.
double[] xr = AutoAndCrossCorrelation.AutoCrossCorr(avFrame);

// xr has twice length of frame and is symmetrical. Require only first half.
double[] normXr = new double[colCount];
for (int i = 0; i < colCount; i++)
{
// Typically normalise the xcorr values for overlap count.
// i.e. normXr[i] = xr[i] / (colCount - i);
// But for harmonics, this introduces too much noise - need to give less weight to the less overlapped values.
// Therefore just normalise by dividing values by the first, so first value = 1.
normXr[i] = xr[i] / xr[0];
}

//normXr = DataTools.DiffFromMean(normXr);

// fit the x-correlation array to a line to remove first order trend.
// This will help in detecting the correct maximum DCT coefficient.
var xValues = new double[normXr.Length];
for (int j = 0; j < xValues.Length; j++)
{
xValues[j] = j;
}

// get the line of best fit and subtract to get deviation from the line.
Tuple<double, double> values = MathNet.Numerics.Fit.Line(xValues, normXr);
var intercept = values.Item1;
var slope = values.Item2;
for (int j = 0; j < xValues.Length; j++)
{
var lineValue = (slope * j) + intercept;
normXr[j] -= lineValue;
}

// now do DCT across the detrended auto-cross-correlation
// set the first four values in the returned DCT coefficients to 0.
// We require a minimum of three formants, that is two gaps.
int lowerDctBound = 4;
var dctCoefficients = Oscillations2012.DoDct(normXr, cosines, lowerDctBound);
int indexOfMaxValue = DataTools.GetMaxIndex(dctCoefficients);
intensity[t] = dctCoefficients[indexOfMaxValue];
maxIndexArray[t] = indexOfMaxValue;
}

return Tuple.Create(dBArray, intensity, maxIndexArray);
}

/// <summary>
/// Finds harmonic events in an array harmonic scores.
/// NOTE: The score array is assumed to be temporal i.e. each element of the array is derived from a time frame.
Expand Down

0 comments on commit f3bc0f8

Please sign in to comment.