@@ -703,7 +703,7 @@ def equalizer_biquad(waveform, sample_rate, center_freq, gain, Q=0.707):
703
703
Args:
704
704
waveform (torch.Tensor): audio waveform of dimension of `(channel, time)`
705
705
sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
706
- center_freq (float): filter’ s central frequency
706
+ center_freq (float): filter's central frequency
707
707
gain (float): desired gain at the boost (or attenuation) in dB
708
708
Q (float): https://en.wikipedia.org/wiki/Q_factor
709
709
@@ -844,6 +844,175 @@ def compute_deltas(specgram, win_length=5, mode="replicate"):
844
844
845
845
846
846
@torch.jit.script
def gain(waveform, gain_db=1.0):
    # type: (Tensor, float) -> Tensor
    r"""Amplify or attenuate an entire waveform by a gain expressed in decibels.

    Args:
        waveform (torch.Tensor): Tensor of audio of dimension (channel, time).
        gain_db (float): Gain adjustment in decibels (dB) (Default: `1.0`).

    Returns:
        torch.Tensor: ``waveform`` multiplied by ``10 ** (gain_db / 20)``. When
        ``gain_db`` is 0 the input is returned as is, without any arithmetic.
    """
    if gain_db != 0:
        # Decibels -> linear amplitude factor, applied to every sample.
        waveform = waveform * (10 ** (gain_db / 20))

    return waveform
864
+
865
+
866
@torch.jit.script
def scale_to_interval(waveform, interval=1.0):
    # type: (Tensor, float) -> Tensor
    r"""Scale the whole waveform so that its largest absolute sample equals ``interval``.

    The input tensor is never modified; a new tensor is returned.

    Args:
        waveform (torch.Tensor): Tensor of audio of dimension (channel, time).
        interval (float): The bounds of the interval, where the float indicates
            the upper bound and the negative of the float indicates the lower
            bound (Default: `1.0`).
            Example: interval=1.0 -> [-1.0, 1.0]

    Returns:
        torch.Tensor: the whole waveform scaled to interval. A waveform whose
        samples are all zero is returned unchanged, since it has no peak to
        scale against.
    """
    abs_max = torch.max(torch.abs(waveform))

    # A silent waveform would give ratio == 0 and turn every sample into NaN
    # (0 / 0), so there is nothing meaningful to scale.
    if float(abs_max) == 0.0:
        return waveform

    ratio = abs_max / interval

    # Out-of-place division: an in-place `/=` would silently rewrite the
    # caller's tensor as a side effect.
    return waveform / ratio
886
+
887
+
888
+ def _add_noise_shaping (dithered_waveform , waveform ):
889
+ r"""Noise shaping is calculated by error:
890
+ error[n] = dithered[n] - original[n]
891
+ noise_shaped_waveform[n] = dithered[n] + error[n-1]
892
+ """
893
+ wf_shape = waveform .size ()
894
+ waveform = waveform .reshape (- 1 , wf_shape [- 1 ])
895
+
896
+ dithered_shape = dithered_waveform .size ()
897
+ dithered_waveform = dithered_waveform .reshape (- 1 , dithered_shape [- 1 ])
898
+
899
+ error = dithered_waveform - waveform
900
+
901
+ # add error[n-1] to dithered_waveform[n], so offset the error by 1 index
902
+ for index in range (error .size ()[0 ]):
903
+ err = error [index ]
904
+ error_offset = torch .cat ((torch .zeros (1 ), err ))
905
+ error [index ] = error_offset [:waveform .size ()[1 ]]
906
+
907
+ noise_shaped = dithered_waveform + error
908
+ return noise_shaped .reshape (dithered_shape [:- 1 ] + noise_shaped .shape [- 1 :])
909
+
910
+
911
@torch.jit.script
def probability_distribution(waveform, density_function="TPDF"):
    # type: (Tensor, str) -> Tensor
    r"""Add a dither term chosen by ``density_function`` to a waveform and quantise it.

    The waveform is scaled up to a 16-bit range, the dither term is added, the
    sum is rounded to the nearest integer and the result is scaled back down.
    The input tensor is not modified.

    Triangular probability density function (TPDF) dither noise has a
    triangular distribution; values in the center of the range have a higher
    probability of occurring. Here a Bartlett (triangular) window spanning the
    time axis is added to every channel.

    Rectangular probability density function (RPDF) dither noise has a
    uniform distribution; any value in the specified range has the same
    probability of occurring. Here one randomly picked sample of the waveform,
    minus 0.5, is added to every sample.

    Gaussian probability density function (GPDF) has a normal distribution.
    The relationship of probabilities of results follows a bell-shaped,
    or Gaussian curve, typical of dither generated by analog sources. Here the
    sum of seven randomly picked samples of the waveform is added to every
    sample.

    Args:
        waveform (torch.Tensor): Tensor of audio of dimension (channel, time)
        density_function (string): The density function of a
            continuous random variable (Default: `TPDF`)
            Options: Triangular Probability Density Function - `TPDF`
                     Rectangular Probability Density Function - `RPDF`
                     Gaussian Probability Density Function - `GPDF`
            Any other value is treated as `TPDF`.

    Returns:
        torch.Tensor: the dithered and quantised waveform, with the same shape
        as the input
    """
    shape = waveform.size()
    waveform = waveform.reshape(-1, shape[-1])

    # Largest valid index along each axis.
    channel_size = waveform.size()[0] - 1
    time_size = waveform.size()[-1] - 1

    # NOTE(review): randint's upper bound is exclusive, so the last channel and
    # the last time step are never drawn here - confirm whether that is intended.
    random_channel = int(torch.randint(channel_size, [1, ]).item()) if channel_size > 0 else 0
    random_time = int(torch.randint(time_size, [1, ]).item()) if time_size > 0 else 0

    number_of_bits = 16
    up_scaling = 2 ** (number_of_bits - 1) - 2
    signal_scaled = waveform * up_scaling
    down_scaling = 2 ** (number_of_bits - 1)

    signal_scaled_dis = signal_scaled
    if density_function == "RPDF":
        RPDF = waveform[random_channel][random_time] - 0.5

        signal_scaled_dis = signal_scaled + RPDF
    elif density_function == "GPDF":
        # TODO Replace by distribution code once
        # https://github.com/pytorch/pytorch/issues/29843 is resolved
        # gaussian = torch.distributions.normal.Normal(torch.mean(waveform), 1).sample()

        num_rand_variables = 6

        gaussian = waveform[random_channel][random_time]
        for _draw in range(num_rand_variables):
            # Same guards as above: randint rejects an upper bound of 0, which
            # is what a mono or single-sample waveform would produce.
            rand_chan = int(torch.randint(channel_size, [1, ]).item()) if channel_size > 0 else 0
            rand_time = int(torch.randint(time_size, [1, ]).item()) if time_size > 0 else 0
            # Out-of-place sum: `gaussian` starts as a view into the waveform,
            # so `+=` would overwrite one of the caller's samples.
            gaussian = gaussian + waveform[rand_chan][rand_time]

        signal_scaled_dis = signal_scaled + gaussian
    else:
        # Build the window in the signal's dtype and on its device so the
        # addition works for non-default dtypes and non-CPU tensors.
        TPDF = torch.bartlett_window(time_size + 1, dtype=signal_scaled.dtype, device=signal_scaled.device)

        # The (time,) window broadcasts over every channel.
        signal_scaled_dis = signal_scaled + TPDF

    quantised_signal_scaled = torch.round(signal_scaled_dis)
    quantised_signal = quantised_signal_scaled / down_scaling
    return quantised_signal.reshape(shape[:-1] + quantised_signal.shape[-1:])
979
+
980
+
981
@torch.jit.script
def dither(waveform, probability_density_function="TPDF", noise_shaping=False, ns_filter=""):
    # type: (Tensor, str, bool, str) -> Tensor
    r"""Dither increases the perceived dynamic range of audio stored at a
    particular bit-depth by eliminating nonlinear truncation distortion
    (i.e. adding minimally perceived noise to mask distortion caused by quantization).

    Args:
        waveform (torch.Tensor): Tensor of audio of dimension (channel, time)
        probability_density_function (string): The density function of a
            continuous random variable (Default: `TPDF`)
            Options: Triangular Probability Density Function - `TPDF`
                     Rectangular Probability Density Function - `RPDF`
                     Gaussian Probability Density Function - `GPDF`
        noise_shaping (boolean): a filtering process that shapes the spectral
            energy of quantisation error (Default: `False`)
        ns_filter (string): TODO The noise shaping filter (Default: `""`).
            Currently not used by the implementation.
            Options: Lipshitz - `L`
                     F-Weighted - `FW`
                     Modified-E-Weighted - `MEW`
                     Improved-E-Weighted - `IEW`
                     Gesemann - `G`
                     Shibata - `S`
                     Low-Shibata - `LS`
                     High-Shibata - `HS`

    Returns:
        torch.Tensor: waveform dithered
    """
    result = probability_distribution(waveform, density_function=probability_density_function)

    # Noise shaping is an optional second pass over the dithered signal.
    if noise_shaping:
        result = _add_noise_shaping(result, waveform)

    return result
1014
+
1015
+
847
1016
def _compute_nccf (waveform , sample_rate , frame_time , freq_low ):
848
1017
# type: (Tensor, int, float, int) -> Tensor
849
1018
r"""
0 commit comments