Merge branch 'DerMoehre-feature/mean-np' into dev

chrisbrickhouse · Oct 10, 2022 · 185ce82 · 185ce82
2 parents 5e68946 + dc23ff4
commit 185ce82
Show file tree

Hide file tree

Showing 5 changed files with 145 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -24,6 +24,8 @@ There may be a delay between when a bug is reported and when a bug is resolved.
 
 ## Attribution
 [![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.22281.svg)](http://dx.doi.org/10.5281/zenodo.22281)
+![License](https://img.shields.io/github/license/JoFrhwld/FAVE)
+![Python 3.8+](https://img.shields.io/badge/Python-3.8%2B-brightgreen)
 As of v1.1.3 onwards, releases from this repository will have a DOI associated with them through Zenodo. The DOI for the current release is [10.5281/zenodo.22281](http://dx.doi.org/10.5281/zenodo.22281). We would recommend the citation:
 
 Rosenfelder, Ingrid; Fruehwald, Josef; Brickhouse, Christian; Evanini, Keelan; Seyfarth, Scott; Gorman, Kyle; Prichard, Hilary; Yuan, Jiahong; 2022. FAVE (Forced Alignment and Vowel Extraction) Program Suite v2.0.0 */zenodo.*

diff --git a/fave/align/transcriptprocessor.py b/fave/align/transcriptprocessor.py
@@ -247,7 +247,13 @@ def read_transcription_file(self):
         """Reads file into memory"""
         with open(self.file) as f:
             lines = self.replace_smart_quotes(f.readlines())
-        self.lines = lines
+            self.lines = lines
+            try:
+                float(lines[0].split('\t')[2]) 
+            except ValueError:
+                # Log a warning about having detected a header row
+                self.logger.warning('Header row was detected')
+                del lines[0]
 
     # substitute any 'smart' quotes in the input file with the corresponding
     # ASCII equivalents (otherwise they will be excluded as out-of-

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fave"
-version = "2.0.2-dev"
+version = "2.0.3-dev"
 description = "Forced alignment and vowel extraction"
 authors = [
 	"Ingrid Rosenfelder",
@@ -35,3 +35,5 @@ build-backend = "poetry.masonry.api"
 [tool.poetry.scripts]
 fave-extract = "fave.extractFormants:main"
 fave-align = "fave.FAAValign:setup"
+extractFormants = "fave.extractFormants:main"
+FAAValign = "fave.FAAValign:setup"
diff --git a/tests/fave/align/test_transcriptprocessor.py b/tests/fave/align/test_transcriptprocessor.py
@@ -1,5 +1,27 @@
+import logging
 import pytest
 from fave.align import transcriptprocessor
+from fave import cmudictionary # We shouldn't be doing this...
+
+# Shared test inputs, copied from ../test_cmudictionary.py.
+# TODO: move these into a shared pytest fixture instead of duplicating them.
+# KWARGS is forwarded as keyword arguments to cmudictionary.CMU_Dictionary.
+KWARGS = {
+        'verbose': 1
+    }
+
+CMU_EXCERPT = """
+TEST  T EH1 S T 
+TEST'S  T EH1 S T S 
+TESTA  T EH1 S T AH0 
+TESTAMENT  T EH1 S T AH0 M AH0 N T 
+TESTAMENTARY  T EH2 S T AH0 M EH1 N T ER0 IY0 
+TESTED  T EH1 S T AH0 D 
+TESTER  T EH1 S T ER0 
+TESTERMAN  T EH1 S T ER0 M AH0 N 
+TESTERS  T EH1 S T ER0 Z 
+TESTERS  T EH1 S T AH0 Z 
+"""
+
 
 def test_replace_smart_quotes():
     def test_func( testcase ):
@@ -69,3 +91,57 @@ def provide_check_transcription_format_raises_value_error():
                                                # Skip 5 entries (not an error)
             [ 'a\tb\tc\td\te\tf', ValueError], # 6 entries
         ]
+
+def test_read_transcription_file(tmp_path):
+    """Check that read_transcription_file() stores the expected lines.
+
+    Each case from provide_value_error_file() is written to a temporary
+    transcript file, read back through a TranscriptProcessor, and the
+    resulting ``lines`` attribute is compared with the expected list.
+    """
+    workdir = tmp_path / "transcripts"
+    workdir.mkdir()
+    transcript_path = workdir / "test_transcript.csv"
+    dictionary_path = workdir / "cmu.dict"
+    dictionary_path.write_text(CMU_EXCERPT)
+    cmu_dict = cmudictionary.CMU_Dictionary(dictionary_path, **KWARGS)
+
+    for test_text, flags, expected in provide_value_error_file():
+        # The same file is overwritten for every case.
+        transcript_path.write_text(test_text)
+        processor = transcriptprocessor.TranscriptProcessor(
+            transcript_path,
+            cmu_dict,
+            **flags
+        )
+        processor.read_transcription_file()
+
+        assert processor.lines == expected
+
+def provide_value_error_file():
+    """Return the cases used by test_read_transcription_file.
+
+    Each case is a list ``[file_text, processor_flags, expected_lines]``.
+    """
+    def make_flags():
+        # A fresh dict per case, so no two cases share a mutable object.
+        return {
+            'prompt': "IDK what this is -CJB",
+            'check' : '',
+            'verbose': logging.DEBUG
+        }
+
+    with_header = [
+        # A header row comes first: it is detected and deleted.
+        "Style\tSpeaker\tBeginning\tEnd\tDuration\nFoo\tBar\t0.0\t3.2\t3.2",
+        make_flags(),
+        ['Foo\tBar\t0.0\t3.2\t3.2']
+    ]
+    two_rows = [
+        # Two data rows and no header: every row is kept.
+        "Foo\tBar\t0.0\t3.2\t3.2\nTest\t1.0\t4.5\t3.5",
+        make_flags(),
+        ['Foo\tBar\t0.0\t3.2\t3.2\n', 'Test\t1.0\t4.5\t3.5']
+    ]
+    three_rows = [
+        # Three data rows and no header: every row is kept.
+        "Foo\tBar\t0.0\t3.2\t3.2\nTest\t1.0\t4.5\t3.5\nTest\t1.0\t4.5\t3.5",
+        make_flags(),
+        ['Foo\tBar\t0.0\t3.2\t3.2\n', 'Test\t1.0\t4.5\t3.5\n', 'Test\t1.0\t4.5\t3.5']
+    ]
+
+    return [with_header, two_rows, three_rows]
+
diff --git a/tests/fave/extract/test_extractFormants.py b/tests/fave/extract/test_extractFormants.py
@@ -0,0 +1,57 @@
+
+import logging
+import pytest
+import numpy as np
+from fave import extractFormants
+
+def test_mean_stdv():
+    """Compare extractFormants.mean_stdv() with the references from provide_valuelist().
+
+    Each case supplies the input values plus the expected mean and standard
+    deviation. An expected value of None must be matched by None; numeric
+    values are compared with a tolerance rather than exact equality, so
+    harmless floating-point rounding differences do not fail the test.
+    """
+    def check(actual, expected):
+        if expected is None:
+            assert actual is None
+        else:
+            assert actual == pytest.approx(expected)
+
+    for values, expected_mean, expected_stdv in provide_valuelist():
+        mean, stdv = extractFormants.mean_stdv(values)
+
+        check(mean, expected_mean)
+        check(stdv, expected_stdv)
+
+def provide_valuelist():
+    """Return the cases used by test_mean_stdv.
+
+    Each case is a list ``[values, expected_mean, expected_stdv]``. The
+    reference statistics are computed with numpy, using the sample standard
+    deviation (``ddof=1``). The empty list expects ``None`` for both values,
+    and single-element lists expect a standard deviation of 0.
+    """
+    # dtype=np.float64 converts the None entry to NaN, which the nan-aware
+    # reducers (nanmean / nanstd) then ignore.
+    with_missing = np.array([3.5, 2.6, 11.6, None, 34.66, 2.8, 4.7],
+                            dtype=np.float64)
+    return [
+        [
+            [1, 2, 3, 4],
+            np.mean([1, 2, 3, 4]),
+            np.std([1, 2, 3, 4], ddof=1)
+        ],
+        [
+            [3.5, 2.6, 11.6, 34.66, 2.8, 4.7],
+            np.mean([3.5, 2.6, 11.6, 34.66, 2.8, 4.7]),
+            np.std([3.5, 2.6, 11.6, 34.66, 2.8, 4.7], ddof=1)
+        ],
+        [
+            [],
+            None,
+            None
+        ],
+        [
+            [23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2],
+            np.mean([23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2]),
+            np.std([23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2], ddof=1)
+        ],
+        [
+            [3],
+            np.mean([3]),
+            0
+        ],
+        [
+            [-1],
+            np.mean([-1]),
+            0
+        ],  # this comma was missing, so the next list was parsed as a subscript
+        [
+            [3.5, 2.6, 11.6, None, 34.66, 2.8, 4.7],
+            np.nanmean(with_missing),
+            np.nanstd(with_missing, ddof=1)
+        ],
+    ]
+
+
+