From ce4ee7149a3eea07671c50831c00180a2fc00790 Mon Sep 17 00:00:00 2001
From: Moritz Dahm <94791958+DerMoehre@users.noreply.github.com>
Date: Thu, 6 Oct 2022 19:45:30 +0200
Subject: [PATCH 1/3] Handle TSVs with header rows

Resolves #65 by checking whether the first time field (the third tab-separated column of the first line) can be parsed as a float. If it cannot, we assume the first line is a header row, log a warning, and remove it from the stored lines. Otherwise the function behaves as before.

Squashed commit of DerMoehre's PR #73

Co-authored-by: JoFrhwld <JoFrhwld@gmail.com>
Co-authored-by: Christian Brickhouse <chrisbrickhouse@users.noreply.github.com>
---
 fave/align/transcriptprocessor.py            |  8 ++-
 pyproject.toml                               |  4 +-
 tests/fave/align/test_transcriptprocessor.py | 76 ++++++++++++++++++++
 3 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/fave/align/transcriptprocessor.py b/fave/align/transcriptprocessor.py
index d9a2d95..34e0957 100644
--- a/fave/align/transcriptprocessor.py
+++ b/fave/align/transcriptprocessor.py
@@ -247,7 +247,13 @@ def read_transcription_file(self):
         """Reads file into memory"""
         with open(self.file) as f:
             lines = self.replace_smart_quotes(f.readlines())
-        self.lines = lines
+            self.lines = lines
+            try:
+                float(lines[0].split('\t')[2]) 
+            except ValueError:
+                # Log a warning about having detected a header row
+                self.logger.warning('Header row was detected')
+                del lines[0]
 
     # substitute any 'smart' quotes in the input file with the corresponding
     # ASCII equivalents (otherwise they will be excluded as out-of-
diff --git a/pyproject.toml b/pyproject.toml
index 901431f..4987318 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fave"
-version = "2.0.2-dev"
+version = "2.0.3-dev"
 description = "Forced alignment and vowel extraction"
 authors = [
 	"Ingrid Rosenfelder",
@@ -35,3 +35,5 @@ build-backend = "poetry.masonry.api"
 [tool.poetry.scripts]
 fave-extract = "fave.extractFormants:main"
 fave-align = "fave.FAAValign:setup"
+extractFormants = "fave.extractFormants:main"
+FAAValign = "fave.FAAValign:setup"
diff --git a/tests/fave/align/test_transcriptprocessor.py b/tests/fave/align/test_transcriptprocessor.py
index 8854167..873d266 100644
--- a/tests/fave/align/test_transcriptprocessor.py
+++ b/tests/fave/align/test_transcriptprocessor.py
@@ -1,5 +1,27 @@
+import logging
 import pytest
 from fave.align import transcriptprocessor
+from fave import cmudictionary # We shouldn't be doing this...
+
+# Copied from ../test_cmudictionary.py
+#  which means this really should be made a fixture...
+KWARGS = {
+        'verbose': 1
+    }
+
+CMU_EXCERPT = """
+TEST  T EH1 S T 
+TEST'S  T EH1 S T S 
+TESTA  T EH1 S T AH0 
+TESTAMENT  T EH1 S T AH0 M AH0 N T 
+TESTAMENTARY  T EH2 S T AH0 M EH1 N T ER0 IY0 
+TESTED  T EH1 S T AH0 D 
+TESTER  T EH1 S T ER0 
+TESTERMAN  T EH1 S T ER0 M AH0 N 
+TESTERS  T EH1 S T ER0 Z 
+TESTERS  T EH1 S T AH0 Z 
+"""
+
 
 def test_replace_smart_quotes():
     def test_func( testcase ):
@@ -69,3 +91,57 @@ def provide_check_transcription_format_raises_value_error():
                                                # Skip 5 entries (not an error)
             [ 'a\tb\tc\td\te\tf', ValueError], # 6 entries
         ]
+
+def test_read_transcription_file(tmp_path):  # tmp_path: pytest temp-dir fixture
+    tmp_directory = tmp_path / "transcripts"
+    tmp_directory.mkdir()
+    tmp_file = tmp_directory / "test_transcript.csv"  # rewritten for every case
+    dict_file = tmp_directory / "cmu.dict"
+    dict_file.write_text(CMU_EXCERPT)
+    cmu_dict = cmudictionary.CMU_Dictionary(dict_file, **KWARGS)
+    for test_case in provide_value_error_file():
+        test_text = test_case[0]  # raw transcript text written to tmp_file
+        flags = test_case[1]  # keyword arguments for TranscriptProcessor
+        expected = test_case[2]  # expected tp_obj.lines after reading
+        tmp_file.write_text(test_text)
+        tp_obj = transcriptprocessor.TranscriptProcessor(
+                tmp_file,
+                cmu_dict,
+                **flags
+            )
+        tp_obj.read_transcription_file()
+        # The stored lines must match the expected list for this case.
+        assert tp_obj.lines == expected
+
+def provide_value_error_file():  # cases for test_read_transcription_file
+    return [  # each case: [file text, TranscriptProcessor kwargs, expected lines]
+        [   # header row is detected and deleted
+            "Style\tSpeaker\tBeginning\tEnd\tDuration\nFoo\tBar\t0.0\t3.2\t3.2",
+            {
+                'prompt': "IDK what this is -CJB",
+                'check' : '',
+                'verbose': logging.DEBUG
+            },
+            ['Foo\tBar\t0.0\t3.2\t3.2']
+        ],
+        [   # no header row, two lines: both lines are kept
+            "Foo\tBar\t0.0\t3.2\t3.2\nTest\t1.0\t4.5\t3.5",
+            {
+                'prompt': "IDK what this is -CJB",
+                'check' : '',
+                'verbose': logging.DEBUG
+            },
+            ['Foo\tBar\t0.0\t3.2\t3.2\n', 'Test\t1.0\t4.5\t3.5']
+        ],
+        [   # no header row, three lines: all lines are kept
+            "Foo\tBar\t0.0\t3.2\t3.2\nTest\t1.0\t4.5\t3.5\nTest\t1.0\t4.5\t3.5",
+            {
+                'prompt': "IDK what this is -CJB",
+                'check' : '',
+                'verbose': logging.DEBUG
+            },
+            ['Foo\tBar\t0.0\t3.2\t3.2\n', 'Test\t1.0\t4.5\t3.5\n', 'Test\t1.0\t4.5\t3.5']
+        ]
+
+    ]
+

From e873ea2570f8c3c0fdaf61fae7706a0832bb82d2 Mon Sep 17 00:00:00 2001
From: DerMoehre <moehre2507@gmail.com>
Date: Mon, 10 Oct 2022 06:33:08 +0200
Subject: [PATCH 2/3] Revert "added a test for extractFormants"

This reverts commit 5983ed6c41a46f1e5050ddae271a3ec570dbb812.
---
 tests/fave/extract/test_extractFormants.py | 50 ----------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 tests/fave/extract/test_extractFormants.py

diff --git a/tests/fave/extract/test_extractFormants.py b/tests/fave/extract/test_extractFormants.py
deleted file mode 100644
index 147875e..0000000
--- a/tests/fave/extract/test_extractFormants.py
+++ /dev/null
@@ -1,50 +0,0 @@
-
-import logging
-import pytest
-import numpy as np
-from fave import extractFormants
-
-def test_mean_stdv():
-    for test_case in provide_valuelist():
-        mean, stdv = extractFormants.mean_stdv(test_case[0])
-        
-        assert mean == test_case[1]
-        assert stdv == test_case[2]
-
-def provide_valuelist():
-    return [  
-        [
-            [1, 2, 3, 4],
-            np.mean([1, 2, 3, 4]),
-            np.std([1, 2, 3, 4], ddof=1)
-        ],
-        [
-            [3.5, 2.6, 11.6, 34.66, 2.8, 4.7],
-            np.mean([3.5, 2.6, 11.6, 34.66, 2.8, 4.7]),
-            np.std([3.5, 2.6, 11.6, 34.66, 2.8, 4.7], ddof=1)
-        ],
-        [
-            [],
-            None,
-            None
-        ],
-        [
-            [23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2],
-            np.mean([23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2]),
-            np.std([23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2], ddof=1)
-        ],
-        [
-            [3],
-            np.mean([3]),
-            0
-        ],
-        [
-            [-1],
-            np.mean([-1]),
-            0
-        ]
-
-    ]
-
-    
-

From 5c62587684fc01f037ffbddaa1baf65dd049b15a Mon Sep 17 00:00:00 2001
From: DerMoehre <moehre2507@gmail.com>
Date: Mon, 10 Oct 2022 20:28:51 +0200
Subject: [PATCH 3/3] added shields badges for PyPI and DOI

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 264c053..04a639c 100644
--- a/README.md
+++ b/README.md
@@ -23,9 +23,11 @@ You can fill in a bug report at the [issue tab](https://github.com/JoFrhwld/FAVE
 There may be a delay between when a bug is reported and when a bug is resolved. Developers prioritize bugs based on difficulty, importance, and other factors, so bug reports are usually not handled in the order they are received. 
 
 ## Attribution
-[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.22281.svg)](http://dx.doi.org/10.5281/zenodo.22281)
+[![DOI](https://zenodo.org/badge/13744621.svg)](https://zenodo.org/badge/latestdoi/13744621)
 ![GitHub](https://img.shields.io/github/license/JoFrhwld/FAVE)
 ![GitHub](https://img.shields.io/badge/Python-3.8%2B-brightgreen)
+[![PyPI version fury.io](https://badge.fury.io/py/fave.svg)](https://pypi.python.org/pypi/fave/)
+
 As of v1.1.3 onwards, releases from this repository will have a DOI associated with them through Zenodo. The DOI for the current release is [10.5281/zenodo.22281](http://dx.doi.org/10.5281/zenodo.22281). We would recommend the citation:
 
 Rosenfelder, Ingrid; Fruehwald, Josef; Brickhouse, Christian; Evanini, Keelan; Seyfarth, Scott; Gorman, Kyle; Prichard, Hilary; Yuan, Jiahong; 2022. FAVE (Forced Alignment and Vowel Extraction) Program Suite v2.0.0 */zenodo.*