From ce4ee7149a3eea07671c50831c00180a2fc00790 Mon Sep 17 00:00:00 2001 From: Moritz Dahm <94791958+DerMoehre@users.noreply.github.com> Date: Thu, 6 Oct 2022 19:45:30 +0200 Subject: [PATCH 1/3] Handle TSVs with header rows Resolves #65 by checking the data type of the first time field. If it's not a float, we assume it's a header row and remove it from the returned list. Otherwise the function returns as previously. Squashed commit of DerMoehre's PR #73 Co-authored-by: JoFrhwld Co-authored-by: Christian Brickhouse --- fave/align/transcriptprocessor.py | 8 ++- pyproject.toml | 4 +- tests/fave/align/test_transcriptprocessor.py | 76 ++++++++++++++++++++ 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/fave/align/transcriptprocessor.py b/fave/align/transcriptprocessor.py index d9a2d95..34e0957 100644 --- a/fave/align/transcriptprocessor.py +++ b/fave/align/transcriptprocessor.py @@ -247,7 +247,13 @@ def read_transcription_file(self): """Reads file into memory""" with open(self.file) as f: lines = self.replace_smart_quotes(f.readlines()) - self.lines = lines + self.lines = lines + try: + float(lines[0].split('\t')[2]) + except ValueError: + # Log a warning about having detected a header row + self.logger.warning('Header row was detected') + del lines[0] # substitute any 'smart' quotes in the input file with the corresponding # ASCII equivalents (otherwise they will be excluded as out-of- diff --git a/pyproject.toml b/pyproject.toml index 901431f..4987318 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fave" -version = "2.0.2-dev" +version = "2.0.3-dev" description = "Forced alignment and vowel extraction" authors = [ "Ingrid Rosenfelder", @@ -35,3 +35,5 @@ build-backend = "poetry.masonry.api" [tool.poetry.scripts] fave-extract = "fave.extractFormants:main" fave-align = "fave.FAAValign:setup" +extractFormants = "fave.extractFormants:main" +FAAValign = "fave.FAAValign:setup" diff --git a/tests/fave/align/test_transcriptprocessor.py b/tests/fave/align/test_transcriptprocessor.py index 8854167..873d266 100644 --- a/tests/fave/align/test_transcriptprocessor.py +++ b/tests/fave/align/test_transcriptprocessor.py @@ -1,5 +1,27 @@ +import logging import pytest from fave.align import transcriptprocessor +from fave import cmudictionary # We shouldn't be doing this... + +# Copied from ../test_cmudictionary.py +# which means this really should be made a fixture... +KWARGS = { + 'verbose': 1 + } + +CMU_EXCERPT = """ +TEST T EH1 S T +TEST'S T EH1 S T S +TESTA T EH1 S T AH0 +TESTAMENT T EH1 S T AH0 M AH0 N T +TESTAMENTARY T EH2 S T AH0 M EH1 N T ER0 IY0 +TESTED T EH1 S T AH0 D +TESTER T EH1 S T ER0 +TESTERMAN T EH1 S T ER0 M AH0 N +TESTERS T EH1 S T ER0 Z +TESTERS T EH1 S T AH0 Z +""" + def test_replace_smart_quotes(): def test_func( testcase ): @@ -69,3 +91,57 @@ def provide_check_transcription_format_raises_value_error(): # Skip 5 entries (not an error) [ 'a\tb\tc\td\te\tf', ValueError], # 6 entries ] + +def test_read_transcription_file(tmp_path): + tmp_directory = tmp_path / "transcripts" + tmp_directory.mkdir() + tmp_file = tmp_directory / "test_transcript.csv" + dict_file = tmp_directory / "cmu.dict" + dict_file.write_text(CMU_EXCERPT) + cmu_dict = cmudictionary.CMU_Dictionary(dict_file, **KWARGS) + for test_case in provide_value_error_file(): + test_text = test_case[0] + flags = test_case[1] + expected = test_case[2] + tmp_file.write_text(test_text) + tp_obj = transcriptprocessor.TranscriptProcessor( + tmp_file, + cmu_dict, + **flags + ) + tp_obj.read_transcription_file() + + assert tp_obj.lines == expected + +def provide_value_error_file(): + return [ + [ # header row is detected and deleted + "Style\tSpeaker\tBeginning\tEnd\tDuration\nFoo\tBar\t0.0\t3.2\t3.2", + { + 'prompt': "IDK what this is -CJB", + 'check' : '', + 'verbose': logging.DEBUG + }, + ['Foo\tBar\t0.0\t3.2\t3.2'] + ], + [ # test with one line + "Foo\tBar\t0.0\t3.2\t3.2\nTest\t1.0\t4.5\t3.5", + { + 'prompt': "IDK what this is -CJB", + 'check' : '', + 'verbose': logging.DEBUG + }, + ['Foo\tBar\t0.0\t3.2\t3.2\n', 'Test\t1.0\t4.5\t3.5'] + ], + [ # test with more lines + "Foo\tBar\t0.0\t3.2\t3.2\nTest\t1.0\t4.5\t3.5\nTest\t1.0\t4.5\t3.5", + { + 'prompt': "IDK what this is -CJB", + 'check' : '', + 'verbose': logging.DEBUG + }, + ['Foo\tBar\t0.0\t3.2\t3.2\n', 'Test\t1.0\t4.5\t3.5\n', 'Test\t1.0\t4.5\t3.5'] + ] + + ] + From e873ea2570f8c3c0fdaf61fae7706a0832bb82d2 Mon Sep 17 00:00:00 2001 From: DerMoehre Date: Mon, 10 Oct 2022 06:33:08 +0200 Subject: [PATCH 2/3] Revert "added a test for extractFormants" This reverts commit 5983ed6c41a46f1e5050ddae271a3ec570dbb812. --- tests/fave/extract/test_extractFormants.py | 50 ---------------------- 1 file changed, 50 deletions(-) delete mode 100644 tests/fave/extract/test_extractFormants.py diff --git a/tests/fave/extract/test_extractFormants.py b/tests/fave/extract/test_extractFormants.py deleted file mode 100644 index 147875e..0000000 --- a/tests/fave/extract/test_extractFormants.py +++ /dev/null @@ -1,50 +0,0 @@ - -import logging -import pytest -import numpy as np -from fave import extractFormants - -def test_mean_stdv(): - for test_case in provide_valuelist(): - mean, stdv = extractFormants.mean_stdv(test_case[0]) - - assert mean == test_case[1] - assert stdv == test_case[2] - -def provide_valuelist(): - return [ - [ - [1, 2, 3, 4], - np.mean([1, 2, 3, 4]), - np.std([1, 2, 3, 4], ddof=1) - ], - [ - [3.5, 2.6, 11.6, 34.66, 2.8, 4.7], - np.mean([3.5, 2.6, 11.6, 34.66, 2.8, 4.7]), - np.std([3.5, 2.6, 11.6, 34.66, 2.8, 4.7], ddof=1) - ], - [ - [], - None, - None - ], - [ - [23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2], - np.mean([23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2]), - np.std([23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2], ddof=1) - ], - [ - [3], - np.mean([3]), - 0 - ], - [ - [-1], - np.mean([-1]), - 0 - ] - - ] - - - From 5c62587684fc01f037ffbddaa1baf65dd049b15a Mon Sep 17 00:00:00 2001 From: DerMoehre Date: Mon, 10 Oct 2022 20:28:51 +0200 Subject: [PATCH 3/3] added shields badges for PyPI and DOI --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 264c053..04a639c 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,11 @@ You can fill in a bug report at the [issue tab](https://github.com/JoFrhwld/FAVE There may be a delay between when a bug is reported and when a bug is resolved. Developers prioritize bugs based on difficulty, importance, and other factors, so bug reports are usually not handled in the order they are received. ## Attribution -[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.22281.svg)](http://dx.doi.org/10.5281/zenodo.22281) +[![DOI](https://zenodo.org/badge/13744621.svg)](https://zenodo.org/badge/latestdoi/13744621) ![GitHub](https://img.shields.io/github/license/JoFrhwld/FAVE) ![GitHub](https://img.shields.io/badge/Python-3.8%2B-brightgreen) +[![PyPI version fury.io](https://badge.fury.io/py/fave.svg)](https://pypi.python.org/pypi/fave/) + As of v1.1.3 onwards, releases from this repository will have a DOI associated with them through Zenodo. The DOI for the current release is [10.5281/zenodo.22281](http://dx.doi.org/10.5281/zenodo.22281). We would recommend the citation: Rosenfelder, Ingrid; Fruehwald, Josef; Brickhouse, Christian; Evanini, Keelan; Seyfarth, Scott; Gorman, Kyle; Prichard, Hilary; Yuan, Jiahong; 2022. FAVE (Forced Alignment and Vowel Extraction) Program Suite v2.0.0 */zenodo.*