Skip to content

Commit

Permalink
Merge branch 'DerMoehre-feature/mean-np' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
JoFrhwld committed Oct 10, 2022
2 parents 5e68946 + dc23ff4 commit 185ce82
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 2 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ There may be a delay between when a bug is reported and when a bug is resolved.

## Attribution
[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.22281.svg)](http://dx.doi.org/10.5281/zenodo.22281)
![GitHub](https://img.shields.io/github/license/JoFrhwld/FAVE)
![GitHub](https://img.shields.io/badge/Python-3.8%2B-brightgreen)
From v1.1.3 onwards, releases from this repository have a DOI associated with them through Zenodo. The DOI for the current release is [10.5281/zenodo.22281](http://dx.doi.org/10.5281/zenodo.22281). We recommend the following citation:

Rosenfelder, Ingrid; Fruehwald, Josef; Brickhouse, Christian; Evanini, Keelan; Seyfarth, Scott; Gorman, Kyle; Prichard, Hilary; Yuan, Jiahong; 2022. FAVE (Forced Alignment and Vowel Extraction) Program Suite v2.0.0 */zenodo.*
Expand Down
8 changes: 7 additions & 1 deletion fave/align/transcriptprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,13 @@ def read_transcription_file(self):
"""Reads file into memory"""
with open(self.file) as f:
    lines = self.replace_smart_quotes(f.readlines())
# Publish the list before the header check: the check edits this same
# list object in place, so self.lines reflects any removal.
self.lines = lines
# Header detection: if the third tab-separated field of the first line
# does not parse as a number, the line is treated as a header row and
# dropped.  The field is only inspected when it exists, so an empty file
# or a first line with fewer than three fields no longer raises
# IndexError here; such a line is left untouched.
first_fields = lines[0].split('\t') if lines else []
if len(first_fields) > 2:
    try:
        float(first_fields[2])
    except ValueError:
        # Log a warning about having detected a header row
        self.logger.warning('Header row was detected')
        del lines[0]

# substitute any 'smart' quotes in the input file with the corresponding
# ASCII equivalents (otherwise they will be excluded as out-of-
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "fave"
version = "2.0.2-dev"
version = "2.0.3-dev"
description = "Forced alignment and vowel extraction"
authors = [
"Ingrid Rosenfelder",
Expand Down Expand Up @@ -35,3 +35,5 @@ build-backend = "poetry.masonry.api"
[tool.poetry.scripts]
fave-extract = "fave.extractFormants:main"
fave-align = "fave.FAAValign:setup"
extractFormants = "fave.extractFormants:main"
FAAValign = "fave.FAAValign:setup"
76 changes: 76 additions & 0 deletions tests/fave/align/test_transcriptprocessor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
import logging
import pytest
from fave.align import transcriptprocessor
from fave import cmudictionary # We shouldn't be doing this...

# Duplicated from ../test_cmudictionary.py; a shared fixture would be a
# better home for both of these values.
KWARGS = dict(verbose=1)

# Small dictionary excerpt written to a temporary file by the tests below.
CMU_EXCERPT = """
TEST T EH1 S T
TEST'S T EH1 S T S
TESTA T EH1 S T AH0
TESTAMENT T EH1 S T AH0 M AH0 N T
TESTAMENTARY T EH2 S T AH0 M EH1 N T ER0 IY0
TESTED T EH1 S T AH0 D
TESTER T EH1 S T ER0
TESTERMAN T EH1 S T ER0 M AH0 N
TESTERS T EH1 S T ER0 Z
TESTERS T EH1 S T AH0 Z
"""


def test_replace_smart_quotes():
def test_func( testcase ):
Expand Down Expand Up @@ -69,3 +91,57 @@ def provide_check_transcription_format_raises_value_error():
# Skip 5 entries (not an error)
[ 'a\tb\tc\td\te\tf', ValueError], # 6 entries
]

def test_read_transcription_file(tmp_path):
    """Check the lines kept by ``read_transcription_file`` for each case.

    Each case from ``provide_value_error_file`` supplies the transcript
    text, the keyword arguments for ``TranscriptProcessor`` and the lines
    expected on ``tp_obj.lines`` after reading.
    """
    workdir = tmp_path / "transcripts"
    workdir.mkdir()
    transcript_path = workdir / "test_transcript.csv"
    dictionary_path = workdir / "cmu.dict"
    dictionary_path.write_text(CMU_EXCERPT)
    cmu_dict = cmudictionary.CMU_Dictionary(dictionary_path, **KWARGS)

    for contents, flags, expected in provide_value_error_file():
        # The same transcript path is rewritten for every case.
        transcript_path.write_text(contents)
        processor = transcriptprocessor.TranscriptProcessor(
            transcript_path,
            cmu_dict,
            **flags
        )
        processor.read_transcription_file()

        assert processor.lines == expected

def provide_value_error_file():
    """Return the cases consumed by ``test_read_transcription_file``.

    Each case is a three-item list: the text written to the transcript
    file, the keyword arguments passed to ``TranscriptProcessor``, and the
    list of lines expected after ``read_transcription_file`` has run.
    """
    def make_flags():
        # A fresh dict per case, so the cases never share a mutable object.
        return {
            'prompt': "IDK what this is -CJB",
            'check' : '',
            'verbose': logging.DEBUG
        }

    header_row = "Style\tSpeaker\tBeginning\tEnd\tDuration"
    full_row = "Foo\tBar\t0.0\t3.2\t3.2"
    short_row = "Test\t1.0\t4.5\t3.5"

    cases = []

    # A header row followed by one data row: only the data row is expected.
    cases.append([
        header_row + "\n" + full_row,
        make_flags(),
        [full_row],
    ])

    # No header, two rows: both rows are expected unchanged.
    cases.append([
        "\n".join([full_row, short_row]),
        make_flags(),
        [full_row + "\n", short_row],
    ])

    # No header, three rows: all rows are expected unchanged.
    cases.append([
        "\n".join([full_row, short_row, short_row]),
        make_flags(),
        [full_row + "\n", short_row + "\n", short_row],
    ])

    return cases

57 changes: 57 additions & 0 deletions tests/fave/extract/test_extractFormants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@

import logging
import pytest
import numpy as np
from fave import extractFormants

def test_mean_stdv():
    """Compare ``extractFormants.mean_stdv`` with each provided expectation."""
    for values, expected_mean, expected_stdv in provide_valuelist():
        observed_mean, observed_stdv = extractFormants.mean_stdv(values)

        assert observed_mean == expected_mean
        assert observed_stdv == expected_stdv

def provide_valuelist():
    """Return the cases consumed by ``test_mean_stdv``.

    Each case is a three-item list: the input values, the expected mean
    and the expected standard deviation.  Expected values for the
    multi-element inputs are computed with NumPy using ``ddof=1``.

    Returns:
        list: seven ``[values, expected_mean, expected_stdv]`` cases.
    """
    return [
        [
            [1, 2, 3, 4],
            np.mean([1, 2, 3, 4]),
            np.std([1, 2, 3, 4], ddof=1)
        ],
        [
            [3.5, 2.6, 11.6, 34.66, 2.8, 4.7],
            np.mean([3.5, 2.6, 11.6, 34.66, 2.8, 4.7]),
            np.std([3.5, 2.6, 11.6, 34.66, 2.8, 4.7], ddof=1)
        ],
        [
            # Empty input: no mean or standard deviation is expected.
            [],
            None,
            None
        ],
        [
            [23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2],
            np.mean([23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2]),
            np.std([23, 34, 45, 56, 12, 312, 45, 943, 21, 1, 4, 6, 9, 2], ddof=1)
        ],
        [
            # Single value: the expected standard deviation is 0.
            [3],
            np.mean([3]),
            0
        ],
        [
            [-1],
            np.mean([-1]),
            0
        ],  # this comma was missing, which made the next list a subscript
        [
            # A None entry becomes NaN in the float64 array and is skipped
            # by the nan-aware NumPy functions.
            [3.5, 2.6, 11.6, None, 34.66, 2.8, 4.7],
            np.nanmean(np.array([3.5, 2.6, 11.6, None, 34.66, 2.8, 4.7],
                                dtype=np.float64)),
            np.nanstd(np.array([3.5, 2.6, 11.6, None, 34.66, 2.8, 4.7],
                               dtype=np.float64),
                      ddof=1)
        ],
    ]



0 comments on commit 185ce82

Please sign in to comment.