Skip to content

Commit

Permalink
bugfix in festival backend, improved tests
Browse files Browse the repository at this point in the history
bugfix for empty or invalid input
  • Loading branch information
mmmaat committed Jul 26, 2018
1 parent 1c2beb0 commit afb099c
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 34 deletions.
10 changes: 7 additions & 3 deletions phonemizer/festival.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def phonemize(text, language='en-us', separator=default_separator,
logger.debug('loading {}'.format(script))

a = _preprocess(text)
if len(a) == 0:
return []
b = _process(a, script, logger)
c = _postprocess(b, separator, strip)

Expand All @@ -87,7 +89,8 @@ def _double_quoted(line):

def _cleaned(line):
"""Remove 'forbidden' characters from the line"""
return line.replace('"', "'").replace('(', '').replace(')', '')
return line.replace('"', '').replace("'", '').replace(
'(', '').replace(')', '').strip()


def _preprocess(text):
Expand All @@ -98,9 +101,10 @@ def _preprocess(text):
a multiline string. Empty lines in inputs are ignored.
"""
cleaned_text = (
_cleaned(line) for line in text.split('\n') if line != '')
return '\n'.join(
[_double_quoted(_cleaned(line))
for line in text.split('\n') if line != ''])
_double_quoted(line) for line in cleaned_text if line != '')


def _process(text, script, logger):
Expand Down
42 changes: 18 additions & 24 deletions test/test_festival.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,39 +15,33 @@
"""Test of the phonemizer.Phonemizer class"""

import pytest
from phonemizer import phonemize, separator
from phonemizer import festival, separator


def _test(text):
return phonemize(
text, language='en-us', backend='festival', strip=True,
return festival.phonemize(
text, language='en-us', strip=True,
separator=separator.Separator(' ', '|', '-'))

@pytest.mark.skipif(
'2.1' in festival.festival_version(),
reason='festival-2.1 gives different results than further versions '
'for syllable boundaries')
def test_hello():
assert _test('hello world') == 'hh-ax-l|ow w-er-l-d'
assert _test('hello\nworld') == 'hh-ax-l|ow\nw-er-l-d'
assert _test('hello\nworld\n') == 'hh-ax-l|ow\nw-er-l-d'

assert _test('hello world') == ['hh-ax|l-ow w-er-l-d']
assert _test('hello\nworld') == ['hh-ax|l-ow', 'w-er-l-d']
assert _test('hello\nworld\n') == ['hh-ax|l-ow', 'w-er-l-d']

@pytest.mark.parametrize('text', ['', ' ', ' ', '(', '()', '"', "'"])
def test_empty(text):
assert _test(text) == ''
def test_bad_input(text):
assert _test(text) == []

def test_quote():
assert _test("here a 'quote") == 'hh-ih-r ax k-w-ow-t'
assert _test('here a "quote') == 'hh-ih-r ax k-w-ow-t'
assert _test("here a 'quote") == ['hh-ih-r ax k-w-ow-t']
assert _test('here a "quote') == ['hh-ih-r ax k-w-ow-t']

def test_its():
assert _test("it's") == 'ih-t-s'
assert _test("its") == 'ih-t-s'
assert _test("it s") == 'ih-t eh-s'
assert _test('it "s') == 'ih-t eh-s'

def test_list():
assert _test(['hello world']) == ['hh-ax-l|ow w-er-l-d']
assert _test(['hello\nworld']) == ['hh-ax-l|ow', 'w-er-l-d']
assert _test(['hello', 'world']) == ['hh-ax-l|ow', 'w-er-l-d']

def test_tuple():
# this is out of specifications
assert _test(('hello', 'world')) == ['hh-ax-l|ow', 'w-er-l-d']
assert _test("it's") == ['ih-t-s']
assert _test("its") == ['ih-t-s']
assert _test("it s") == ['ih-t eh-s']
assert _test('it "s') == ['ih-t eh-s']
23 changes: 16 additions & 7 deletions test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import tempfile
import shlex

from phonemizer.main import main
from phonemizer import main, festival


def _test(input, output, args=''):
Expand All @@ -30,24 +30,33 @@ def _test(input, output, args=''):

with tempfile.NamedTemporaryFile('w+', delete=False) as foutput:
opts = '{} -o {} {}'.format(finput.name, foutput.name, args)
main(shlex.split(opts))
main.main(shlex.split(opts))
assert foutput.read() == output + '\n'

def test_help():
with pytest.raises(SystemExit):
main('-h'.split())
main.main('-h'.split())

def test_readme():
_test(u'hello world', u'hhaxlow werld ')
_test(u'hello world', u'hhaxlow werld', '--strip')
_test(u'hello world', u'həloʊ wɜːld ', '-l en-us')
_test(u'bonjour le monde', u'bɔ̃ʒuʁ lə- mɔ̃d ', '-l fr-fr')
_test(u'bonjour le monde', u'b ɔ̃ ʒ u ʁ ;eword l ə- ;eword m ɔ̃ d ;eword ',
'-l fr-fr -p " " -w ";eword "')

@pytest.mark.skipif(
'2.1' in festival.festival_version(),
reason='festival-2.1 gives different results than further versions '
'for syllable boundaries')
def test_readme_festival_syll():
_test(u'hello world',
u'hh ax l ;esyll ow ;esyll ;eword w er l d ;esyll ;eword ',
u'hh ax ;esyll l ow ;esyll ;eword w er l d ;esyll ;eword ',
u"-p ' ' -s ';esyll ' -w ';eword '")

def test_njobs():
for njobs in range(1, 4):
_test(
u'hello world\ngoodbye\nthird line\nyet another',
u'hh-ax-l|ow w-er-l-d\ng-uh-d|b-ay\nth-er-d l-ay-n\n'
u'y-eh-t ax-n|ah-dh|er',
u'--strip -j {} -p "-" -s "|" -w " "'.format(njobs))
u'h-ə-l-oʊ w-ɜː-l-d\nɡ-ʊ-d-b-aɪ\nθ-ɜː-d l-aɪ-n\nj-ɛ-t ɐ-n-ʌ-ð-ɚ',
u'--strip -j {} -l en-us -p "-" -s "|" -w " "'.format(njobs))

0 comments on commit afb099c

Please sign in to comment.