Skip to content

Commit

Permalink
[G2P] fixed typos and broken import library. (NVIDIA#5978) (NVIDIA#5979)
Browse files Browse the repository at this point in the history
Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>
Co-authored-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>
Signed-off-by: Jason <jasoli@nvidia.com>
  • Loading branch information
2 people authored and blisc committed Feb 10, 2023
1 parent ebfad90 commit ce6f6af
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
1 change: 1 addition & 0 deletions nemo_text_processing/g2p/data/data_utils.py
Original file line number Diff line number Diff line change
@@ -35,6 +35,7 @@
 "GRAPHEME_CASE_UPPER",
 "GRAPHEME_CASE_LOWER",
 "GRAPHEME_CASE_MIXED",
+"get_heteronym_spans",
 ]

 # Derived from LJSpeech
Expand Down
17 changes: 9 additions & 8 deletions tests/nemo_text_processing/g2p/data/test_data_utils.py
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@
 from nemo_text_processing.g2p.data.data_utils import (
 any_locale_word_tokenize,
 english_word_tokenize,
-get_homograph_spans,
+get_heteronym_spans,
 )


@@ -95,7 +95,7 @@ def test_any_locale_word_tokenize_with_accents(self):
 @pytest.mark.run_only_on('CPU')
 @pytest.mark.unit
 def test_any_locale_word_tokenize_with_numbers(self):
-input_text = "Three times× four^teen ÷divided by [movies] on \slash."
+input_text = r"Three times× four^teen ÷divided by [movies] on \slash."
 expected_output = self._create_expected_output(
 [
 "three",
@@ -124,10 +124,11 @@ def test_any_locale_word_tokenize_with_numbers(self):

 @pytest.mark.run_only_on('CPU')
 @pytest.mark.unit
-def test_get_homograph_spans(self):
-supported_homographs = ["live", "read", "protest", "diffuse", "desert"]
+def test_get_heteronym_spans(self):
+supported_heteronyms = ["live", "read", "protest", "diffuse", "desert"]
 sentences = [
-"I live in California. I READ a book. Only people who have already gained something are willing to protest. He reads a book!",
+"I live in California. I READ a book. Only people who have already gained something are willing to protest."
+" He reads a book!",
 "Yesterday, I read a book.",
 "He read a book last night and pre-diffuse and LivE-post and pre-desert-post.",
 "the soldier deserted the desert in desert.",
@@ -139,13 +140,13 @@ def test_get_homograph_spans(self):
 [(3, 7), (34, 41), (46, 50), (64, 70)],
 [(25, 31), (35, 41)],
 ]
-expected_homographs = [
+expected_heteronyms = [
 ["live", "read", "protest"],
 ['read'],
 ['read', 'diffuse', 'live', 'desert'],
 ['desert', 'desert'],
 ]

-out_start_end, out_homographs = get_homograph_spans(sentences, supported_homographs)
+out_start_end, out_heteronyms = get_heteronym_spans(sentences, supported_heteronyms)
 assert out_start_end == expected_start_end, "start-end spans do not match"
-assert out_homographs == expected_homographs, "homograph spans do not match"
+assert out_heteronyms == expected_heteronyms, "heteronym spans do not match"

0 comments on commit ce6f6af

Please sign in to comment.