From a6c2552189e964c736dc1a4caffc1e32ee017b58 Mon Sep 17 00:00:00 2001
From: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>
Date: Thu, 9 Feb 2023 14:47:02 -0800
Subject: [PATCH] [G2P] fixed typos and broken import library. (#5978)

Signed-off-by: Xuesong Yang <1646669+XuesongYang@users.noreply.github.com>
---
 nemo_text_processing/g2p/data/data_utils.py     |  1 +
 .../g2p/data/test_data_utils.py                 | 17 +++++++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/nemo_text_processing/g2p/data/data_utils.py b/nemo_text_processing/g2p/data/data_utils.py
index d695ec9ba391..8d9db35a1808 100644
--- a/nemo_text_processing/g2p/data/data_utils.py
+++ b/nemo_text_processing/g2p/data/data_utils.py
@@ -35,6 +35,7 @@
     "GRAPHEME_CASE_UPPER",
     "GRAPHEME_CASE_LOWER",
     "GRAPHEME_CASE_MIXED",
+    "get_heteronym_spans",
 ]
 
 # Derived from LJSpeech
diff --git a/tests/nemo_text_processing/g2p/data/test_data_utils.py b/tests/nemo_text_processing/g2p/data/test_data_utils.py
index c04a4d0f0e13..5986556663cb 100644
--- a/tests/nemo_text_processing/g2p/data/test_data_utils.py
+++ b/tests/nemo_text_processing/g2p/data/test_data_utils.py
@@ -16,7 +16,7 @@
 from nemo_text_processing.g2p.data.data_utils import (
     any_locale_word_tokenize,
     english_word_tokenize,
-    get_homograph_spans,
+    get_heteronym_spans,
 )
 
 
@@ -95,7 +95,7 @@ def test_any_locale_word_tokenize_with_accents(self):
     @pytest.mark.run_only_on('CPU')
     @pytest.mark.unit
     def test_any_locale_word_tokenize_with_numbers(self):
-        input_text = "Three times× four^teen ÷divided by [movies] on \slash."
+        input_text = r"Three times× four^teen ÷divided by [movies] on \slash."
         expected_output = self._create_expected_output(
             [
                 "three",
@@ -124,10 +124,11 @@ def test_any_locale_word_tokenize_with_numbers(self):
 
     @pytest.mark.run_only_on('CPU')
     @pytest.mark.unit
-    def test_get_homograph_spans(self):
-        supported_homographs = ["live", "read", "protest", "diffuse", "desert"]
+    def test_get_heteronym_spans(self):
+        supported_heteronyms = ["live", "read", "protest", "diffuse", "desert"]
         sentences = [
-            "I live in California. I READ a book. Only people who have already gained something are willing to protest. He reads a book!",
+            "I live in California. I READ a book. Only people who have already gained something are willing to protest."
+            " He reads a book!",
             "Yesterday, I read a book.",
             "He read a book last night and pre-diffuse and LivE-post and pre-desert-post.",
             "the soldier deserted the desert in desert.",
@@ -139,13 +140,13 @@ def test_get_homograph_spans(self):
             [(3, 7), (34, 41), (46, 50), (64, 70)],
             [(25, 31), (35, 41)],
         ]
-        expected_homographs = [
+        expected_heteronyms = [
             ["live", "read", "protest"],
             ['read'],
             ['read', 'diffuse', 'live', 'desert'],
             ['desert', 'desert'],
         ]
 
-        out_start_end, out_homographs = get_homograph_spans(sentences, supported_homographs)
+        out_start_end, out_heteronyms = get_heteronym_spans(sentences, supported_heteronyms)
         assert out_start_end == expected_start_end, "start-end spans do not match"
-        assert out_homographs == expected_homographs, "homograph spans do not match"
+        assert out_heteronyms == expected_heteronyms, "heteronym spans do not match"