From 3aa637223364c953b21230c6f4674f29bd06fd5a Mon Sep 17 00:00:00 2001
From: Mostafa Ghorbandoost <mos.ghorbandoost@gmail.com>
Date: Wed, 19 Apr 2023 13:56:54 -0700
Subject: [PATCH] Fix an invalid link in get_data.py of ljspeech

The link on line 63 points to GitHub's HTML page for the file, so it downloads an HTML document rather than the TSV file; change it to the raw-content link.

Signed-off-by: Mostafa Ghorbandoost <mos.ghorbandoost@gmail.com>
---
 scripts/dataset_processing/tts/ljspeech/get_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/dataset_processing/tts/ljspeech/get_data.py b/scripts/dataset_processing/tts/ljspeech/get_data.py
index 7c28fb8ef903..d8a0b1c2834c 100644
--- a/scripts/dataset_processing/tts/ljspeech/get_data.py
+++ b/scripts/dataset_processing/tts/ljspeech/get_data.py
@@ -60,7 +60,7 @@ def __extract_file(filepath, data_dir):
 def __process_data(data_root, whitelist_path):
     if whitelist_path is None:
         wget.download(
-            "https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv",
+            "https://raw.githubusercontent.com/NVIDIA/NeMo-text-processing/main/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv",
             out=str(data_root),
         )
         whitelist_path = data_root / "lj_speech.tsv"