diff --git a/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py b/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py
index 624cd8066a..2c86f76c07 100644
--- a/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py
+++ b/examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py
@@ -64,7 +64,7 @@
 import IPython
 import matplotlib.pyplot as plt
 from torchaudio.models.decoder import ctc_decoder
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
 ######################################################################
 #
@@ -85,7 +85,7 @@
 # We will load a sample from the LibriSpeech test-other dataset.
 #
 
-speech_file = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
+speech_file = _download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
 
 IPython.display.Audio(speech_file)
 
diff --git a/examples/tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.py b/examples/tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.py
index 8329d8a40e..0ad21d7f1d 100755
--- a/examples/tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.py
+++ b/examples/tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.py
@@ -67,7 +67,7 @@
 import IPython
 import sentencepiece as spm
 from torchaudio.models.decoder import cuda_ctc_decoder
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
 ######################################################################
 #
@@ -95,7 +95,7 @@ def download_asset_external(url, key):
 # We will load a sample from the LibriSpeech test-other dataset.
 #
 
-speech_file = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
+speech_file = _download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
 waveform, sample_rate = torchaudio.load(speech_file)
 assert sample_rate == 16000
 IPython.display.Audio(speech_file)
diff --git a/examples/tutorials/audio_data_augmentation_tutorial.py b/examples/tutorials/audio_data_augmentation_tutorial.py
index 13371e2058..60a7105d04 100644
--- a/examples/tutorials/audio_data_augmentation_tutorial.py
+++ b/examples/tutorials/audio_data_augmentation_tutorial.py
@@ -31,12 +31,12 @@
 
 from IPython.display import Audio
 
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
-SAMPLE_WAV = download_asset("tutorial-assets/steam-train-whistle-daniel_simon.wav")
-SAMPLE_RIR = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav")
-SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042-8000hz.wav")
-SAMPLE_NOISE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo-8000hz.wav")
+SAMPLE_WAV = _download_asset("tutorial-assets/steam-train-whistle-daniel_simon.wav")
+SAMPLE_RIR = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav")
+SAMPLE_SPEECH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042-8000hz.wav")
+SAMPLE_NOISE = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo-8000hz.wav")
 
 
 ######################################################################
diff --git a/examples/tutorials/audio_feature_augmentation_tutorial.py b/examples/tutorials/audio_feature_augmentation_tutorial.py
index c7f77c353d..4c43af1b67 100644
--- a/examples/tutorials/audio_feature_augmentation_tutorial.py
+++ b/examples/tutorials/audio_feature_augmentation_tutorial.py
@@ -22,7 +22,7 @@
 
 import matplotlib.pyplot as plt
 from IPython.display import Audio
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
 import torchaudio
 ######################################################################
@@ -30,7 +30,7 @@
 # `VOiCES dataset <https://iqtlabs.github.io/voices/>`__,
 # which is licensed under Creative Commos BY 4.0.
 
-SAMPLE_WAV_SPEECH_PATH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SAMPLE_WAV_SPEECH_PATH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
 
 
 def _get_sample(path):
diff --git a/examples/tutorials/audio_feature_extractions_tutorial.py b/examples/tutorials/audio_feature_extractions_tutorial.py
index 70b0a662a0..d4445ab739 100644
--- a/examples/tutorials/audio_feature_extractions_tutorial.py
+++ b/examples/tutorials/audio_feature_extractions_tutorial.py
@@ -48,11 +48,11 @@
 
 from IPython.display import Audio
 from matplotlib.patches import Rectangle
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
 torch.random.manual_seed(0)
 
-SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SAMPLE_SPEECH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
 
 
 def plot_waveform(waveform, sr, title="Waveform", ax=None):
diff --git a/examples/tutorials/ctc_forced_alignment_api_tutorial.py b/examples/tutorials/ctc_forced_alignment_api_tutorial.py
index 789fa3cf85..60ee93d19c 100644
--- a/examples/tutorials/ctc_forced_alignment_api_tutorial.py
+++ b/examples/tutorials/ctc_forced_alignment_api_tutorial.py
@@ -62,7 +62,7 @@
 # to use.
 #
 
-SPEECH_FILE = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SPEECH_FILE = torchaudio.utils._download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
 waveform, _ = torchaudio.load(SPEECH_FILE)
 TRANSCRIPT = "i had that curiosity beside me at this moment".split()
 
diff --git a/examples/tutorials/forced_alignment_tutorial.py b/examples/tutorials/forced_alignment_tutorial.py
index 624037da9d..7fa7c86dc3 100644
--- a/examples/tutorials/forced_alignment_tutorial.py
+++ b/examples/tutorials/forced_alignment_tutorial.py
@@ -81,7 +81,7 @@
 
 torch.random.manual_seed(0)
 
-SPEECH_FILE = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SPEECH_FILE = torchaudio.utils._download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
 
 
 ######################################################################
diff --git a/examples/tutorials/hybrid_demucs_tutorial.py b/examples/tutorials/hybrid_demucs_tutorial.py
index 4e1446ed82..b6b08d02cc 100644
--- a/examples/tutorials/hybrid_demucs_tutorial.py
+++ b/examples/tutorials/hybrid_demucs_tutorial.py
@@ -51,7 +51,7 @@
 
 from IPython.display import Audio
 from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB_PLUS
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
 ######################################################################
 # 3. Construct the pipeline
@@ -181,7 +181,7 @@ def plot_spectrogram(stft, title="Spectrogram"):
 #
 # We download the audio file from our storage. Feel free to download another file and use audio from a specific path
 
-SAMPLE_SONG = download_asset("tutorial-assets/hdemucs_mix.wav")
+SAMPLE_SONG = _download_asset("tutorial-assets/hdemucs_mix.wav")
 waveform, sample_rate = torchaudio.load(SAMPLE_SONG)  # replace SAMPLE_SONG with desired path for different song
 waveform = waveform.to(device)
 mixture = waveform
@@ -254,10 +254,10 @@ def output_results(original_source: torch.Tensor, predicted_source: torch.Tensor
 frame_start = segment_start * sample_rate
 frame_end = segment_end * sample_rate
 
-drums_original = download_asset("tutorial-assets/hdemucs_drums_segment.wav")
-bass_original = download_asset("tutorial-assets/hdemucs_bass_segment.wav")
-vocals_original = download_asset("tutorial-assets/hdemucs_vocals_segment.wav")
-other_original = download_asset("tutorial-assets/hdemucs_other_segment.wav")
+drums_original = _download_asset("tutorial-assets/hdemucs_drums_segment.wav")
+bass_original = _download_asset("tutorial-assets/hdemucs_bass_segment.wav")
+vocals_original = _download_asset("tutorial-assets/hdemucs_vocals_segment.wav")
+other_original = _download_asset("tutorial-assets/hdemucs_other_segment.wav")
 
 drums_spec = audios["drums"][:, frame_start:frame_end].cpu()
 drums, sample_rate = torchaudio.load(drums_original)
diff --git a/examples/tutorials/mvdr_tutorial.py b/examples/tutorials/mvdr_tutorial.py
index fd5978adcc..01890afeb9 100644
--- a/examples/tutorials/mvdr_tutorial.py
+++ b/examples/tutorials/mvdr_tutorial.py
@@ -49,7 +49,7 @@
 # 2.1. Import the packages
 #
 
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
 ######################################################################
 # 2.2. Download audio data
@@ -74,8 +74,8 @@
 #
 
 SAMPLE_RATE = 16000
-SAMPLE_CLEAN = download_asset("tutorial-assets/mvdr/clean_speech.wav")
-SAMPLE_NOISE = download_asset("tutorial-assets/mvdr/noise.wav")
+SAMPLE_CLEAN = _download_asset("tutorial-assets/mvdr/clean_speech.wav")
+SAMPLE_NOISE = _download_asset("tutorial-assets/mvdr/noise.wav")
 
 
 ######################################################################
diff --git a/examples/tutorials/speech_recognition_pipeline_tutorial.py b/examples/tutorials/speech_recognition_pipeline_tutorial.py
index 2d815a2e8e..2c8dfc752b 100644
--- a/examples/tutorials/speech_recognition_pipeline_tutorial.py
+++ b/examples/tutorials/speech_recognition_pipeline_tutorial.py
@@ -51,9 +51,9 @@
 import IPython
 import matplotlib.pyplot as plt
 
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
-SPEECH_FILE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SPEECH_FILE = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
 
 
 ######################################################################
diff --git a/examples/tutorials/squim_tutorial.py b/examples/tutorials/squim_tutorial.py
index 9b9b55ac2e..18bc0db84c 100644
--- a/examples/tutorials/squim_tutorial.py
+++ b/examples/tutorials/squim_tutorial.py
@@ -109,7 +109,7 @@
 import torchaudio.functional as F
 from IPython.display import Audio
 
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
 
 def si_snr(estimate, reference, epsilon=1e-8):
@@ -150,8 +150,8 @@ def plot(waveform, title, sample_rate=16000):
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 #
 
-SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
-SAMPLE_NOISE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav")
+SAMPLE_SPEECH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SAMPLE_NOISE = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav")
 
 
 ######################################################################
@@ -326,7 +326,7 @@ def plot(waveform, title, sample_rate=16000):
 # Load a non-matching reference (NMR)
 #
 
-NMR_SPEECH = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
+NMR_SPEECH = _download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
 
 WAVEFORM_NMR, SAMPLE_RATE_NMR = torchaudio.load(NMR_SPEECH)
 if SAMPLE_RATE_NMR != 16000:
diff --git a/src/torchaudio/models/decoder/_ctc_decoder.py b/src/torchaudio/models/decoder/_ctc_decoder.py
index 4d45f12f52..a45662011b 100644
--- a/src/torchaudio/models/decoder/_ctc_decoder.py
+++ b/src/torchaudio/models/decoder/_ctc_decoder.py
@@ -25,7 +25,7 @@
     Dictionary as _Dictionary,
     load_words as _load_words,
 )
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
 
 try:
     from flashlight.lib.text.decoder.kenlm import KenLM as _KenLM
@@ -554,10 +554,10 @@ def download_pretrained_files(model: str) -> _PretrainedFiles:
 
     """
     files = _get_filenames(model)
-    lexicon_file = download_asset(files.lexicon)
-    tokens_file = download_asset(files.tokens)
+    lexicon_file = _download_asset(files.lexicon)
+    tokens_file = _download_asset(files.tokens)
     if files.lm is not None:
-        lm_file = download_asset(files.lm)
+        lm_file = _download_asset(files.lm)
     else:
         lm_file = None
 
diff --git a/src/torchaudio/pipelines/_source_separation_pipeline.py b/src/torchaudio/pipelines/_source_separation_pipeline.py
index ae92e21831..368b72d45e 100644
--- a/src/torchaudio/pipelines/_source_separation_pipeline.py
+++ b/src/torchaudio/pipelines/_source_separation_pipeline.py
@@ -52,7 +52,7 @@ def sample_rate(self) -> int:
     def get_model(self) -> torch.nn.Module:
         """Construct the model and load the pretrained weight."""
         model = self._model_factory_func()
-        path = torchaudio.utils.download_asset(self._model_path)
+        path = torchaudio.utils._download_asset(self._model_path)
         state_dict = torch.load(path)
         model.load_state_dict(state_dict)
         model.eval()
diff --git a/src/torchaudio/pipelines/_squim_pipeline.py b/src/torchaudio/pipelines/_squim_pipeline.py
index 0c70db4aef..f7e7c1d908 100644
--- a/src/torchaudio/pipelines/_squim_pipeline.py
+++ b/src/torchaudio/pipelines/_squim_pipeline.py
@@ -50,7 +50,7 @@ def get_model(self) -> SquimObjective:
             Variation of :py:class:`~torchaudio.models.SquimObjective`.
         """
         model = squim_objective_base()
-        path = torchaudio.utils.download_asset(f"models/{self._path}")
+        path = torchaudio.utils._download_asset(f"models/{self._path}")
         state_dict = torch.load(path, weights_only=True)
         model.load_state_dict(state_dict)
         model.eval()
@@ -125,7 +125,7 @@ def get_model(self) -> SquimSubjective:
             Variation of :py:class:`~torchaudio.models.SquimObjective`.
         """
         model = squim_subjective_base()
-        path = torchaudio.utils.download_asset(f"models/{self._path}")
+        path = torchaudio.utils._download_asset(f"models/{self._path}")
         state_dict = torch.load(path, weights_only=True)
         model.load_state_dict(state_dict)
         model.eval()
diff --git a/src/torchaudio/pipelines/rnnt_pipeline.py b/src/torchaudio/pipelines/rnnt_pipeline.py
index 11b5a479f3..c7d5385b37 100644
--- a/src/torchaudio/pipelines/rnnt_pipeline.py
+++ b/src/torchaudio/pipelines/rnnt_pipeline.py
@@ -244,7 +244,7 @@ class TokenProcessor(_TokenProcessor):
 
     def _get_model(self) -> RNNT:
         model = self._rnnt_factory_func()
-        path = torchaudio.utils.download_asset(self._rnnt_path)
+        path = torchaudio.utils._download_asset(self._rnnt_path)
         state_dict = torch.load(path)
         model.load_state_dict(state_dict)
         model.eval()
@@ -313,7 +313,7 @@ def get_feature_extractor(self) -> FeatureExtractor:
         Returns:
             FeatureExtractor
         """
-        local_path = torchaudio.utils.download_asset(self._global_stats_path)
+        local_path = torchaudio.utils._download_asset(self._global_stats_path)
         return _ModuleFeatureExtractor(
             torch.nn.Sequential(
                 torchaudio.transforms.MelSpectrogram(
@@ -332,7 +332,7 @@ def get_streaming_feature_extractor(self) -> FeatureExtractor:
         Returns:
             FeatureExtractor
         """
-        local_path = torchaudio.utils.download_asset(self._global_stats_path)
+        local_path = torchaudio.utils._download_asset(self._global_stats_path)
         return _ModuleFeatureExtractor(
             torch.nn.Sequential(
                 torchaudio.transforms.MelSpectrogram(
@@ -350,7 +350,7 @@ def get_token_processor(self) -> TokenProcessor:
         Returns:
             TokenProcessor
         """
-        local_path = torchaudio.utils.download_asset(self._sp_model_path)
+        local_path = torchaudio.utils._download_asset(self._sp_model_path)
         return _SentencePieceTokenProcessor(local_path)
 
 
diff --git a/src/torchaudio/utils/__init__.py b/src/torchaudio/utils/__init__.py
index 89bffaa34d..9d4dd2dd72 100644
--- a/src/torchaudio/utils/__init__.py
+++ b/src/torchaudio/utils/__init__.py
@@ -1,11 +1,10 @@
 from torio.utils import ffmpeg_utils
 
 from . import sox_utils
-from .download import download_asset
+from .download import _download_asset
 
 
 __all__ = [
-    "download_asset",
     "sox_utils",
     "ffmpeg_utils",
 ]
diff --git a/src/torchaudio/utils/download.py b/src/torchaudio/utils/download.py
index a2b4a422ee..b74cd60604 100644
--- a/src/torchaudio/utils/download.py
+++ b/src/torchaudio/utils/download.py
@@ -32,8 +32,7 @@ def _get_hash(path, hash, chunk_size=1028):
 
 from torchaudio._internal.module_utils import dropping_support
 
-@dropping_support
-def download_asset(
+def _download_asset(
     key: str,
     hash: str = "",
     path: Union[str, PathLike] = "",
diff --git a/test/integration_tests/conftest.py b/test/integration_tests/conftest.py
index e0c456dab0..bdd863f681 100644
--- a/test/integration_tests/conftest.py
+++ b/test/integration_tests/conftest.py
@@ -66,7 +66,7 @@ def sample_speech(lang):
     if lang not in _FILES:
         raise NotImplementedError(f"Unexpected lang: {lang}")
     filename = _FILES[lang]
-    path = torchaudio.utils.download_asset(f"test-assets/{filename}")
+    path = torchaudio.utils._download_asset(f"test-assets/{filename}")
     return path
 
 
@@ -74,7 +74,7 @@ def sample_speech(lang):
 def mixture_source(task):
     if task not in _MIXTURE_FILES:
         raise NotImplementedError(f"Unexpected task: {task}")
-    path = torchaudio.utils.download_asset(f"test-assets/{_MIXTURE_FILES[task]}")
+    path = torchaudio.utils._download_asset(f"test-assets/{_MIXTURE_FILES[task]}")
     return path
 
 
@@ -84,7 +84,7 @@ def clean_sources(task):
         raise NotImplementedError(f"Unexpected task: {task}")
     paths = []
     for file in _CLEAN_FILES[task]:
-        path = torchaudio.utils.download_asset(f"test-assets/{file}")
+        path = torchaudio.utils._download_asset(f"test-assets/{file}")
         paths.append(path)
     return paths
 
@@ -115,5 +115,5 @@ def temp_hub_dir(tmp_path, pytestconfig):
 
 @pytest.fixture()
 def emissions():
-    path = torchaudio.utils.download_asset("test-assets/emissions-8555-28447-0012.pt")
+    path = torchaudio.utils._download_asset("test-assets/emissions-8555-28447-0012.pt")
     return torch.load(path)
diff --git a/test/integration_tests/prototype/vggish_pipeline_test.py b/test/integration_tests/prototype/vggish_pipeline_test.py
index 72c6e1e518..17a31e1a71 100644
--- a/test/integration_tests/prototype/vggish_pipeline_test.py
+++ b/test/integration_tests/prototype/vggish_pipeline_test.py
@@ -6,7 +6,7 @@ def test_vggish():
     input_sr = VGGISH.sample_rate
     input_proc = VGGISH.get_input_processor()
     model = VGGISH.get_model()
-    path = torchaudio.utils.download_asset("test-assets/Chopin_Ballade_-1_In_G_Minor,_Op._23_excerpt.mp3")
+    path = torchaudio.utils._download_asset("test-assets/Chopin_Ballade_-1_In_G_Minor,_Op._23_excerpt.mp3")
     waveform, sr = torchaudio.load(path, backend="ffmpeg")
     waveform = waveform.mean(axis=0)
     waveform = torchaudio.functional.resample(waveform, sr, input_sr)