4 changes: 2 additions & 2 deletions examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py
@@ -64,7 +64,7 @@
import IPython
import matplotlib.pyplot as plt
from torchaudio.models.decoder import ctc_decoder
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

######################################################################
#
@@ -85,7 +85,7 @@
# We will load a sample from the LibriSpeech test-other dataset.
#

-speech_file = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
+speech_file = _download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")

IPython.display.Audio(speech_file)

@@ -67,7 +67,7 @@
import IPython
import sentencepiece as spm
from torchaudio.models.decoder import cuda_ctc_decoder
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

######################################################################
#
@@ -95,7 +95,7 @@ def download_asset_external(url, key):
# We will load a sample from the LibriSpeech test-other dataset.
#

-speech_file = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
+speech_file = _download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
waveform, sample_rate = torchaudio.load(speech_file)
assert sample_rate == 16000
IPython.display.Audio(speech_file)
10 changes: 5 additions & 5 deletions examples/tutorials/audio_data_augmentation_tutorial.py
@@ -31,12 +31,12 @@

from IPython.display import Audio

-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

-SAMPLE_WAV = download_asset("tutorial-assets/steam-train-whistle-daniel_simon.wav")
-SAMPLE_RIR = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav")
-SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042-8000hz.wav")
-SAMPLE_NOISE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo-8000hz.wav")
+SAMPLE_WAV = _download_asset("tutorial-assets/steam-train-whistle-daniel_simon.wav")
+SAMPLE_RIR = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav")
+SAMPLE_SPEECH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042-8000hz.wav")
+SAMPLE_NOISE = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo-8000hz.wav")


######################################################################
4 changes: 2 additions & 2 deletions examples/tutorials/audio_feature_augmentation_tutorial.py
@@ -22,15 +22,15 @@

import matplotlib.pyplot as plt
from IPython.display import Audio
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
import torchaudio

######################################################################
# In this tutorial, we will use speech data from
# the `VOiCES dataset <https://iqtlabs.github.io/voices/>`__,
# which is licensed under Creative Commons BY 4.0.

-SAMPLE_WAV_SPEECH_PATH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SAMPLE_WAV_SPEECH_PATH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")


def _get_sample(path):
4 changes: 2 additions & 2 deletions examples/tutorials/audio_feature_extractions_tutorial.py
@@ -48,11 +48,11 @@

from IPython.display import Audio
from matplotlib.patches import Rectangle
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

torch.random.manual_seed(0)

-SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SAMPLE_SPEECH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")


def plot_waveform(waveform, sr, title="Waveform", ax=None):
2 changes: 1 addition & 1 deletion examples/tutorials/ctc_forced_alignment_api_tutorial.py
@@ -62,7 +62,7 @@
# to use.
#

-SPEECH_FILE = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SPEECH_FILE = torchaudio.utils._download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
waveform, _ = torchaudio.load(SPEECH_FILE)
TRANSCRIPT = "i had that curiosity beside me at this moment".split()

2 changes: 1 addition & 1 deletion examples/tutorials/forced_alignment_tutorial.py
@@ -81,7 +81,7 @@

torch.random.manual_seed(0)

-SPEECH_FILE = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SPEECH_FILE = torchaudio.utils._download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")


######################################################################
12 changes: 6 additions & 6 deletions examples/tutorials/hybrid_demucs_tutorial.py
@@ -51,7 +51,7 @@

from IPython.display import Audio
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB_PLUS
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

######################################################################
# 3. Construct the pipeline
@@ -181,7 +181,7 @@ def plot_spectrogram(stft, title="Spectrogram"):
#

# We download the audio file from our storage. Feel free to download another file and use audio from a specific path
-SAMPLE_SONG = download_asset("tutorial-assets/hdemucs_mix.wav")
+SAMPLE_SONG = _download_asset("tutorial-assets/hdemucs_mix.wav")
waveform, sample_rate = torchaudio.load(SAMPLE_SONG) # replace SAMPLE_SONG with desired path for different song
waveform = waveform.to(device)
mixture = waveform
@@ -254,10 +254,10 @@ def output_results(original_source: torch.Tensor, predicted_source: torch.Tensor
frame_start = segment_start * sample_rate
frame_end = segment_end * sample_rate

-drums_original = download_asset("tutorial-assets/hdemucs_drums_segment.wav")
-bass_original = download_asset("tutorial-assets/hdemucs_bass_segment.wav")
-vocals_original = download_asset("tutorial-assets/hdemucs_vocals_segment.wav")
-other_original = download_asset("tutorial-assets/hdemucs_other_segment.wav")
+drums_original = _download_asset("tutorial-assets/hdemucs_drums_segment.wav")
+bass_original = _download_asset("tutorial-assets/hdemucs_bass_segment.wav")
+vocals_original = _download_asset("tutorial-assets/hdemucs_vocals_segment.wav")
+other_original = _download_asset("tutorial-assets/hdemucs_other_segment.wav")

drums_spec = audios["drums"][:, frame_start:frame_end].cpu()
drums, sample_rate = torchaudio.load(drums_original)
6 changes: 3 additions & 3 deletions examples/tutorials/mvdr_tutorial.py
@@ -49,7 +49,7 @@
# 2.1. Import the packages
#

-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

######################################################################
# 2.2. Download audio data
@@ -74,8 +74,8 @@
#

SAMPLE_RATE = 16000
-SAMPLE_CLEAN = download_asset("tutorial-assets/mvdr/clean_speech.wav")
-SAMPLE_NOISE = download_asset("tutorial-assets/mvdr/noise.wav")
+SAMPLE_CLEAN = _download_asset("tutorial-assets/mvdr/clean_speech.wav")
+SAMPLE_NOISE = _download_asset("tutorial-assets/mvdr/noise.wav")


######################################################################
4 changes: 2 additions & 2 deletions examples/tutorials/speech_recognition_pipeline_tutorial.py
@@ -51,9 +51,9 @@

import IPython
import matplotlib.pyplot as plt
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

-SPEECH_FILE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SPEECH_FILE = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")


######################################################################
8 changes: 4 additions & 4 deletions examples/tutorials/squim_tutorial.py
@@ -109,7 +109,7 @@

import torchaudio.functional as F
from IPython.display import Audio
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset


def si_snr(estimate, reference, epsilon=1e-8):
@@ -150,8 +150,8 @@ def plot(waveform, title, sample_rate=16000):
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#

-SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
-SAMPLE_NOISE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav")
+SAMPLE_SPEECH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SAMPLE_NOISE = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav")


######################################################################
@@ -326,7 +326,7 @@ def plot(waveform, title, sample_rate=16000):
# Load a non-matching reference (NMR)
#

-NMR_SPEECH = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
+NMR_SPEECH = _download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")

WAVEFORM_NMR, SAMPLE_RATE_NMR = torchaudio.load(NMR_SPEECH)
if SAMPLE_RATE_NMR != 16000:
8 changes: 4 additions & 4 deletions src/torchaudio/models/decoder/_ctc_decoder.py
@@ -25,7 +25,7 @@
Dictionary as _Dictionary,
load_words as _load_words,
)
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

try:
from flashlight.lib.text.decoder.kenlm import KenLM as _KenLM
@@ -554,10 +554,10 @@ def download_pretrained_files(model: str) -> _PretrainedFiles:
"""

files = _get_filenames(model)
-lexicon_file = download_asset(files.lexicon)
-tokens_file = download_asset(files.tokens)
+lexicon_file = _download_asset(files.lexicon)
+tokens_file = _download_asset(files.tokens)
if files.lm is not None:
-lm_file = download_asset(files.lm)
+lm_file = _download_asset(files.lm)
else:
lm_file = None

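The rename stays internal to the decoder module: download_pretrained_files keeps its public name and still returns the same lexicon/tokens/LM paths. A minimal caller-side sketch is below; the model name "librispeech-4-gram" and the decoder parameters are illustrative values taken from the existing torchaudio API, not from this diff.

# Sketch: caller-side usage is unchanged by this PR; only the internal
# download helper it delegates to was renamed.
from torchaudio.models.decoder import ctc_decoder, download_pretrained_files

files = download_pretrained_files("librispeech-4-gram")  # fetches lexicon, tokens, and LM

decoder = ctc_decoder(
    lexicon=files.lexicon,
    tokens=files.tokens,
    lm=files.lm,
    nbest=3,
    beam_size=50,
)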
2 changes: 1 addition & 1 deletion src/torchaudio/pipelines/_source_separation_pipeline.py
@@ -52,7 +52,7 @@ def sample_rate(self) -> int:
def get_model(self) -> torch.nn.Module:
"""Construct the model and load the pretrained weight."""
model = self._model_factory_func()
-path = torchaudio.utils.download_asset(self._model_path)
+path = torchaudio.utils._download_asset(self._model_path)
state_dict = torch.load(path)
model.load_state_dict(state_dict)
model.eval()
4 changes: 2 additions & 2 deletions src/torchaudio/pipelines/_squim_pipeline.py
@@ -50,7 +50,7 @@ def get_model(self) -> SquimObjective:
Variation of :py:class:`~torchaudio.models.SquimObjective`.
"""
model = squim_objective_base()
-path = torchaudio.utils.download_asset(f"models/{self._path}")
+path = torchaudio.utils._download_asset(f"models/{self._path}")
state_dict = torch.load(path, weights_only=True)
model.load_state_dict(state_dict)
model.eval()
@@ -125,7 +125,7 @@ def get_model(self) -> SquimSubjective:
Variation of :py:class:`~torchaudio.models.SquimObjective`.
"""
model = squim_subjective_base()
-path = torchaudio.utils.download_asset(f"models/{self._path}")
+path = torchaudio.utils._download_asset(f"models/{self._path}")
state_dict = torch.load(path, weights_only=True)
model.load_state_dict(state_dict)
model.eval()
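The pipeline bundles are likewise unaffected from the user's side, since the renamed helper is only called inside get_model. A brief sketch, assuming the existing SQUIM_OBJECTIVE bundle and a 16 kHz input; the random waveform is a placeholder.

# Sketch of bundle usage; the checkpoint is fetched internally via the
# renamed private helper.
import torch
from torchaudio.pipelines import SQUIM_OBJECTIVE

model = SQUIM_OBJECTIVE.get_model()
waveform = torch.randn(1, 16000)       # placeholder: one second of audio at 16 kHz
stoi, pesq, si_sdr = model(waveform)   # objective speech-quality estimates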
8 changes: 4 additions & 4 deletions src/torchaudio/pipelines/rnnt_pipeline.py
@@ -244,7 +244,7 @@ class TokenProcessor(_TokenProcessor):

def _get_model(self) -> RNNT:
model = self._rnnt_factory_func()
-path = torchaudio.utils.download_asset(self._rnnt_path)
+path = torchaudio.utils._download_asset(self._rnnt_path)
state_dict = torch.load(path)
model.load_state_dict(state_dict)
model.eval()
@@ -313,7 +313,7 @@ def get_feature_extractor(self) -> FeatureExtractor:
Returns:
FeatureExtractor
"""
-local_path = torchaudio.utils.download_asset(self._global_stats_path)
+local_path = torchaudio.utils._download_asset(self._global_stats_path)
return _ModuleFeatureExtractor(
torch.nn.Sequential(
torchaudio.transforms.MelSpectrogram(
@@ -332,7 +332,7 @@ def get_streaming_feature_extractor(self) -> FeatureExtractor:
Returns:
FeatureExtractor
"""
-local_path = torchaudio.utils.download_asset(self._global_stats_path)
+local_path = torchaudio.utils._download_asset(self._global_stats_path)
return _ModuleFeatureExtractor(
torch.nn.Sequential(
torchaudio.transforms.MelSpectrogram(
@@ -350,7 +350,7 @@ def get_token_processor(self) -> TokenProcessor:
Returns:
TokenProcessor
"""
-local_path = torchaudio.utils.download_asset(self._sp_model_path)
+local_path = torchaudio.utils._download_asset(self._sp_model_path)
return _SentencePieceTokenProcessor(local_path)


3 changes: 1 addition & 2 deletions src/torchaudio/utils/__init__.py
@@ -1,11 +1,10 @@
from torio.utils import ffmpeg_utils

from . import sox_utils
-from .download import download_asset
+from .download import _download_asset


__all__ = [
"download_asset",
"sox_utils",
"ffmpeg_utils",
]
3 changes: 1 addition & 2 deletions src/torchaudio/utils/download.py
@@ -32,8 +32,7 @@ def _get_hash(path, hash, chunk_size=1028):

from torchaudio._internal.module_utils import dropping_support

-@dropping_support
-def download_asset(
+def _download_asset(
key: str,
hash: str = "",
path: Union[str, PathLike] = "",
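Because download_asset is no longer exported, downstream code that imported the public name has to follow the rename. One option is a guarded import like the sketch below; this shim is not part of the PR, and _download_asset is private, so it carries no stability guarantee.

# Compatibility sketch (assumption, not part of this diff): prefer the old
# public name when it exists, otherwise fall back to the renamed private helper.
try:
    from torchaudio.utils import download_asset  # releases before this change
except ImportError:
    from torchaudio.utils import _download_asset as download_asset  # after this change

speech_file = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")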
8 changes: 4 additions & 4 deletions test/integration_tests/conftest.py
@@ -66,15 +66,15 @@ def sample_speech(lang):
if lang not in _FILES:
raise NotImplementedError(f"Unexpected lang: {lang}")
filename = _FILES[lang]
-path = torchaudio.utils.download_asset(f"test-assets/{filename}")
+path = torchaudio.utils._download_asset(f"test-assets/{filename}")
return path


@pytest.fixture
def mixture_source(task):
if task not in _MIXTURE_FILES:
raise NotImplementedError(f"Unexpected task: {task}")
-path = torchaudio.utils.download_asset(f"test-assets/{_MIXTURE_FILES[task]}")
+path = torchaudio.utils._download_asset(f"test-assets/{_MIXTURE_FILES[task]}")
return path


@@ -84,7 +84,7 @@ def clean_sources(task):
raise NotImplementedError(f"Unexpected task: {task}")
paths = []
for file in _CLEAN_FILES[task]:
-path = torchaudio.utils.download_asset(f"test-assets/{file}")
+path = torchaudio.utils._download_asset(f"test-assets/{file}")
paths.append(path)
return paths

@@ -115,5 +115,5 @@ def temp_hub_dir(tmp_path, pytestconfig):

@pytest.fixture()
def emissions():
-path = torchaudio.utils.download_asset("test-assets/emissions-8555-28447-0012.pt")
+path = torchaudio.utils._download_asset("test-assets/emissions-8555-28447-0012.pt")
return torch.load(path)
2 changes: 1 addition & 1 deletion test/integration_tests/prototype/vggish_pipeline_test.py
@@ -6,7 +6,7 @@ def test_vggish():
input_sr = VGGISH.sample_rate
input_proc = VGGISH.get_input_processor()
model = VGGISH.get_model()
-path = torchaudio.utils.download_asset("test-assets/Chopin_Ballade_-1_In_G_Minor,_Op._23_excerpt.mp3")
+path = torchaudio.utils._download_asset("test-assets/Chopin_Ballade_-1_In_G_Minor,_Op._23_excerpt.mp3")
waveform, sr = torchaudio.load(path, backend="ffmpeg")
waveform = waveform.mean(axis=0)
waveform = torchaudio.functional.resample(waveform, sr, input_sr)