4 changes: 2 additions & 2 deletions examples/tutorials/asr_inference_with_ctc_decoder_tutorial.py
@@ -64,7 +64,7 @@
import IPython
import matplotlib.pyplot as plt
from torchaudio.models.decoder import ctc_decoder
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

######################################################################
#
@@ -85,7 +85,7 @@
# We will load a sample from the LibriSpeech test-other dataset.
#

-speech_file = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
+speech_file = _download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")

IPython.display.Audio(speech_file)

@@ -67,7 +67,7 @@
import IPython
import sentencepiece as spm
from torchaudio.models.decoder import cuda_ctc_decoder
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

######################################################################
#
@@ -95,7 +95,7 @@ def download_asset_external(url, key):
# We will load a sample from the LibriSpeech test-other dataset.
#

-speech_file = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
+speech_file = _download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
waveform, sample_rate = torchaudio.load(speech_file)
assert sample_rate == 16000
IPython.display.Audio(speech_file)
10 changes: 5 additions & 5 deletions examples/tutorials/audio_data_augmentation_tutorial.py
@@ -31,12 +31,12 @@

from IPython.display import Audio

-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

-SAMPLE_WAV = download_asset("tutorial-assets/steam-train-whistle-daniel_simon.wav")
-SAMPLE_RIR = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav")
-SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042-8000hz.wav")
-SAMPLE_NOISE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo-8000hz.wav")
+SAMPLE_WAV = _download_asset("tutorial-assets/steam-train-whistle-daniel_simon.wav")
+SAMPLE_RIR = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav")
+SAMPLE_SPEECH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042-8000hz.wav")
+SAMPLE_NOISE = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo-8000hz.wav")


######################################################################
4 changes: 2 additions & 2 deletions examples/tutorials/audio_feature_augmentation_tutorial.py
@@ -22,15 +22,15 @@

import matplotlib.pyplot as plt
from IPython.display import Audio
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset
import torchaudio

######################################################################
# In this tutorial, we will use speech data from
# the `VOiCES dataset <https://iqtlabs.github.io/voices/>`__,
# which is licensed under Creative Commons BY 4.0.

-SAMPLE_WAV_SPEECH_PATH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SAMPLE_WAV_SPEECH_PATH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")


def _get_sample(path):
4 changes: 2 additions & 2 deletions examples/tutorials/audio_feature_extractions_tutorial.py
@@ -48,11 +48,11 @@

from IPython.display import Audio
from matplotlib.patches import Rectangle
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

torch.random.manual_seed(0)

-SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SAMPLE_SPEECH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")


def plot_waveform(waveform, sr, title="Waveform", ax=None):
2 changes: 1 addition & 1 deletion examples/tutorials/ctc_forced_alignment_api_tutorial.py
@@ -62,7 +62,7 @@
# to use.
#

-SPEECH_FILE = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SPEECH_FILE = torchaudio.utils._download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
waveform, _ = torchaudio.load(SPEECH_FILE)
TRANSCRIPT = "i had that curiosity beside me at this moment".split()

2 changes: 1 addition & 1 deletion examples/tutorials/forced_alignment_tutorial.py
@@ -81,7 +81,7 @@

torch.random.manual_seed(0)

-SPEECH_FILE = torchaudio.utils.download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SPEECH_FILE = torchaudio.utils._download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")


######################################################################
12 changes: 6 additions & 6 deletions examples/tutorials/hybrid_demucs_tutorial.py
@@ -51,7 +51,7 @@

from IPython.display import Audio
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB_PLUS
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

######################################################################
# 3. Construct the pipeline
@@ -181,7 +181,7 @@ def plot_spectrogram(stft, title="Spectrogram"):
#

# We download the audio file from our storage. Feel free to download another file and use audio from a specific path
-SAMPLE_SONG = download_asset("tutorial-assets/hdemucs_mix.wav")
+SAMPLE_SONG = _download_asset("tutorial-assets/hdemucs_mix.wav")
waveform, sample_rate = torchaudio.load(SAMPLE_SONG) # replace SAMPLE_SONG with desired path for different song
waveform = waveform.to(device)
mixture = waveform
@@ -254,10 +254,10 @@ def output_results(original_source: torch.Tensor, predicted_source: torch.Tensor
frame_start = segment_start * sample_rate
frame_end = segment_end * sample_rate

-drums_original = download_asset("tutorial-assets/hdemucs_drums_segment.wav")
-bass_original = download_asset("tutorial-assets/hdemucs_bass_segment.wav")
-vocals_original = download_asset("tutorial-assets/hdemucs_vocals_segment.wav")
-other_original = download_asset("tutorial-assets/hdemucs_other_segment.wav")
+drums_original = _download_asset("tutorial-assets/hdemucs_drums_segment.wav")
+bass_original = _download_asset("tutorial-assets/hdemucs_bass_segment.wav")
+vocals_original = _download_asset("tutorial-assets/hdemucs_vocals_segment.wav")
+other_original = _download_asset("tutorial-assets/hdemucs_other_segment.wav")

drums_spec = audios["drums"][:, frame_start:frame_end].cpu()
drums, sample_rate = torchaudio.load(drums_original)
6 changes: 3 additions & 3 deletions examples/tutorials/mvdr_tutorial.py
@@ -49,7 +49,7 @@
# 2.1. Import the packages
#

-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

######################################################################
# 2.2. Download audio data
@@ -74,8 +74,8 @@
#

SAMPLE_RATE = 16000
-SAMPLE_CLEAN = download_asset("tutorial-assets/mvdr/clean_speech.wav")
-SAMPLE_NOISE = download_asset("tutorial-assets/mvdr/noise.wav")
+SAMPLE_CLEAN = _download_asset("tutorial-assets/mvdr/clean_speech.wav")
+SAMPLE_NOISE = _download_asset("tutorial-assets/mvdr/noise.wav")


######################################################################
4 changes: 2 additions & 2 deletions examples/tutorials/speech_recognition_pipeline_tutorial.py
@@ -51,9 +51,9 @@

import IPython
import matplotlib.pyplot as plt
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

-SPEECH_FILE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SPEECH_FILE = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")


######################################################################
8 changes: 4 additions & 4 deletions examples/tutorials/squim_tutorial.py
@@ -109,7 +109,7 @@

import torchaudio.functional as F
from IPython.display import Audio
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset


def si_snr(estimate, reference, epsilon=1e-8):
@@ -150,8 +150,8 @@ def plot(waveform, title, sample_rate=16000):
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#

-SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
-SAMPLE_NOISE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav")
+SAMPLE_SPEECH = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
+SAMPLE_NOISE = _download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav")


######################################################################
@@ -326,7 +326,7 @@ def plot(waveform, title, sample_rate=16000):
# Load a non-matching reference (NMR)
#

-NMR_SPEECH = download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")
+NMR_SPEECH = _download_asset("tutorial-assets/ctc-decoding/1688-142285-0007.wav")

WAVEFORM_NMR, SAMPLE_RATE_NMR = torchaudio.load(NMR_SPEECH)
if SAMPLE_RATE_NMR != 16000:
8 changes: 4 additions & 4 deletions src/torchaudio/models/decoder/_ctc_decoder.py
@@ -25,7 +25,7 @@
Dictionary as _Dictionary,
load_words as _load_words,
)
-from torchaudio.utils import download_asset
+from torchaudio.utils import _download_asset

try:
from flashlight.lib.text.decoder.kenlm import KenLM as _KenLM
@@ -554,10 +554,10 @@ def download_pretrained_files(model: str) -> _PretrainedFiles:
"""

files = _get_filenames(model)
-lexicon_file = download_asset(files.lexicon)
-tokens_file = download_asset(files.tokens)
+lexicon_file = _download_asset(files.lexicon)
+tokens_file = _download_asset(files.tokens)
if files.lm is not None:
-lm_file = download_asset(files.lm)
+lm_file = _download_asset(files.lm)
else:
lm_file = None

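The rename stays internal to the decoder module: download_pretrained_files keeps its public name and still returns the same lexicon/tokens/LM paths. A minimal caller-side sketch is below; the model name "librispeech-4-gram" and the decoder parameters are illustrative values taken from the existing torchaudio API, not from this diff.

# Sketch: caller-side usage is unchanged by this PR; only the internal
# download helper it delegates to was renamed.
from torchaudio.models.decoder import ctc_decoder, download_pretrained_files

files = download_pretrained_files("librispeech-4-gram")  # fetches lexicon, tokens, and LM

decoder = ctc_decoder(
    lexicon=files.lexicon,
    tokens=files.tokens,
    lm=files.lm,
    nbest=3,
    beam_size=50,
)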
2 changes: 1 addition & 1 deletion src/torchaudio/pipelines/_source_separation_pipeline.py
@@ -52,7 +52,7 @@ def sample_rate(self) -> int:
def get_model(self) -> torch.nn.Module:
"""Construct the model and load the pretrained weight."""
model = self._model_factory_func()
-path = torchaudio.utils.download_asset(self._model_path)
+path = torchaudio.utils._download_asset(self._model_path)
state_dict = torch.load(path)
model.load_state_dict(state_dict)
model.eval()
4 changes: 2 additions & 2 deletions src/torchaudio/pipelines/_squim_pipeline.py
@@ -50,7 +50,7 @@ def get_model(self) -> SquimObjective:
Variation of :py:class:`~torchaudio.models.SquimObjective`.
"""
model = squim_objective_base()
-path = torchaudio.utils.download_asset(f"models/{self._path}")
+path = torchaudio.utils._download_asset(f"models/{self._path}")
state_dict = torch.load(path, weights_only=True)
model.load_state_dict(state_dict)
model.eval()
@@ -125,7 +125,7 @@ def get_model(self) -> SquimSubjective:
Variation of :py:class:`~torchaudio.models.SquimObjective`.
"""
model = squim_subjective_base()
-path = torchaudio.utils.download_asset(f"models/{self._path}")
+path = torchaudio.utils._download_asset(f"models/{self._path}")
state_dict = torch.load(path, weights_only=True)
model.load_state_dict(state_dict)
model.eval()
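The pipeline bundles are likewise unaffected from the user's side, since the renamed helper is only called inside get_model. A brief sketch, assuming the existing SQUIM_OBJECTIVE bundle and a 16 kHz input; the random waveform is a placeholder.

# Sketch of bundle usage; the checkpoint is fetched internally via the
# renamed private helper.
import torch
from torchaudio.pipelines import SQUIM_OBJECTIVE

model = SQUIM_OBJECTIVE.get_model()
waveform = torch.randn(1, 16000)       # placeholder: one second of audio at 16 kHz
stoi, pesq, si_sdr = model(waveform)   # objective speech-quality estimates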
8 changes: 4 additions & 4 deletions src/torchaudio/pipelines/rnnt_pipeline.py
@@ -244,7 +244,7 @@ class TokenProcessor(_TokenProcessor):

def _get_model(self) -> RNNT:
model = self._rnnt_factory_func()
-path = torchaudio.utils.download_asset(self._rnnt_path)
+path = torchaudio.utils._download_asset(self._rnnt_path)
state_dict = torch.load(path)
model.load_state_dict(state_dict)
model.eval()
@@ -313,7 +313,7 @@ def get_feature_extractor(self) -> FeatureExtractor:
Returns:
FeatureExtractor
"""
-local_path = torchaudio.utils.download_asset(self._global_stats_path)
+local_path = torchaudio.utils._download_asset(self._global_stats_path)
return _ModuleFeatureExtractor(
torch.nn.Sequential(
torchaudio.transforms.MelSpectrogram(
@@ -332,7 +332,7 @@ def get_streaming_feature_extractor(self) -> FeatureExtractor:
Returns:
FeatureExtractor
"""
-local_path = torchaudio.utils.download_asset(self._global_stats_path)
+local_path = torchaudio.utils._download_asset(self._global_stats_path)
return _ModuleFeatureExtractor(
torch.nn.Sequential(
torchaudio.transforms.MelSpectrogram(
@@ -350,7 +350,7 @@ def get_token_processor(self) -> TokenProcessor:
Returns:
TokenProcessor
"""
-local_path = torchaudio.utils.download_asset(self._sp_model_path)
+local_path = torchaudio.utils._download_asset(self._sp_model_path)
return _SentencePieceTokenProcessor(local_path)


3 changes: 1 addition & 2 deletions src/torchaudio/utils/__init__.py
@@ -1,11 +1,10 @@
from torio.utils import ffmpeg_utils

from . import sox_utils
-from .download import download_asset
+from .download import _download_asset


__all__ = [
"download_asset",
"sox_utils",
"ffmpeg_utils",
]
3 changes: 1 addition & 2 deletions src/torchaudio/utils/download.py
@@ -32,8 +32,7 @@ def _get_hash(path, hash, chunk_size=1028):

from torchaudio._internal.module_utils import dropping_support

-@dropping_support
-def download_asset(
+def _download_asset(
key: str,
hash: str = "",
path: Union[str, PathLike] = "",
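Because download_asset is no longer exported, downstream code that imported the public name has to follow the rename. One option is a guarded import like the sketch below; this shim is not part of the PR, and _download_asset is private, so it carries no stability guarantee.

# Compatibility sketch (assumption, not part of this diff): prefer the old
# public name when it exists, otherwise fall back to the renamed private helper.
try:
    from torchaudio.utils import download_asset  # releases before this change
except ImportError:
    from torchaudio.utils import _download_asset as download_asset  # after this change

speech_file = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")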
8 changes: 4 additions & 4 deletions test/integration_tests/conftest.py
@@ -66,15 +66,15 @@ def sample_speech(lang):
if lang not in _FILES:
raise NotImplementedError(f"Unexpected lang: {lang}")
filename = _FILES[lang]
-path = torchaudio.utils.download_asset(f"test-assets/{filename}")
+path = torchaudio.utils._download_asset(f"test-assets/{filename}")
return path


@pytest.fixture
def mixture_source(task):
if task not in _MIXTURE_FILES:
raise NotImplementedError(f"Unexpected task: {task}")
-path = torchaudio.utils.download_asset(f"test-assets/{_MIXTURE_FILES[task]}")
+path = torchaudio.utils._download_asset(f"test-assets/{_MIXTURE_FILES[task]}")
return path


@@ -84,7 +84,7 @@ def clean_sources(task):
raise NotImplementedError(f"Unexpected task: {task}")
paths = []
for file in _CLEAN_FILES[task]:
-path = torchaudio.utils.download_asset(f"test-assets/{file}")
+path = torchaudio.utils._download_asset(f"test-assets/{file}")
paths.append(path)
return paths

@@ -115,5 +115,5 @@ def temp_hub_dir(tmp_path, pytestconfig):

@pytest.fixture()
def emissions():
-path = torchaudio.utils.download_asset("test-assets/emissions-8555-28447-0012.pt")
+path = torchaudio.utils._download_asset("test-assets/emissions-8555-28447-0012.pt")
return torch.load(path)
2 changes: 1 addition & 1 deletion test/integration_tests/prototype/vggish_pipeline_test.py
@@ -6,7 +6,7 @@ def test_vggish():
input_sr = VGGISH.sample_rate
input_proc = VGGISH.get_input_processor()
model = VGGISH.get_model()
-path = torchaudio.utils.download_asset("test-assets/Chopin_Ballade_-1_In_G_Minor,_Op._23_excerpt.mp3")
+path = torchaudio.utils._download_asset("test-assets/Chopin_Ballade_-1_In_G_Minor,_Op._23_excerpt.mp3")
waveform, sr = torchaudio.load(path, backend="ffmpeg")
waveform = waveform.mean(axis=0)
waveform = torchaudio.functional.resample(waveform, sr, input_sr)