Skip to content

Commit

Permalink
Propagate channel selector for AED model + add channel selector to get_lhotse_dataloader_from_config
Browse files Browse the repository at this point in the history

Signed-off-by: Ante Jukić <ajukic@nvidia.com>
  • Loading branch information
anteju committed Apr 23, 2024
1 parent a3825d5 commit 5a7bc84
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
1 change: 1 addition & 0 deletions nemo/collections/asr/models/aed_multitask_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,7 @@ def _setup_transcribe_dataloader(self, config: Dict) -> 'torch.utils.data.DataLo
'drop_last': False,
'text_field': config.get('text_field', 'answer'),
'lang_field': config.get('lang_field', 'target_lang'),
'channel_selector': config.get('channel_selector', None),
}

temporary_datalayer = self._setup_dataloader_from_config(config=DictConfig(dl_config), inference=True)
Expand Down
23 changes: 23 additions & 0 deletions nemo/collections/common/data/lhotse/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class LhotseDataLoadingConfig:
seed: int | str = "randomized" # int | "randomized" | "trng"; the latter two are lazily resolved by Lhotse in dloading worker processes
num_workers: int = 0
pin_memory: bool = False
channel_selector: int | str | None = None

# 4. Optional Lhotse data augmentation.
# a. On-the-fly noise/audio mixing.
Expand Down Expand Up @@ -157,6 +158,11 @@ def get_lhotse_dataloader_from_config(
# 1. Load a manifest as a Lhotse CutSet.
cuts, is_tarred = read_cutset_from_config(config)

# Apply channel selector
if config.channel_selector is not None:
logging.info('Using channel selector %s.', config.channel_selector)
cuts = cuts.map(partial(_select_channel, channel_selector=config.channel_selector), apply_fn=None)

# Resample as a safeguard; it's a no-op when SR is already OK
cuts = cuts.resample(config.sample_rate)

Expand Down Expand Up @@ -438,3 +444,20 @@ def _flatten_alt_text(cut) -> list:
text_instance.custom = {"text": data.pop("text"), "lang": data.pop("lang"), **data}
ans.append(text_instance)
return ans


def _select_channel(cut, channel_selector: int | str) -> list:
if isinstance(channel_selector, int):
channel_idx = channel_selector
elif isinstance(channel_selector, str):
if channel_selector in cut.custom:
channel_idx = cut.custom[channel_selector]
else:
raise ValueError(f"Channel selector {channel_selector} not found in cut.custom")

if channel_idx >= cut.num_channels:
raise ValueError(
f"Channel index {channel_idx} is larger than the actual number of channels {cut.num_channels}"
)

return cut.with_channels(channel_idx)

0 comments on commit 5a7bc84

Please sign in to comment.