Skip to content

Commit

Permalink
Quick patch to check that Dataset Keys contain non-None Values (#1228)
Browse files Browse the repository at this point in the history
* quick patch

* also separately check for local path

* typo

* typo

---------

Co-authored-by: Daniel King <43149077+dakinggg@users.noreply.github.com>
  • Loading branch information
KuuCi and dakinggg authored May 22, 2024
1 parent c891bed commit 9cc945c
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions llmfoundry/utils/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ def _process_data_source(
('uc_volume', source_dataset_path[len('dbfs:'):], true_split),
)
# Check for HF path
elif 'hf_name' in dataset:
elif 'hf_name' in dataset and dataset['hf_name']:
hf_path = dataset['hf_name']
backend, _, _ = parse_uri(hf_path)
if backend:
Expand All @@ -600,7 +600,7 @@ def _process_data_source(
else:
data_paths.append(('hf', hf_path, true_split))
# Check for remote path
elif 'remote' in dataset:
elif 'remote' in dataset and dataset['remote']:
remote_path = dataset['remote']
backend, _, _ = parse_uri(remote_path)
if backend:
Expand All @@ -610,7 +610,11 @@ def _process_data_source(
) if cfg_split else remote_path
data_paths.append((backend, remote_path, true_split))
else:
# No backend detected so assume local path
data_paths.append(('local', remote_path, true_split))
# Check for local path
elif 'local' in dataset and dataset['local']:
data_paths.append(('local', dataset['local'], true_split))
else:
log.warning('DataSource Not Found.')

Expand Down

0 comments on commit 9cc945c

Please sign in to comment.