From 9cc945c62a56395da2d23d69f64271394ebb001d Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 22 May 2024 16:34:36 -0700 Subject: [PATCH] Quick patch to check that Dataset Keys contain non-None Values (#1228) * quick patch * also separately check for local path * typo * typo --------- Co-authored-by: Daniel King <43149077+dakinggg@users.noreply.github.com> --- llmfoundry/utils/config_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llmfoundry/utils/config_utils.py b/llmfoundry/utils/config_utils.py index 6010b19b6f..72ca19834b 100644 --- a/llmfoundry/utils/config_utils.py +++ b/llmfoundry/utils/config_utils.py @@ -589,7 +589,7 @@ def _process_data_source( ('uc_volume', source_dataset_path[len('dbfs:'):], true_split), ) # Check for HF path - elif 'hf_name' in dataset: + elif 'hf_name' in dataset and dataset['hf_name']: hf_path = dataset['hf_name'] backend, _, _ = parse_uri(hf_path) if backend: @@ -600,7 +600,7 @@ def _process_data_source( else: data_paths.append(('hf', hf_path, true_split)) # Check for remote path - elif 'remote' in dataset: + elif 'remote' in dataset and dataset['remote']: remote_path = dataset['remote'] backend, _, _ = parse_uri(remote_path) if backend: @@ -610,7 +610,11 @@ def _process_data_source( ) if cfg_split else remote_path data_paths.append((backend, remote_path, true_split)) else: + # No backend detected so assume local path data_paths.append(('local', remote_path, true_split)) + # Check for local path + elif 'local' in dataset and dataset['local']: + data_paths.append(('local', dataset['local'], true_split)) else: log.warning('DataSource Not Found.')