From d453c764ab1552741fc8d3503b7b18abee49b60f Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Thu, 12 Dec 2024 17:09:40 -0800 Subject: [PATCH] use pathlib instead --- llmfoundry/utils/config_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llmfoundry/utils/config_utils.py b/llmfoundry/utils/config_utils.py index 6f3c4d2212..252841cb50 100644 --- a/llmfoundry/utils/config_utils.py +++ b/llmfoundry/utils/config_utils.py @@ -6,9 +6,9 @@ import logging import math import os -import re import warnings from dataclasses import dataclass, fields +from pathlib import Path from typing import ( Any, Callable, @@ -705,7 +705,7 @@ def _process_data_source( data_paths (List[Tuple[str, str, str]]): A list of tuples formatted as (data type, path, split) """ if source_dataset_path: - source_dataset_path = re.sub(r'/+', '/', source_dataset_path) + source_dataset_path = str(Path(source_dataset_path)) # Check for Delta table if source_dataset_path and len(source_dataset_path.split('.')) == 3: data_paths.append(('delta_table', source_dataset_path, true_split))