Skip to content

Commit

Permalink
catch double slash
Browse files — browse the repository at this point in the history
  • Loading branch information
Vincent Chen committed Dec 13, 2024
1 parent 5ce2048 commit 58092c0
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion llmfoundry/utils/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import mlflow
from composer.loggers import Logger
from composer.utils import dist, parse_uri
from exceptions import UCNotFoundError
from mlflow.data import (
delta_dataset_source,
http_dataset_source,
Expand All @@ -37,6 +36,7 @@
from llmfoundry.layers_registry import ffns_with_megablocks
from llmfoundry.models.utils import init_empty_weights
from llmfoundry.registry import config_transforms
from llmfoundry.utils.exceptions import UCNotFoundError

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -706,6 +706,8 @@ def _process_data_source(
true_split (str): The split of the dataset to be added (i.e. train or eval)
data_paths (List[Tuple[str, str, str]]): A list of tuples formatted as (data type, path, split)
"""
if source_dataset_path:
source_dataset_path = re.sub(r'/+', '/', source_dataset_path)
# Check for Delta table
if source_dataset_path and len(source_dataset_path.split('.')) == 3:
data_paths.append(('delta_table', source_dataset_path, true_split))
Expand Down

0 comments on commit 58092c0

Please sign in to comment.