Skip to content

Commit

Permalink
there is a bug with large zip files and path joining so we need to investigate it
Browse files Browse the repository at this point in the history
  • Loading branch information
mitya52 committed Jun 20, 2024
1 parent e693243 commit 4629d91
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions self_hosting_machinery/finetune/scripts/finetune_train.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import sys
import click
import copy
import json
Expand All @@ -18,7 +17,8 @@
import torch.distributed as dist

from refact_utils.scripts import env
from refact_utils.scripts.env import safe_paths_join
# TODO: safe_paths_join is disabled because of a bug with large zip files and path joining; investigate it
# from refact_utils.scripts.env import safe_paths_join
from refact_utils.finetune.utils import finetune_train_defaults
from self_hosting_machinery.finetune.configuration.finetune_config import base_config, ConfigBuilder
from self_hosting_machinery.finetune.scripts.auxiliary.dataset import (
Expand Down Expand Up @@ -170,8 +170,8 @@ def gpu_filter_and_build_config(
def _copy_source_files(jsonl_src, jsonl_dst, pname, run_id):
for d in jsonlines.open(jsonl_src):
try:
src_path = safe_paths_join(env.PP_DIR_UNPACKED(pname), d["path"])
dst_path = safe_paths_join(env.PERRUN_DIR_UNPACKED(run_id), d["path"])
src_path = os.path.join(env.PP_DIR_UNPACKED(pname), d["path"])
dst_path = os.path.join(env.PERRUN_DIR_UNPACKED(run_id), d["path"])
except ValueError as e:
raise ValueError(f'copy source files error: {e}')
os.makedirs(os.path.dirname(dst_path), exist_ok=True)
Expand Down

0 comments on commit 4629d91

Please sign in to comment.