Skip to content

Commit

Permalink
Fix Windows GBK encoding (#2741)
Browse files: browse the repository at this point in the history
  • Loading branch information
Jintao-Huang authored Dec 24, 2024
1 parent c1f10f4 commit f913bca
Show file tree
Hide file tree
Showing 16 changed files with 37 additions and 35 deletions.
8 changes: 4 additions & 4 deletions scripts/benchmark/exp_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def assert_gpu_not_overlap(self):

def run(self, exp: Experiment):
if os.path.exists(os.path.join(exp.input_args.save_dir, exp.name + '.json')):
with open(os.path.join(exp.input_args.save_dir, exp.name + '.json'), 'r') as f:
with open(os.path.join(exp.input_args.save_dir, exp.name + '.json'), 'r', encoding='utf-8') as f:
_json = json.load(f)
if exp.eval_dataset and 'eval_result' not in _json['record']:
if not exp.do_eval:
Expand Down Expand Up @@ -238,7 +238,7 @@ def _find_free_gpu(self, n):
def prepare_experiments(self, args: Any):
experiments = []
for config_file in args.config:
with open(config_file, 'r') as f:
with open(config_file, 'r', encoding='utf-8') as f:
group = os.path.basename(config_file)
group = group[:-5]
content = json.load(f)
Expand Down Expand Up @@ -275,7 +275,7 @@ def prepare_experiments(self, args: Any):
def _get_metric(exp: Experiment):
if exp.do_eval:
if os.path.isfile(os.path.join('exp', f'{exp.name}.eval.log')):
with open(os.path.join('exp', f'{exp.name}.eval.log'), 'r') as f:
with open(os.path.join('exp', f'{exp.name}.eval.log'), 'r', encoding='utf-8') as f:
for line in f.readlines():
if 'Final report:' in line:
return json.loads(line.split('Final report:')[1].replace('\'', '"'))
Expand All @@ -301,7 +301,7 @@ def _get_metric(exp: Experiment):
logging_dir = exp.runtime.get('logging_dir')
logging_file = os.path.join(logging_dir, '..', 'logging.jsonl')
if os.path.isfile(logging_file):
with open(logging_file, 'r') as f:
with open(logging_file, 'r', encoding='utf-8') as f:
for line in f.readlines():
if 'model_info' in line:
return json.loads(line)
Expand Down
2 changes: 1 addition & 1 deletion scripts/benchmark/generate_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def generate_export_report(outputs: List[ModelOutput]):


def parse_output(file):
with open(file, 'r') as f:
with open(file, 'r', encoding='utf-8') as f:
content = json.load(f)

name = content['name']
Expand Down
4 changes: 2 additions & 2 deletions scripts/utils/run_model_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,15 @@ def get_model_info_table():
result[i] += text[i]

for i, fpath in enumerate(fpaths):
with open(fpath, 'r') as f:
with open(fpath, 'r', encoding='utf-8') as f:
text = f.read()
llm_start_idx = text.find('| Model ID |')
mllm_start_idx = text[llm_start_idx + 1:].find('| Model ID |') + llm_start_idx + 1
llm_end_idx = text.find(end_words[i][0])
mllm_end_idx = text.find(end_words[i][1])
output = text[:llm_start_idx] + result[0] + '\n\n' + text[llm_end_idx:mllm_start_idx] + result[
1] + '\n\n' + text[mllm_end_idx:]
with open(fpath, 'w') as f:
with open(fpath, 'w', encoding='utf-8') as f:
f.write(output)


Expand Down
2 changes: 1 addition & 1 deletion swift/hub/hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def push_to_hub(cls,
if commit_description:
commit_message = commit_message + '\n' + commit_description
if not os.path.exists(os.path.join(folder_path, 'configuration.json')):
with open(os.path.join(folder_path, 'configuration.json'), 'w') as f:
with open(os.path.join(folder_path, 'configuration.json'), 'w', encoding='utf-8') as f:
f.write('{"framework": "pytorch", "task": "text-generation", "allow_remote": true}')
if ignore_patterns:
ignore_patterns = [p for p in ignore_patterns if p != '_*']
Expand Down
2 changes: 1 addition & 1 deletion swift/llm/argument/base_args/model_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def parse_to_dict(value: Union[str, Dict, None], strict: bool = True) -> Union[s
value = {}
elif isinstance(value, str):
if os.path.exists(value): # local path
with open(value, 'r') as f:
with open(value, 'r', encoding='utf-8') as f:
value = json.load(f)
else: # json str
try:
Expand Down
2 changes: 1 addition & 1 deletion swift/llm/dataset/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def register_dataset_info(dataset_info: Union[str, List[str], None] = None) -> L
if os.path.isfile(dataset_path):
log_msg = dataset_path
base_dir = os.path.dirname(dataset_path)
with open(dataset_path, 'r') as f:
with open(dataset_path, 'r', encoding='utf-8') as f:
dataset_info = json.load(f)
else:
dataset_info = json.loads(dataset_info) # json
Expand Down
2 changes: 1 addition & 1 deletion swift/llm/export/merge_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def merge_lora(args: ExportArguments, device_map=None, replace_if_exists=False)
'{base_model}', base_model)
try:
yamlfile = os.path.join(tempdir, 'mergekit.yaml')
with open(yamlfile, 'w') as f:
with open(yamlfile, 'w', encoding='utf-8') as f:
f.write(merge_yaml)
logger.info(f'Merging with config: {merge_yaml}')
os.system(f'mergekit-yaml {yamlfile} {mergekit_path}')
Expand Down
2 changes: 1 addition & 1 deletion swift/llm/export/ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def export_to_ollama(args: ExportArguments):
pt_engine = PtEngine.from_model_template(model, template)
logger.info(f'Using model_dir: {pt_engine.model_dir}')
template_meta = template.template_meta
with open(os.path.join(args.output_dir, 'Modelfile'), 'w') as f:
with open(os.path.join(args.output_dir, 'Modelfile'), 'w', encoding='utf-8') as f:
f.write(f'FROM {pt_engine.model_dir}\n')
f.write(f'TEMPLATE """{{{{ if .System }}}}'
f'{replace_and_concat(template, template_meta.system_prefix, "{{SYSTEM}}", "{{ .System }}")}'
Expand Down
2 changes: 1 addition & 1 deletion swift/plugin/loss_scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def __init__(self):
if self.loss_scale_config is not None:
path = os.path.dirname(os.path.abspath(__file__))
config_path = os.path.join(path, 'agent', self.loss_scale_config)
with open(config_path, 'r') as json_file:
with open(config_path, 'r', encoding='utf-8') as json_file:
self.loss_scale_map = json.load(json_file)
else:
self.loss_scale_map = None
Expand Down
21 changes: 11 additions & 10 deletions swift/tuners/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def from_pretrained(cls,
raise ValueError(f'Please pass in a local dir or a model id, not a local file: {model_dir}')
extra_state_keys = kwargs.pop('extra_state_keys', None)
if extra_state_keys is None and os.path.isfile(os.path.join(model_dir, cls.EXTRA_STATE_DIR, CONFIG_NAME)):
with open(os.path.join(model_dir, cls.EXTRA_STATE_DIR, CONFIG_NAME), 'r') as file:
with open(os.path.join(model_dir, cls.EXTRA_STATE_DIR, CONFIG_NAME), 'r', encoding='utf-8') as file:
_json = json.load(file)
extra_state_keys = _json.get('extra_state_keys')
if adapter_name is None:
Expand All @@ -340,7 +340,7 @@ def from_pretrained(cls,
logger.warning(f'{_name} is not a valid tuner')
continue

with open(config_file, 'r') as file:
with open(config_file, 'r', encoding='utf-8') as file:
json_object = json.load(file)

if SWIFT_TYPE_KEY not in json_object:
Expand Down Expand Up @@ -395,7 +395,7 @@ def create_or_update_model_card(self, output_dir: str):
if not os.path.exists(os.path.join(output_dir, 'README.md')):
lines = []
else:
with open(os.path.join(output_dir, 'README.md'), 'r') as f:
with open(os.path.join(output_dir, 'README.md'), 'r', encoding='utf-8') as f:
lines = f.readlines()

quantization_config = None
Expand Down Expand Up @@ -426,7 +426,7 @@ def create_or_update_model_card(self, output_dir: str):
lines.append(f'{base_model_heading}\n\n- BaseModel Class {self.base_model.__class__.__name__}\n')

# write the lines back to README.md
with open(os.path.join(output_dir, 'README.md'), 'w') as f:
with open(os.path.join(output_dir, 'README.md'), 'w', encoding='utf-8') as f:
f.writelines(lines)

def add_weighted_adapter(
Expand Down Expand Up @@ -587,13 +587,14 @@ def save_pretrained(self,
os.makedirs(os.path.join(save_directory, self.EXTRA_STATE_DIR), exist_ok=True)
self._save_state_dict(output_state_dict, os.path.join(save_directory, self.EXTRA_STATE_DIR),
safe_serialization)
with open(os.path.join(save_directory, self.EXTRA_STATE_DIR, CONFIG_NAME), 'w') as file:
with open(
os.path.join(save_directory, self.EXTRA_STATE_DIR, CONFIG_NAME), 'w', encoding='utf-8') as file:
json.dump({'extra_state_keys': self.extra_state_keys}, file)
else:
logger.error('Full parameter training, save_extra_states will be ignored')

if not os.path.exists(os.path.join(save_directory, 'configuration.json')):
with open(os.path.join(save_directory, 'configuration.json'), 'w') as f:
with open(os.path.join(save_directory, 'configuration.json'), 'w', encoding='utf-8') as f:
f.write('{}')

@staticmethod
Expand Down Expand Up @@ -776,7 +777,7 @@ def has_custom_content(_json):
return not LoRAConfig(**_json).can_be_saved_to_peft()

for adapter in adapter_names:
with open(os.path.join(ckpt_dir, adapter, CONFIG_NAME)) as f:
with open(os.path.join(ckpt_dir, adapter, CONFIG_NAME), encoding='utf-8') as f:
_json = json.load(f)
if has_custom_content(_json):
raise AssertionError('Cannot transfer to peft format, '
Expand All @@ -802,7 +803,7 @@ def has_custom_content(_json):
state_dict = new_state_dict
SwiftModel._save_state_dict(state_dict, os.path.join(output_dir, adapter), safe_serialization)
from swift import LoRAConfig
with open(os.path.join(output_dir, adapter, CONFIG_NAME)) as f:
with open(os.path.join(output_dir, adapter, CONFIG_NAME), encoding='utf-8') as f:
_json = json.load(f)
peft_config = LoRAConfig(**_json).to_peft_config()
peft_config.save_pretrained(os.path.join(output_dir, adapter))
Expand Down Expand Up @@ -836,7 +837,7 @@ def from_pretrained(model: Union[nn.Module, SwiftModel, PeftModel],
model_id = snapshot_download(model_id, revision=revision)
is_peft_model = False
if os.path.exists(os.path.join(model_id, CONFIG_NAME)):
with open(os.path.join(model_id, CONFIG_NAME), 'r') as f:
with open(os.path.join(model_id, CONFIG_NAME), 'r', encoding='utf-8') as f:
_json = json.load(f)
is_peft_model = SWIFT_TYPE_KEY not in _json

Expand All @@ -845,7 +846,7 @@ def from_pretrained(model: Union[nn.Module, SwiftModel, PeftModel],
if isinstance(adapter_name, list) else list(adapter_name.keys())[0]
_name = _name or ''
if os.path.exists(os.path.join(model_id, _name, CONFIG_NAME)):
with open(os.path.join(model_id, _name, CONFIG_NAME), 'r') as f:
with open(os.path.join(model_id, _name, CONFIG_NAME), 'r', encoding='utf-8') as f:
_json = json.load(f)
is_peft_model = SWIFT_TYPE_KEY not in _json and 'extra_state_keys' not in _json
if is_peft_model:
Expand Down
5 changes: 3 additions & 2 deletions swift/tuners/peft.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def save_pretrained(self, save_directory: str, **kwargs) -> None:
'lorap_lr_ratio': self.lorap_lr_ratio,
'lorap_emb_lr': self.lorap_emb_lr,
}
with open(os.path.join(save_directory, 'additional_config.json'), 'w') as f:
with open(os.path.join(save_directory, 'additional_config.json'), 'w', encoding='utf-8') as f:
json.dump(additional_args, f)

@classmethod
Expand All @@ -75,7 +75,8 @@ def from_pretrained(cls, pretrained_model_name_or_path: str, subfolder: Optional
self = LoraConfig(**self.to_dict())

if os.path.isfile(os.path.join(pretrained_model_name_or_path, 'additional_config.json')):
with open(os.path.join(pretrained_model_name_or_path, 'additional_config.json'), 'r') as f:
with open(
os.path.join(pretrained_model_name_or_path, 'additional_config.json'), 'r', encoding='utf-8') as f:
_json = json.load(f)
for key, value in _json.items():
setattr(self, key, value)
Expand Down
4 changes: 2 additions & 2 deletions swift/tuners/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def save_pretrained(self, save_directory, **kwargs):
output_path = os.path.join(save_directory, CONFIG_NAME)

# save it
with open(output_path, 'w') as writer:
with open(output_path, 'w', encoding='utf-8') as writer:
writer.write(json.dumps(output_dict, indent=2, sort_keys=True))

@classmethod
Expand Down Expand Up @@ -103,7 +103,7 @@ def from_json_file(cls, path_json_file, **kwargs):
path_json_file (`str`):
The path to the json file.
"""
with open(path_json_file, 'r') as file:
with open(path_json_file, 'r', encoding='utf-8') as file:
json_object = json.load(file)

return json_object
Expand Down
4 changes: 2 additions & 2 deletions swift/ui/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def save_cache(cls, key, value):
timestamp = str(int(time.time()))
key = key.replace('/', '-')
filename = os.path.join(cls.cache_dir, key + '-' + timestamp)
with open(filename, 'w') as f:
with open(filename, 'w', encoding='utf-8') as f:
json.dump(value, f)

@classmethod
Expand All @@ -161,7 +161,7 @@ def load_cache(cls, key, timestamp) -> BaseArguments:
timestamp = int(dt_object.timestamp())
key = key.replace('/', '-')
filename = key + '-' + str(timestamp)
with open(os.path.join(cls.cache_dir, filename), 'r') as f:
with open(os.path.join(cls.cache_dir, filename), 'r', encoding='utf-8') as f:
return json.load(f)

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion swift/ui/llm_infer/llm_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def deploy(cls, *args):
model = kwargs.get('model')
if os.path.exists(model) and os.path.exists(os.path.join(model, 'args.json')):
kwargs['ckpt_dir'] = kwargs.pop('model')
with open(os.path.join(kwargs['ckpt_dir'], 'args.json'), 'r') as f:
with open(os.path.join(kwargs['ckpt_dir'], 'args.json'), 'r', encoding='utf-8') as f:
_json = json.load(f)
kwargs['model_type'] = _json['model_type']
kwargs['train_type'] = _json['train_type']
Expand Down
4 changes: 2 additions & 2 deletions swift/ui/llm_infer/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def wait(cls, task):
latest_data = ''
lines = collections.deque(maxlen=int(os.environ.get('MAX_LOG_LINES', 50)))
try:
with open(log_file, 'r') as input:
with open(log_file, 'r', encoding='utf-8') as input:
input.seek(offset)
fail_cnt = 0
while True:
Expand Down Expand Up @@ -268,7 +268,7 @@ def task_changed(cls, task, base_tab):
ret.append(gr.update())
train_type = None
if is_custom_path:
with open(os.path.join(all_args['ckpt_dir'], 'args.json'), 'r') as f:
with open(os.path.join(all_args['ckpt_dir'], 'args.json'), 'r', encoding='utf-8') as f:
_json = json.load(f)
train_type = _json.get('train_type')
return ret + [gr.update(value=None), [all_args.get('model_type'), all_args.get('template_type'), train_type]]
6 changes: 3 additions & 3 deletions swift/ui/llm_train/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def wait(cls, logging_dir, task):
latest_data = ''
lines = collections.deque(maxlen=int(os.environ.get('MAX_LOG_LINES', 50)))
try:
with open(log_file, 'r') as input:
with open(log_file, 'r', encoding='utf-8') as input:
input.seek(offset)
fail_cnt = 0
while True:
Expand Down Expand Up @@ -451,8 +451,8 @@ def parse_info_from_cmdline(task):
all_args[splits[0]] = splits[1]

output_dir = all_args['output_dir']
if os.path.exists(os.path.join(output_dir, 'sft_args.json')):
with open(os.path.join(output_dir, 'sft_args.json'), 'r') as f:
if os.path.exists(os.path.join(output_dir, 'args.json')):
with open(os.path.join(output_dir, 'args.json'), 'r', encoding='utf-8') as f:
_json = json.load(f)
for key in all_args.keys():
all_args[key] = _json.get(key)
Expand Down

0 comments on commit f913bca

Please sign in to comment.