From 43d162298633811cdfb97544c0c6167d17fce37a Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 2 Apr 2024 15:06:57 +0800 Subject: [PATCH 01/30] add llmsuite Signed-off-by: Yu Wu --- python/fate_test/_parser.py | 67 ++++++++ python/fate_test/scripts/_utils.py | 12 +- python/fate_test/scripts/cli.py | 2 + python/fate_test/scripts/llmsuite_cli.py | 206 +++++++++++++++++++++++ 4 files changed, 286 insertions(+), 1 deletion(-) create mode 100644 python/fate_test/scripts/llmsuite_cli.py diff --git a/python/fate_test/_parser.py b/python/fate_test/_parser.py index 55e2aa3..8c18958 100644 --- a/python/fate_test/_parser.py +++ b/python/fate_test/_parser.py @@ -344,7 +344,74 @@ def load(path: Path): suite = PerformanceSuite(dataset, pipeline_jobs, path) return suite +""" +class LlmJob(object): + def __init__(self, job_name: str, script_path: Path, conf_path: Path, + loader: str, loader_conf_path: Path, tasks: typing.List[str], include_path: Path): + self.job_name = job_name + self.script_path = script_path + self.conf_path = conf_path + self.loader = loader + self.loader_conf_path = loader_conf_path + self.tasks = tasks + self.include_path = include_path + + +class LlmPair(object): + def __init__( + self, pair_name: str, jobs: typing.List[LlmJob] + ): + self.pair_name = pair_name + self.jobs = jobs + + +class LlmSuite(object): + def __init__( + self, pairs: typing.List[LlmPair], path: Path + ): + self.pairs = pairs + self.path = path + + @staticmethod + def load(path: Path): + with path.open("r") as f: + testsuite_config = yaml.safe_load(f) + + pairs = [] + for pair_name, pair_configs in testsuite_config.items(): + jobs = [] + for job_name, job_configs in pair_configs.items(): + script_path = path.parent.joinpath(job_configs["script"]).resolve() + if job_configs.get("conf"): + conf_path = path.parent.joinpath(job_configs["conf"]).resolve() + else: + conf_path = "" + loader = job_configs.get("loader", "") + if job_configs.get("loader_conf"): + loader_conf_path = 
path.parent.joinpath(job_configs["loader_conf"]).resolve() + else: + loader_conf_path = "" + tasks = job_configs.get("tasks", []) + include_path = job_configs.get("include_path", "") + if include_path and not os.path.isabs(include_path): + include_path = path.parent.joinpath(job_configs["include_path"]).resolve() + + jobs.append( + LlmJob( + job_name=job_name, script_path=script_path, conf_path=conf_path, + loader=loader, loader_conf_path=loader_conf_path, tasks=tasks, include_path=include_path + ) + ) + + pairs.append( + LlmPair( + pair_name=pair_name, jobs=jobs + ) + ) + suite = LlmSuite(pairs=pairs, path=path) + return suite +""" def non_success_summary(): status = {} for job in _config.non_success_jobs: diff --git a/python/fate_test/scripts/_utils.py b/python/fate_test/scripts/_utils.py index b3f60e3..576d83c 100644 --- a/python/fate_test/scripts/_utils.py +++ b/python/fate_test/scripts/_utils.py @@ -6,6 +6,7 @@ from pathlib import Path import click +from fate_llm.utils import LlmSuite from fate_test._client import Clients from fate_test._config import Config @@ -19,7 +20,7 @@ def _big_data_task(includes, guest_data_size, host_data_size, guest_feature_num, from fate_test.scripts import generate_mock_data def _find_testsuite_files(path): - suffix = ["testsuite.yaml", "benchmark.yaml", "performance.yaml"] + suffix = ["testsuite.yaml", "benchmark.yaml", "performance.yaml", "llmsuite.yaml"] if isinstance(path, str): path = Path(path) if path.is_file(): @@ -85,6 +86,8 @@ def _find_testsuite_files(path): suite = BenchmarkSuite.load(suite_path.resolve()) elif suite_type == "performance": suite = PerformanceSuite.load(suite_path.resolve()) + elif suite_type == "llmsuite": + suite = LlmSuite.load(suite_path.resolve()) else: raise ValueError(f"Unsupported suite type: {suite_type}. 
Only accept type 'testsuite' or 'benchmark'.") except Exception as e: @@ -207,3 +210,10 @@ def _update_data_config(suite, partitions=None): for data in suite.dataset: data.config['partitions'] = partitions data.partitions = partitions + + +def _obtain_model_output_path(config, job_id, task_name, client, role, party_id): + + output_path = os.path.join(config.data_base_dir, "fate_flow", + "model", job_id, role, party_id, task_name, "0", "output", "output_model") + return output_path diff --git a/python/fate_test/scripts/cli.py b/python/fate_test/scripts/cli.py index f59bd6c..48bcfaf 100644 --- a/python/fate_test/scripts/cli.py +++ b/python/fate_test/scripts/cli.py @@ -20,6 +20,7 @@ from fate_test.scripts.benchmark_cli import run_benchmark from fate_test.scripts.config_cli import config_group from fate_test.scripts.data_cli import data_group +from fate_test.scripts.llmsuite_cli import run_llmsuite # from fate_test.scripts.flow_test_cli import flow_group from fate_test.scripts.performance_cli import run_task # from fate_test.scripts.quick_test_cli import unittest_group @@ -32,6 +33,7 @@ "performance": run_task, "benchmark-quality": run_benchmark, "data": data_group, + "llmsuite": run_llmsuite # "unittest": unittest_group } diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py new file mode 100644 index 0000000..2c94fe2 --- /dev/null +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -0,0 +1,206 @@ +# +# Copyright 2019 The FATE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import time +import uuid +from datetime import timedelta +from inspect import signature + +import click +import yaml +from fate_llm.scripts.eval_cli import run_job_eval + +from fate_test._client import Clients +from fate_test._config import Config +from fate_test._io import LOGGER, echo +from fate_test.scripts._options import SharedOptions +from fate_test.scripts._utils import _load_testsuites, _load_module_from_script +from fate_test.utils import extract_job_status + +""" +@click.option('-uj', '--update-job-parameters', default="{}", type=str, + help="a json string that represents mapping for replacing fields in job conf, example format: "'{job_name: param_name1: param_val1, param_name2=param_val2}'") +""" +@click.command("llmsuite") +@click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, + metavar="", + help="include *llmsuite.yaml under these paths") +@click.option('-e', '--exclude', type=click.Path(exists=True), multiple=True, + help="exclude *llmsuite.yaml under these paths") +@click.option('-a', '--algorithm-suite', type=str, multiple=True, + help="run built-in algorithm suite, if given, ignore include/exclude") +@click.option('-p', '--task-cores', type=int, help="processors per node") +@click.option('-m', '--timeout', type=int, + help="maximum running time of job") +@click.option("-g", '--glob', type=str, + help="glob string to filter sub-directory of path specified by ") +@click.option("--provider", type=str, + help="Select the fate version, for example: fate@2.0-beta") +@click.option('-c', '--eval-config', optional=True, type=click.Path(exists=True), help='Path to FATE Llm evaluation config. 
' + 'If none, use default config.') +@click.option('--skip-evaluate', is_flag=True, default=False, + help="skip evaluation after training model") +@SharedOptions.get_shared_options(hidden=True) +@click.pass_context +def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_cores, timeout, eval_config, skip_evaluate, **kwargs): + """ + process llmsuite + """ + ctx.obj.update(**kwargs) + ctx.obj.post_process() + config_inst = ctx.obj["config"] + if ctx.obj["engine_run"][0] is not None: + config_inst.update_conf(engine_run=dict(ctx.obj["engine_run"])) + if task_cores is not None: + config_inst.update_conf(task_cores=task_cores) + if timeout is not None: + config_inst.update_conf(timeout=timeout) + + + namespace = ctx.obj["namespace"] + yes = ctx.obj["yes"] + data_namespace_mangling = ctx.obj["namespace_mangling"] + # prepare output dir and json hooks + # _add_replace_hook(replace) + echo.welcome() + echo.echo(f"llmsuite namespace: {namespace}", fg='red') + echo.echo("loading llmsuites:") + if algorithm_suite: + #@todo: find built-in llmsuite path + algorithm_suite_path = [None] + suites = _load_testsuites(includes=algorithm_suite_path, excludes=None, glob=None, provider=provider, + suffix="llmsuite.yaml", suite_type="llmsuite") + else: + suites = _load_testsuites(includes=include, excludes=exclude, glob=glob, provider=provider, + suffix="llmsuite.yaml", suite_type="llmsuite") + for suite in suites: + echo.echo(f"\tllm groups({len(suite.pairs)}) {suite.path}") + if not yes and not click.confirm("running?"): + return + + echo.stdout_newline() + # with Clients(config_inst) as client: + client = Clients(config_inst) + + for i, suite in enumerate(suites): + # noinspection PyBroadException + try: + start = time.time() + echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') + os.environ['enable_pipeline_job_info_callback'] = '1' + try: + if eval_config: + config = {} + if eval_config is not None: + with 
eval_config.open("r") as f: + config.update(yaml.safe_load(f)) + eval_conf = config + else: + from fate_llm.utils.config import default_eval_config + eval_conf = default_eval_config() + _run_llmsuite_pairs(config_inst, suite, namespace, data_namespace_mangling, client, + skip_evaluate, eval_conf) + except Exception as e: + raise RuntimeError(f"exception occur while running benchmark jobs for {suite.path}") from e + + echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') + except Exception: + exception_id = uuid.uuid1() + echo.echo(f"exception in {suite.path}, exception_id={exception_id}") + LOGGER.exception(f"exception id: {exception_id}") + finally: + echo.stdout_newline() + # non_success_summary() + echo.farewell() + echo.echo(f"llmsuite namespace: {namespace}", fg='red') + + +@LOGGER.catch +def _run_llmsuite_pairs(config: Config, suite, namespace: str, + data_namespace_mangling: bool, clients: Clients, skip_evaluate: bool, eval_conf: dict): + client = clients['guest_0'] + guest_party_id = config.parties.role_to_party("guest")[0] + # pipeline demo goes here + pair_n = len(suite.pairs) + # fate_base = config.fate_base + # PYTHONPATH = os.environ.get('PYTHONPATH') + ":" + os.path.join(fate_base, "python") + # os.environ['PYTHONPATH'] = PYTHONPATH + suite_results = dict() + for i, pair in enumerate(suite.pairs): + echo.echo(f"Running [{i + 1}/{pair_n}] group: {pair.pair_name}") + job_n = len(pair.jobs) + # time_dict = dict() + job_results = dict() + for j, job in enumerate(pair.jobs): + echo.echo(f"Running [{j + 1}/{job_n}] job: {job.job_name}") + + def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None): + exception_id = str(uuid.uuid1()) + # suite.update_status(job_name=job_name, job_id=job_id, exception_id=exception_id, status=status, + # event=event, time_elapsed=time_elapsed) + echo.file(f"exception({exception_id}), error message:\n{err_msg}") + + job_name, script_path, conf_path = 
job.job_name, job.script_path, job.conf_path + param = Config.load_from_file(conf_path) + mod = _load_module_from_script(script_path) + input_params = signature(mod.main).parameters + + try: + # @todo: add update status api to suite + _run_mod(mod, input_params, config, param, namespace, data_namespace_mangling) + job_info = os.environ.get("pipeline_job_info") + job_id, status, time_elapsed, event = extract_job_status(job_info, client, guest_party_id) + """suite.update_status(job_name=job_name, job_id=job_id, status=status, + time_elapsed=time_elapsed, + event=event)""" + if not skip_evaluate: + # @todo: load model with flow api & record evaluate result + job.pretrained_model_path, job.heft_path = None, None + result = run_job_eval(job, eval_conf) + job_results[job_name] = result + os.environ.pop("pipeline_job_info") + + except Exception as e: + job_info = os.environ.get("pipeline_job_info") + if job_info is None: + job_id, status, time_elapsed, event = None, 'failed', None, None + else: + job_id, status, time_elapsed, event = extract_job_status(job_info, client, + guest_party_id) + _raise(e, job_id=job_id, status=status, event=event, time_elapsed=time_elapsed) + os.environ.pop("pipeline_job_info") + continue + suite_results[pair.pair_name] = job_results + for job_name, result in job_results.items(): + echo.echo(f"Job: {job_name}") + echo.echo(result) + # todo: record time elapse + + +def _run_mod(mod, input_params, config, param, namespace, data_namespace_mangling): + if len(input_params) == 1: + mod.main(param=param) + elif len(input_params) == 2: + mod.main(config=config, param=param) + # pipeline script + elif len(input_params) == 3: + if data_namespace_mangling: + mod.main(config=config, param=param, namespace=f"_{namespace}") + else: + mod.main(config=config, param=param) + else: + mod.main() From 72120b4e9e222365e1a6f86648bb714820c3bdb6 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 3 Apr 2024 16:53:07 +0800 Subject: [PATCH 02/30] add non success job 
summary for llmsuite Signed-off-by: Yu Wu --- python/fate_test/_io.py | 4 + python/fate_test/_parser.py | 79 ++----------- python/fate_test/scripts/_utils.py | 9 +- python/fate_test/scripts/llmsuite_cli.py | 136 ++++++++++++++--------- 4 files changed, 105 insertions(+), 123 deletions(-) diff --git a/python/fate_test/_io.py b/python/fate_test/_io.py index edfaeee..47ee682 100644 --- a/python/fate_test/_io.py +++ b/python/fate_test/_io.py @@ -32,6 +32,10 @@ def echo(cls, message, **kwargs): click.secho(message, **kwargs) click.secho(message, file=cls._file, **kwargs) + @classmethod + def sep_line(cls): + click.secho("-------------------------------------------------") + @classmethod def file(cls, message, **kwargs): click.secho(message, file=cls._file, **kwargs) diff --git a/python/fate_test/_parser.py b/python/fate_test/_parser.py index 8c18958..da4918c 100644 --- a/python/fate_test/_parser.py +++ b/python/fate_test/_parser.py @@ -344,74 +344,7 @@ def load(path: Path): suite = PerformanceSuite(dataset, pipeline_jobs, path) return suite -""" -class LlmJob(object): - def __init__(self, job_name: str, script_path: Path, conf_path: Path, - loader: str, loader_conf_path: Path, tasks: typing.List[str], include_path: Path): - self.job_name = job_name - self.script_path = script_path - self.conf_path = conf_path - self.loader = loader - self.loader_conf_path = loader_conf_path - self.tasks = tasks - self.include_path = include_path - - -class LlmPair(object): - def __init__( - self, pair_name: str, jobs: typing.List[LlmJob] - ): - self.pair_name = pair_name - self.jobs = jobs - - -class LlmSuite(object): - def __init__( - self, pairs: typing.List[LlmPair], path: Path - ): - self.pairs = pairs - self.path = path - - @staticmethod - def load(path: Path): - with path.open("r") as f: - testsuite_config = yaml.safe_load(f) - - pairs = [] - for pair_name, pair_configs in testsuite_config.items(): - jobs = [] - for job_name, job_configs in pair_configs.items(): - script_path = 
path.parent.joinpath(job_configs["script"]).resolve() - if job_configs.get("conf"): - conf_path = path.parent.joinpath(job_configs["conf"]).resolve() - else: - conf_path = "" - loader = job_configs.get("loader", "") - if job_configs.get("loader_conf"): - loader_conf_path = path.parent.joinpath(job_configs["loader_conf"]).resolve() - else: - loader_conf_path = "" - tasks = job_configs.get("tasks", []) - include_path = job_configs.get("include_path", "") - if include_path and not os.path.isabs(include_path): - include_path = path.parent.joinpath(job_configs["include_path"]).resolve() - jobs.append( - LlmJob( - job_name=job_name, script_path=script_path, conf_path=conf_path, - loader=loader, loader_conf_path=loader_conf_path, tasks=tasks, include_path=include_path - ) - ) - - pairs.append( - LlmPair( - pair_name=pair_name, jobs=jobs - ) - ) - suite = LlmSuite(pairs=pairs, path=path) - return suite - -""" def non_success_summary(): status = {} for job in _config.non_success_jobs: @@ -476,3 +409,15 @@ def _hook(d): return d return _hook + + +def record_non_success_jobs(suite, suite_file=None): + for status in suite.get_final_status().values(): + if isinstance(status.status, str) and status.status != "success": + status.suite_file = suite_file + _config.non_success_jobs.append(status) + if isinstance(status.status, list): + for job_status in status.status: + if job_status.status != "success": + status.suite_file = suite_file + _config.non_success_jobs.append(status) diff --git a/python/fate_test/scripts/_utils.py b/python/fate_test/scripts/_utils.py index 576d83c..0b6117a 100644 --- a/python/fate_test/scripts/_utils.py +++ b/python/fate_test/scripts/_utils.py @@ -12,7 +12,8 @@ from fate_test._config import Config from fate_test._flow_client import DataProgress, UploadDataResponse, QueryJobResponse from fate_test._io import echo, LOGGER, set_logger -from fate_test._parser import Testsuite, BenchmarkSuite, PerformanceSuite, DATA_LOAD_HOOK, CONF_LOAD_HOOK, DSL_LOAD_HOOK 
+from fate_test._parser import (Testsuite, BenchmarkSuite, PerformanceSuite, FinalStatus, + DATA_LOAD_HOOK, CONF_LOAD_HOOK, DSL_LOAD_HOOK) def _big_data_task(includes, guest_data_size, host_data_size, guest_feature_num, host_feature_num, host_data_type, @@ -88,6 +89,12 @@ def _find_testsuite_files(path): suite = PerformanceSuite.load(suite_path.resolve()) elif suite_type == "llmsuite": suite = LlmSuite.load(suite_path.resolve()) + suite_status = {} + for pair in suite.pairs: + for job in pair.jobs: + if not job.evaluate_only: + suite_status[f"{pair.pair_name}-{job.job_name}"] = FinalStatus(f"{pair.pair_name}-{job.job_name}") + suite._final_status = suite_status else: raise ValueError(f"Unsupported suite type: {suite_type}. Only accept type 'testsuite' or 'benchmark'.") except Exception as e: diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 2c94fe2..67ffba5 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -23,10 +23,12 @@ import click import yaml from fate_llm.scripts.eval_cli import run_job_eval +from fate_llm.utils.llm_evaluator import aggregate_table from fate_test._client import Clients from fate_test._config import Config from fate_test._io import LOGGER, echo +from fate_test._parser import record_non_success_jobs, non_success_summary from fate_test.scripts._options import SharedOptions from fate_test.scripts._utils import _load_testsuites, _load_module_from_script from fate_test.utils import extract_job_status @@ -50,8 +52,8 @@ help="glob string to filter sub-directory of path specified by ") @click.option("--provider", type=str, help="Select the fate version, for example: fate@2.0-beta") -@click.option('-c', '--eval-config', optional=True, type=click.Path(exists=True), help='Path to FATE Llm evaluation config. ' - 'If none, use default config.') +@click.option('--eval-config', type=click.Path(exists=True), + help='Path to FATE Llm evaluation config. 
If none, use default config.') @click.option('--skip-evaluate', is_flag=True, default=False, help="skip evaluation after training model") @SharedOptions.get_shared_options(hidden=True) @@ -80,7 +82,7 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co echo.echo(f"llmsuite namespace: {namespace}", fg='red') echo.echo("loading llmsuites:") if algorithm_suite: - #@todo: find built-in llmsuite path + # @todo: find built-in llmsuite path algorithm_suite_path = [None] suites = _load_testsuites(includes=algorithm_suite_path, excludes=None, glob=None, provider=provider, suffix="llmsuite.yaml", suite_type="llmsuite") @@ -103,19 +105,17 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') os.environ['enable_pipeline_job_info_callback'] = '1' try: - if eval_config: - config = {} - if eval_config is not None: - with eval_config.open("r") as f: - config.update(yaml.safe_load(f)) - eval_conf = config - else: + if not eval_config: from fate_llm.utils.config import default_eval_config - eval_conf = default_eval_config() + eval_config = default_eval_config() + + eval_config_dict = {} + with eval_config.open("r") as f: + eval_config_dict.update(yaml.safe_load(f)) _run_llmsuite_pairs(config_inst, suite, namespace, data_namespace_mangling, client, - skip_evaluate, eval_conf) + skip_evaluate, eval_config_dict) except Exception as e: - raise RuntimeError(f"exception occur while running benchmark jobs for {suite.path}") from e + raise RuntimeError(f"exception occur while running llmsuite jobs for {suite.path}") from e echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red') except Exception: @@ -124,14 +124,17 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co LOGGER.exception(f"exception id: {exception_id}") finally: echo.stdout_newline() - # 
non_success_summary() + suite_file = str(suite.path).split("/")[-1] + record_non_success_jobs(suite, suite_file) + non_success_summary() echo.farewell() echo.echo(f"llmsuite namespace: {namespace}", fg='red') @LOGGER.catch def _run_llmsuite_pairs(config: Config, suite, namespace: str, - data_namespace_mangling: bool, clients: Clients, skip_evaluate: bool, eval_conf: dict): + data_namespace_mangling: bool, clients: Clients, skip_evaluate: bool, eval_conf: dict, + output_path: str = None): client = clients['guest_0'] guest_party_id = config.parties.role_to_party("guest")[0] # pipeline demo goes here @@ -150,57 +153,80 @@ def _run_llmsuite_pairs(config: Config, suite, namespace: str, def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None): exception_id = str(uuid.uuid1()) - # suite.update_status(job_name=job_name, job_id=job_id, exception_id=exception_id, status=status, - # event=event, time_elapsed=time_elapsed) + suite.update_status(job_name=job_name, job_id=job_id, exception_id=exception_id, status=status, + event=event, time_elapsed=time_elapsed) echo.file(f"exception({exception_id}), error message:\n{err_msg}") - - job_name, script_path, conf_path = job.job_name, job.script_path, job.conf_path - param = Config.load_from_file(conf_path) - mod = _load_module_from_script(script_path) - input_params = signature(mod.main).parameters - - try: - # @todo: add update status api to suite - _run_mod(mod, input_params, config, param, namespace, data_namespace_mangling) - job_info = os.environ.get("pipeline_job_info") - job_id, status, time_elapsed, event = extract_job_status(job_info, client, guest_party_id) - """suite.update_status(job_name=job_name, job_id=job_id, status=status, - time_elapsed=time_elapsed, - event=event)""" + # evaluate_only + if job.evaluate_only and not skip_evaluate: + job_results[job.job_name] = run_job_eval(job, eval_conf) + # run pipeline job then evaluate + else: + job_name, script_path, conf_path = job.job_name, 
job.script_path, job.conf_path + param = Config.load_from_file(conf_path) + mod = _load_module_from_script(script_path) + input_params = signature(mod.main).parameters + + try: + # todo: add update status api to suite + # pipeline should return pretrained model path + pretrained_model_path = _run_mod(mod, input_params, config, param, + namespace, data_namespace_mangling) + job.pretrained_model_path = pretrained_model_path + job_info = os.environ.get("pipeline_job_info") + job_id, status, time_elapsed, event = extract_job_status(job_info, client, guest_party_id) + suite.update_status(job_name=job_name, job_id=job_id, status=status, + time_elapsed=time_elapsed, + event=event) + + except Exception as e: + job_info = os.environ.get("pipeline_job_info") + if job_info is None: + job_id, status, time_elapsed, event = None, 'failed', None, None + else: + job_id, status, time_elapsed, event = extract_job_status(job_info, client, + guest_party_id) + _raise(e, job_id=job_id, status=status, event=event, time_elapsed=time_elapsed) + os.environ.pop("pipeline_job_info") + continue if not skip_evaluate: - # @todo: load model with flow api & record evaluate result - job.pretrained_model_path, job.heft_path = None, None - result = run_job_eval(job, eval_conf) - job_results[job_name] = result + model_task_name = "nn_0" + if job.model_task_name: + model_task_name = job.model_task_name + peft_path = os.path.join(config.fate_base, "fate_flow", "model", job_id, + "guest", guest_party_id, model_task_name, + "0", "output", "output_model", "model_directory") + job.peft_path = peft_path + try: + result = run_job_eval(job, eval_conf) + job_results[job_name] = result + except Exception as e: + _raise(f"evaluate failed: {e}") os.environ.pop("pipeline_job_info") - - except Exception as e: - job_info = os.environ.get("pipeline_job_info") - if job_info is None: - job_id, status, time_elapsed, event = None, 'failed', None, None - else: - job_id, status, time_elapsed, event = 
extract_job_status(job_info, client, - guest_party_id) - _raise(e, job_id=job_id, status=status, event=event, time_elapsed=time_elapsed) - os.environ.pop("pipeline_job_info") - continue suite_results[pair.pair_name] = job_results - for job_name, result in job_results.items(): - echo.echo(f"Job: {job_name}") - echo.echo(result) - # todo: record time elapse + suite_writers = aggregate_table(suite_results) + for pair_name, pair_writer in suite_writers.items(): + echo.sep_line() + echo.echo(f"Pair: {pair_name}") + echo.sep_line() + echo.echo(pair_writer.dumps()) + echo.stdout_newline() + + if output_path: + with open(output_path, 'w') as f: + for pair_name, pair_writer in suite_writers.items(): + pair_writer.dumps(f) def _run_mod(mod, input_params, config, param, namespace, data_namespace_mangling): if len(input_params) == 1: - mod.main(param=param) + return mod.main(param=param) elif len(input_params) == 2: - mod.main(config=config, param=param) + return mod.main(config=config, param=param) # pipeline script elif len(input_params) == 3: if data_namespace_mangling: - mod.main(config=config, param=param, namespace=f"_{namespace}") + return mod.main(config=config, param=param, namespace=f"_{namespace}") else: - mod.main(config=config, param=param) + return mod.main(config=config, param=param) else: - mod.main() + return mod.main() From 7ff4bfa2db0d82a3631aab8efebdd9667a3ed7b5 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 20 May 2024 14:14:53 +0800 Subject: [PATCH 03/30] update import path for fate llm evaluate Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 67ffba5..c5fab89 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -22,8 +22,8 @@ import click import yaml -from fate_llm.scripts.eval_cli import run_job_eval -from 
fate_llm.utils.llm_evaluator import aggregate_table +from fate_llm.evaluate.scripts.eval_cli import run_job_eval +from fate_llm.evaluate.utils.llm_evaluator import aggregate_table from fate_test._client import Clients from fate_test._config import Config @@ -106,7 +106,7 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co os.environ['enable_pipeline_job_info_callback'] = '1' try: if not eval_config: - from fate_llm.utils.config import default_eval_config + from fate_llm.evaluate.utils.config import default_eval_config eval_config = default_eval_config() eval_config_dict = {} From b59678a727596a93438269352a2d78c8c8b64703 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 20 May 2024 14:20:06 +0800 Subject: [PATCH 04/30] update import path for fate llm evaluate Signed-off-by: Yu Wu --- python/fate_test/scripts/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/fate_test/scripts/_utils.py b/python/fate_test/scripts/_utils.py index 0b6117a..56e7b28 100644 --- a/python/fate_test/scripts/_utils.py +++ b/python/fate_test/scripts/_utils.py @@ -6,7 +6,6 @@ from pathlib import Path import click -from fate_llm.utils import LlmSuite from fate_test._client import Clients from fate_test._config import Config @@ -88,6 +87,7 @@ def _find_testsuite_files(path): elif suite_type == "performance": suite = PerformanceSuite.load(suite_path.resolve()) elif suite_type == "llmsuite": + from fate_llm.evaluate.utils import LlmSuite suite = LlmSuite.load(suite_path.resolve()) suite_status = {} for pair in suite.pairs: From 5d021504bc51b04026e694deffd54018eca72490 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 20 May 2024 15:52:49 +0800 Subject: [PATCH 05/30] allow optionally import fate-llm in fate-test scripts add cli to control optional import of extra packages Signed-off-by: Yu Wu --- python/fate_test/scripts/cli.py | 8 ++++++-- python/fate_test/scripts/config_cli.py | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 3 
deletions(-) diff --git a/python/fate_test/scripts/cli.py b/python/fate_test/scripts/cli.py index 48bcfaf..b01e6ce 100644 --- a/python/fate_test/scripts/cli.py +++ b/python/fate_test/scripts/cli.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import os import click @@ -20,7 +21,6 @@ from fate_test.scripts.benchmark_cli import run_benchmark from fate_test.scripts.config_cli import config_group from fate_test.scripts.data_cli import data_group -from fate_test.scripts.llmsuite_cli import run_llmsuite # from fate_test.scripts.flow_test_cli import flow_group from fate_test.scripts.performance_cli import run_task # from fate_test.scripts.quick_test_cli import unittest_group @@ -33,10 +33,14 @@ "performance": run_task, "benchmark-quality": run_benchmark, "data": data_group, - "llmsuite": run_llmsuite + # "unittest": unittest_group } +if os.environ.get("INCLUDE_FATE_LLM", None): + from fate_test.scripts.llmsuite_cli import run_llmsuite + commands["llmsuite"] = run_llmsuite + commands_alias = { "bq": "benchmark-quality", "bp": "performance" diff --git a/python/fate_test/scripts/config_cli.py b/python/fate_test/scripts/config_cli.py index 55f0b4c..4cfcfdb 100644 --- a/python/fate_test/scripts/config_cli.py +++ b/python/fate_test/scripts/config_cli.py @@ -13,10 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - +import os from pathlib import Path import click + from fate_test._client import Clients from fate_test._config import create_config, default_config, parse_config from fate_test.scripts._options import SharedOptions @@ -77,3 +78,16 @@ def _config(ctx, **kwargs): click.echo(f"[X]connection fail, role is {r}, exception is {e.args}") else: click.echo(f"[✓]connection {address} ok, fate version is {version}, role is {r}") + + +@config_group.command(name="enable") +@click.option('-i', '--include', required=True, type=str, multiple=True, + help="packages to be loaded in FATE-Test scripts") +def _enable(include): + """ + allow import of extra packages, currently only for FATE-Llm + """ + for p in include: + if isinstance(p, str) and p.lower() == "fate-llm": + os.environ['INCLUDE_FATE_LLM'] = "1" + click.echo(f"FATE-Test will allow import {include}.") From 86ea69294c109da12a157b1df7b77d2e840d67b1 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 21 May 2024 15:33:53 +0800 Subject: [PATCH 06/30] use template for loading fate-llm trained model Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index c5fab89..d7b6e14 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -22,8 +22,6 @@ import click import yaml -from fate_llm.evaluate.scripts.eval_cli import run_job_eval -from fate_llm.evaluate.utils.llm_evaluator import aggregate_table from fate_test._client import Clients from fate_test._config import Config @@ -135,6 +133,7 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co def _run_llmsuite_pairs(config: Config, suite, namespace: str, data_namespace_mangling: bool, clients: Clients, skip_evaluate: bool, eval_conf: dict, output_path: str = None): + from fate_llm.evaluate.scripts.eval_cli import run_job_eval 
client = clients['guest_0'] guest_party_id = config.parties.role_to_party("guest")[0] # pipeline demo goes here @@ -192,9 +191,16 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) model_task_name = "nn_0" if job.model_task_name: model_task_name = job.model_task_name - peft_path = os.path.join(config.fate_base, "fate_flow", "model", job_id, + from lm_eval.utils import apply_template + peft_path = apply_template(job.peft_path_format, + {"fate_base": config.fate_base, + "job_id": job_id, + "party_id": guest_party_id, + "model_task_name": model_task_name} + ) + """peft_path = os.path.join(config.fate_base, "fate_flow", "model", job_id, "guest", guest_party_id, model_task_name, - "0", "output", "output_model", "model_directory") + "0", "output", "output_model", "model_directory")""" job.peft_path = peft_path try: result = run_job_eval(job, eval_conf) @@ -203,6 +209,8 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) _raise(f"evaluate failed: {e}") os.environ.pop("pipeline_job_info") suite_results[pair.pair_name] = job_results + + from fate_llm.evaluate.utils.llm_evaluator import aggregate_table suite_writers = aggregate_table(suite_results) for pair_name, pair_writer in suite_writers.items(): echo.sep_line() From a81f944f0074d377ff42febb3158e89c2c3dea1b Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 21 May 2024 20:12:40 +0800 Subject: [PATCH 07/30] fix record Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index d7b6e14..851de9e 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -152,7 +152,7 @@ def _run_llmsuite_pairs(config: Config, suite, namespace: str, def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None): exception_id = str(uuid.uuid1()) - 
suite.update_status(job_name=job_name, job_id=job_id, exception_id=exception_id, status=status, + suite.update_status(pair_name=pair.pair_name, job_name=job_name, job_id=job_id, exception_id=exception_id, status=status, event=event, time_elapsed=time_elapsed) echo.file(f"exception({exception_id}), error message:\n{err_msg}") # evaluate_only @@ -173,7 +173,7 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) job.pretrained_model_path = pretrained_model_path job_info = os.environ.get("pipeline_job_info") job_id, status, time_elapsed, event = extract_job_status(job_info, client, guest_party_id) - suite.update_status(job_name=job_name, job_id=job_id, status=status, + suite.update_status(pair_name=pair.pair_name, job_name=job_name, job_id=job_id, status=status, time_elapsed=time_elapsed, event=event) From 57613124c905e1c7ff737068a4c5d1bfd3aba98b Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 21 May 2024 20:51:03 +0800 Subject: [PATCH 08/30] fix apply template Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 851de9e..081a9b8 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -194,7 +194,7 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) from lm_eval.utils import apply_template peft_path = apply_template(job.peft_path_format, {"fate_base": config.fate_base, - "job_id": job_id, + "job_id": job_id[0], "party_id": guest_party_id, "model_task_name": model_task_name} ) From c4e3341019356ab16726a10435f6dd2f8b712b9c Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 22 May 2024 10:08:01 +0800 Subject: [PATCH 09/30] add init tasks when using llmsuite Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 081a9b8..b6406bb 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -95,7 +95,8 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co echo.stdout_newline() # with Clients(config_inst) as client: client = Clients(config_inst) - + from fate_llm.evaluate.utils import llm_evaluator + llm_evaluator.init_tasks() for i, suite in enumerate(suites): # noinspection PyBroadException try: From 16d01b76f982682c164a6f377dca76e277420d04 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 22 May 2024 17:21:30 +0800 Subject: [PATCH 10/30] add pellm suite to algorithm default suites Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index b6406bb..6a6e5b9 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -80,9 +80,15 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co echo.echo(f"llmsuite namespace: {namespace}", fg='red') echo.echo("loading llmsuites:") if algorithm_suite: - # @todo: find built-in llmsuite path - algorithm_suite_path = [None] - suites = _load_testsuites(includes=algorithm_suite_path, excludes=None, glob=None, provider=provider, + algorithm_suite_path_dict = {"pellm": os.path.join(ctx.obj.get("fate_base"), "fate_llm", "examples")} + suite_paths = [] + for alg in algorithm_suite: + algorithm_suite_path = algorithm_suite_path_dict.get(alg, None) + if algorithm_suite_path is None: + echo.echo(f"algorithm suite {alg} not found", fg='red') + else: + suite_paths.append(algorithm_suite_path) + suites = _load_testsuites(includes=suite_paths, excludes=None, glob=None, provider=provider, suffix="llmsuite.yaml", suite_type="llmsuite") else: 
suites = _load_testsuites(includes=include, excludes=exclude, glob=glob, provider=provider, @@ -137,7 +143,6 @@ def _run_llmsuite_pairs(config: Config, suite, namespace: str, from fate_llm.evaluate.scripts.eval_cli import run_job_eval client = clients['guest_0'] guest_party_id = config.parties.role_to_party("guest")[0] - # pipeline demo goes here pair_n = len(suite.pairs) # fate_base = config.fate_base # PYTHONPATH = os.environ.get('PYTHONPATH') + ":" + os.path.join(fate_base, "python") @@ -167,14 +172,14 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) input_params = signature(mod.main).parameters try: - # todo: add update status api to suite # pipeline should return pretrained model path pretrained_model_path = _run_mod(mod, input_params, config, param, namespace, data_namespace_mangling) job.pretrained_model_path = pretrained_model_path job_info = os.environ.get("pipeline_job_info") job_id, status, time_elapsed, event = extract_job_status(job_info, client, guest_party_id) - suite.update_status(pair_name=pair.pair_name, job_name=job_name, job_id=job_id, status=status, + suite.update_status(pair_name=pair.pair_name, job_name=job_name, + job_id=job_id, status=status, time_elapsed=time_elapsed, event=event) @@ -183,8 +188,7 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) if job_info is None: job_id, status, time_elapsed, event = None, 'failed', None, None else: - job_id, status, time_elapsed, event = extract_job_status(job_info, client, - guest_party_id) + job_id, status, time_elapsed, event = extract_job_status(job_info, client, guest_party_id) _raise(e, job_id=job_id, status=status, event=event, time_elapsed=time_elapsed) os.environ.pop("pipeline_job_info") continue From a96827f97af7b4076a61d9694270b41aa6bb49bd Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 22 May 2024 17:22:31 +0800 Subject: [PATCH 11/30] use consts to record include_fate_llm var Signed-off-by: Yu Wu --- 
python/fate_test/_config.py | 2 +- python/fate_test/fate_test_config.yaml | 4 ++-- python/fate_test/scripts/cli.py | 4 ++-- python/fate_test/scripts/config_cli.py | 4 ++-- python/fate_test/utils.py | 3 +++ 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/python/fate_test/_config.py b/python/fate_test/_config.py index d3a6012..e5b2009 100644 --- a/python/fate_test/_config.py +++ b/python/fate_test/_config.py @@ -44,7 +44,7 @@ # directory where FATE code locates, default installation location={FATE}/fate # python/ml -> $fate_base/python/ml -fate_base: path(FATE)/fate +fate_base: path(FATE)/ # whether to delete data in suites after all jobs done clean_data: true diff --git a/python/fate_test/fate_test_config.yaml b/python/fate_test/fate_test_config.yaml index 7bb641f..f086391 100644 --- a/python/fate_test/fate_test_config.yaml +++ b/python/fate_test/fate_test_config.yaml @@ -19,7 +19,7 @@ all_examples_data_config: examples/data/upload_config/all_examples_data_testsuit # directory where FATE code locates, default installation location={FATE}/fate # python/ml -> $fate_base/python/ml -fate_base: path(FATE)/fate +fate_base: path(FATE)/ # whether to delete data in suites after all jobs done clean_data: true @@ -34,4 +34,4 @@ services: - flow_services: - { address: 127.0.0.1:9380, parties: [ '9999', '10000' ] } serving_setting: - address: 127.0.0.1:8059 \ No newline at end of file + address: 127.0.0.1:8059 diff --git a/python/fate_test/scripts/cli.py b/python/fate_test/scripts/cli.py index b01e6ce..ac3f821 100644 --- a/python/fate_test/scripts/cli.py +++ b/python/fate_test/scripts/cli.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import os import click @@ -37,7 +36,8 @@ # "unittest": unittest_group } -if os.environ.get("INCLUDE_FATE_LLM", None): +from fate_test import utils +if utils.INCLUDE_FATE_LLM: from fate_test.scripts.llmsuite_cli import run_llmsuite commands["llmsuite"] = run_llmsuite diff --git a/python/fate_test/scripts/config_cli.py b/python/fate_test/scripts/config_cli.py index 4cfcfdb..87a86a7 100644 --- a/python/fate_test/scripts/config_cli.py +++ b/python/fate_test/scripts/config_cli.py @@ -13,11 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import os from pathlib import Path import click +from fate_test import utils from fate_test._client import Clients from fate_test._config import create_config, default_config, parse_config from fate_test.scripts._options import SharedOptions @@ -89,5 +89,5 @@ def _enable(include): """ for p in include: if isinstance(p, str) and p.lower() == "fate-llm": - os.environ['INCLUDE_FATE_LLM'] = "1" + utils.INCLUDE_FATE_LLM = '1' click.echo(f"FATE-Test will allow import {include}.") diff --git a/python/fate_test/utils.py b/python/fate_test/utils.py index 12a9e39..677c757 100644 --- a/python/fate_test/utils.py +++ b/python/fate_test/utils.py @@ -32,6 +32,9 @@ RELATIVE = "relative" ABSOLUTE = "absolute" +DEFAULT_INCLUDE_FATE_LLM = None +INCLUDE_FATE_LLM = os.getenv("INCLUDE_FATE_LLM") or DEFAULT_INCLUDE_FATE_LLM + class TxtStyle: TRUE_VAL = Fore.GREEN From ebf1843677e7d04c304f97eb57dea630ca6ef235 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 22 May 2024 20:36:58 +0800 Subject: [PATCH 12/30] add doc Signed-off-by: Yu Wu --- doc/fate_test.md | 14 +++- doc/fate_test_command.md | 151 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 1 deletion(-) diff --git a/doc/fate_test.md b/doc/fate_test.md index 746dfbd..071e9b7 100644 --- a/doc/fate_test.md +++ b/doc/fate_test.md @@ -88,4 +88,16 @@ shown in last step ```bash fate_test data generate -i -ng 10000 -fg 
10 -fh 10 -m 1.0 --upload-data fate_test performance -i --skip-data - ``` \ No newline at end of file + ``` + +- [llm-suite](./fate_test_command.md#fate-llmsuite): used for running FATE-Llm testsuites, collection of FATE-Llm jobs and/or evaluations + + Before running llmsuite for the first time, make sure to install FATE-Llm and allow its import in FATE-Test scripts: + + ```bash + fate_test config include fate-llm + ``` + + ```bash + fate_test llmsuite -i + ``` diff --git a/doc/fate_test_command.md b/doc/fate_test_command.md index dabe682..21acc0c 100644 --- a/doc/fate_test_command.md +++ b/doc/fate_test_command.md @@ -867,3 +867,154 @@ fate_test data --help data after generate and upload dataset in testsuites *path1* + +## FATE Llmsuite + +FATE Llmsuite is used for running a collection of FATE-Llm jobs in sequence and then evaluate them on user-specified tasks. +It also allows users to compare the results of different llm jobs. + +### command options + +```bash +fate_test llmsuite --help +``` + +1. include: + + ```bash + fate_test llmsuite -i + ``` + + will run llm testsuites in + *path1* + +2. exclude: + + ```bash + fate_test llmsuite -i -e -e ... + ``` + + will run llm testsuites in *path1* but not in *path2* and *path3* + +3. glob: + + ```bash + fate_test llmsuite -i -g "hetero*" + ``` + + will run llm testsuites in sub directory start with *hetero* of + *path1* + +4. algorithm-suite: + + ```bash + fate_test llmsuite -a pellm' + ``` + + will run built-in 'pellm' llm testsuite, which will train and evaluate a FATE-Llm model and a zero-shot model + +5. timeout: + + ```bash + fate_test llmsuite -i -m 3600 + ``` + + will run llm testsuites in *path1* and timeout when job does not finish + within 3600s; if tasks need more time, use a larger threshold + +6. task-cores + + ```bash + fate_test llmsuite -i -p 4 + ``` + + will run llm testsuites in *path1* with script config "task-cores" set to 4 + +7. 
eval-config: + + ```bash + fate_test llmsuite -i --eval-config + ``` + + will run llm testsuites in *path1* with evaluation configuration set to *path2* + +8. skip-evaluate: + + ```bash + fate_test llmsuite -i --skip-evaluate + ``` + + will run llm testsuites in *path1* without running evaluation + +9. provider: + + ```bash + fate_test llmsuite -i --provider + ``` + + will run llm testsuites in *path1* with FATE provider set to *provider_name* + +10. yes: + + ```bash + fate_test llmsuite -i --yes + ``` + + will run llm testsuites in *path1* directly, skipping double check + + +### FATE-Llm job configuration + +Configuration of jobs should be specified in a llm testsuite whose +file name ends with "\*llmsuite.yaml". For llm testsuite example, +please refer [here](https://github.com/FederatedAI/FATE-LLM). + +A FATE-Llm testsuite includes the following elements: + +- job group: each group includes arbitrary number of jobs with paths + to corresponding script and configuration + + - job: name of evaluation job to be run, must be unique within each group + list + + - script: path to [testing script](#testing-script), should be + relative to testsuite, optional for evaluation-only jobs + - conf: path to job configuration file for script, should be + relative to testsuite, optional for evaluation-only jobs + - pretrained: path to pretrained model, should be relative to + testsuite, optional for jobs needed to run FATE-Llm training job, where the + script should return path to the pretrained model + - peft: path to peft file, should be relative to testsuite, + optional for jobs needed to run FATE-Llm training job + - tasks: list of tasks to be evaluated, optional for jobs skipping evaluation + - include_path: should be specified if tasks are user-defined + - eval_conf: path to evaluation configuration file, should be + relative to testsuite; if not provided, will use default conf + + ```yaml + bloom_lora: + pretrained: "/data/cephfs/llm/models/bloom-560m" + script: 
"./test_bloom_lora.py" + conf: "./bloom_lora_config.yaml" + peft_path_format: "{{fate_base}}/fate_flow/model/{{job_id}}/guest/{{party_id}}/{{model_task_name}}/0/output/output_model/model_directory" + tasks: + - "dolly-15k" + + ``` + + - + + ```yaml + hetero_nn_sshe_binary_0: + bloom_lora: + pretrained: "/data/cephfs/llm/models/bloom-560m" + script: "./test_bloom_lora.py" + conf: "./bloom_lora_config.yaml" + peft_path_format: "{{fate_base}}/fate_flow/model/{{job_id}}/guest/{{party_id}}/{{model_task_name}}/0/output/output_model/model_directory" + tasks: + - "dolly-15k" + bloom_zero_shot: + pretrained: "/data/cephfs/llm/models/bloom-560m" + tasks: + - "dolly-15k" + ``` From f61546c18cbfd46054b91b3e1726921efae1e763 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 22 May 2024 20:44:39 +0800 Subject: [PATCH 13/30] add doc Signed-off-by: Yu Wu --- doc/fate_test_command.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/fate_test_command.md b/doc/fate_test_command.md index 21acc0c..55f0138 100644 --- a/doc/fate_test_command.md +++ b/doc/fate_test_command.md @@ -1002,7 +1002,7 @@ A FATE-Llm testsuite includes the following elements: ``` - - +- llm suite ```yaml hetero_nn_sshe_binary_0: From c36998a68679a3f6ac41da17223cd545adce1c49 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 24 May 2024 16:34:47 +0800 Subject: [PATCH 14/30] edit doc Signed-off-by: Yu Wu --- doc/fate_test_command.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/fate_test_command.md b/doc/fate_test_command.md index 55f0138..2b5be41 100644 --- a/doc/fate_test_command.md +++ b/doc/fate_test_command.md @@ -978,10 +978,11 @@ A FATE-Llm testsuite includes the following elements: list - script: path to [testing script](#testing-script), should be - relative to testsuite, optional for evaluation-only jobs + relative to testsuite, optional for evaluation-only jobs; + note that pretrained model, if available, should be returned at the end of the script - 
conf: path to job configuration file for script, should be relative to testsuite, optional for evaluation-only jobs - - pretrained: path to pretrained model, should be relative to + - pretrained: path to pretrained model, should be either model name from Huggingface or relative path to testsuite, optional for jobs needed to run FATE-Llm training job, where the script should return path to the pretrained model - peft: path to peft file, should be relative to testsuite, @@ -993,7 +994,7 @@ A FATE-Llm testsuite includes the following elements: ```yaml bloom_lora: - pretrained: "/data/cephfs/llm/models/bloom-560m" + pretrained: "models/bloom-560m" script: "./test_bloom_lora.py" conf: "./bloom_lora_config.yaml" peft_path_format: "{{fate_base}}/fate_flow/model/{{job_id}}/guest/{{party_id}}/{{model_task_name}}/0/output/output_model/model_directory" @@ -1007,14 +1008,14 @@ A FATE-Llm testsuite includes the following elements: ```yaml hetero_nn_sshe_binary_0: bloom_lora: - pretrained: "/data/cephfs/llm/models/bloom-560m" + pretrained: "bloom-560m" script: "./test_bloom_lora.py" conf: "./bloom_lora_config.yaml" peft_path_format: "{{fate_base}}/fate_flow/model/{{job_id}}/guest/{{party_id}}/{{model_task_name}}/0/output/output_model/model_directory" tasks: - "dolly-15k" bloom_zero_shot: - pretrained: "/data/cephfs/llm/models/bloom-560m" + pretrained: "bloom-560m" tasks: - "dolly-15k" ``` From bec9bec9ca121ba455face76c7c9f115d6e07ba7 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 28 May 2024 17:34:47 +0800 Subject: [PATCH 15/30] edit doc Signed-off-by: Yu Wu --- doc/fate_test_command.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/fate_test_command.md b/doc/fate_test_command.md index 2b5be41..971511c 100644 --- a/doc/fate_test_command.md +++ b/doc/fate_test_command.md @@ -908,7 +908,7 @@ fate_test llmsuite --help 4. 
algorithm-suite: ```bash - fate_test llmsuite -a pellm' + fate_test llmsuite -a "pellm" ``` will run built-in 'pellm' llm testsuite, which will train and evaluate a FATE-Llm model and a zero-shot model From 91e9e54e330cd36f59b6243651a885a2221d3e4e Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 30 May 2024 15:39:23 +0800 Subject: [PATCH 16/30] lazy import FATE-Test subcommands rename subcommands entry point functions Signed-off-by: Yu Wu --- python/fate_test/_config.py | 2 +- ...chmark_cli.py => benchmark_quality_cli.py} | 4 +- python/fate_test/scripts/cli.py | 60 ++++++++++--------- python/fate_test/scripts/llmsuite_cli.py | 6 +- python/fate_test/scripts/performance_cli.py | 2 +- .../{testsuite_cli.py => suite_cli.py} | 9 --- python/setup.py | 2 +- 7 files changed, 38 insertions(+), 47 deletions(-) rename python/fate_test/scripts/{benchmark_cli.py => benchmark_quality_cli.py} (97%) rename python/fate_test/scripts/{testsuite_cli.py => suite_cli.py} (94%) diff --git a/python/fate_test/_config.py b/python/fate_test/_config.py index e5b2009..e00b82d 100644 --- a/python/fate_test/_config.py +++ b/python/fate_test/_config.py @@ -49,7 +49,7 @@ # whether to delete data in suites after all jobs done clean_data: true -# participating parties' id and correponding flow service ip & port information +# participating parties' id and corresponding flow service ip & port information parties: guest: ['9999'] host: ['10000', '9999'] diff --git a/python/fate_test/scripts/benchmark_cli.py b/python/fate_test/scripts/benchmark_quality_cli.py similarity index 97% rename from python/fate_test/scripts/benchmark_cli.py rename to python/fate_test/scripts/benchmark_quality_cli.py index fa6b155..c171ebf 100644 --- a/python/fate_test/scripts/benchmark_cli.py +++ b/python/fate_test/scripts/benchmark_quality_cli.py @@ -45,8 +45,8 @@ @click.option("--enable-clean-data", "clean_data", flag_value=True, default=None) @SharedOptions.get_shared_options(hidden=True) @click.pass_context -def 
run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, storage_tag, history_tag, match_details, - task_cores, timeout, **kwargs): +def run_benchmark_quality(ctx, include, exclude, glob, skip_data, tol, clean_data, storage_tag, history_tag, match_details, + task_cores, timeout, **kwargs): """ process benchmark suite, alias: bq """ diff --git a/python/fate_test/scripts/cli.py b/python/fate_test/scripts/cli.py index ac3f821..56df9ac 100644 --- a/python/fate_test/scripts/cli.py +++ b/python/fate_test/scripts/cli.py @@ -14,32 +14,11 @@ # limitations under the License. # +import os + import click from fate_test.scripts._options import SharedOptions -from fate_test.scripts.benchmark_cli import run_benchmark -from fate_test.scripts.config_cli import config_group -from fate_test.scripts.data_cli import data_group -# from fate_test.scripts.flow_test_cli import flow_group -from fate_test.scripts.performance_cli import run_task -# from fate_test.scripts.quick_test_cli import unittest_group -# from fate_test.scripts.secure_protocol_cli import secure_protocol_group -from fate_test.scripts.testsuite_cli import run_suite - -commands = { - "config": config_group, - "suite": run_suite, - "performance": run_task, - "benchmark-quality": run_benchmark, - "data": data_group, - - # "unittest": unittest_group -} - -from fate_test import utils -if utils.INCLUDE_FATE_LLM: - from fate_test.scripts.llmsuite_cli import run_llmsuite - commands["llmsuite"] = run_llmsuite commands_alias = { "bq": "benchmark-quality", @@ -48,16 +27,39 @@ class MultiCLI(click.MultiCommand): + def __init__(self, *args, **kwargs): + super(MultiCLI, self).__init__(*args, **kwargs) + self.plugin_folder = os.path.dirname(__file__) + """self._commands = { + "config": config_group, + "suite": run_suite, + "performance": run_task, + "benchmark-quality": run_benchmark, + "data": data_group} + self._load_extra_commands() + + def _load_extra_commands(self): + from fate_test.scripts.llmsuite_cli import 
run_llmsuite + self._commands["llmsuite"] = run_llmsuite""" def list_commands(self, ctx): - return list(commands) + rv = [] + for filename in os.listdir(self.plugin_folder): + if filename.endswith("_cli.py"): + rv.append(filename[:-7]) + rv.sort() + print(f"rv: {rv}") + return rv def get_command(self, ctx, name): - if name not in commands and name in commands_alias: - name = commands_alias[name] - if name not in commands: - ctx.fail("No such command '{}'.".format(name)) - return commands[name] + name = commands_alias.get(name, name).replace("-", "_") + ns = {} + fn = os.path.join(self.plugin_folder, name + "_cli.py") + with open(fn) as f: + code = compile(f.read(), fn, 'exec') + eval(code, ns, ns) + command_name = f"{name}_group" if name in ["data", "config"] else f"run_{name}" + return ns[command_name] @click.command(cls=MultiCLI, help="A collection of useful tools to running FATE's test.", diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 6a6e5b9..1a6b0b1 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -31,10 +31,7 @@ from fate_test.scripts._utils import _load_testsuites, _load_module_from_script from fate_test.utils import extract_job_status -""" -@click.option('-uj', '--update-job-parameters', default="{}", type=str, - help="a json string that represents mapping for replacing fields in job conf, example format: "'{job_name: param_name1: param_val1, param_name2=param_val2}'") -""" + @click.command("llmsuite") @click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, metavar="", @@ -101,6 +98,7 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co echo.stdout_newline() # with Clients(config_inst) as client: client = Clients(config_inst) + print(f"\n called import llm evaluator\n") from fate_llm.evaluate.utils import llm_evaluator llm_evaluator.init_tasks() for i, suite in enumerate(suites): 
diff --git a/python/fate_test/scripts/performance_cli.py b/python/fate_test/scripts/performance_cli.py index 2f0d151..cc5afc7 100644 --- a/python/fate_test/scripts/performance_cli.py +++ b/python/fate_test/scripts/performance_cli.py @@ -54,7 +54,7 @@ @click.option("--disable-clean-data", "clean_data", flag_value=False, default=None) @SharedOptions.get_shared_options(hidden=True) @click.pass_context -def run_task(ctx, job_type, include, timeout, epochs, +def run_performance(ctx, job_type, include, timeout, epochs, max_depth, num_trees, task_cores, storage_tag, history_tag, skip_data, clean_data, **kwargs): """ Test the performance of big data tasks, alias: bp diff --git a/python/fate_test/scripts/testsuite_cli.py b/python/fate_test/scripts/suite_cli.py similarity index 94% rename from python/fate_test/scripts/testsuite_cli.py rename to python/fate_test/scripts/suite_cli.py index 82194de..7235c7d 100644 --- a/python/fate_test/scripts/testsuite_cli.py +++ b/python/fate_test/scripts/suite_cli.py @@ -30,15 +30,6 @@ from fate_test.scripts._utils import _load_testsuites, _upload_data, _delete_data, _load_module_from_script from fate_test.utils import extract_job_status -""" -@click.option('-uj', '--update-job-parameters', default="{}", type=JSON_STRING, - help="a json string represents mapping for replacing fields in conf.job_parameters") -@click.option('-uc', '--update-component-parameters', default="{}", type=JSON_STRING, - help="a json string represents mapping for replacing fields in conf.component_parameters") -@click.option('-m', '--timeout', type=int, default=3600, help="maximun running time of job") -@click.option('-p', '--task-cores', type=int, help="processors per node") -""" - @click.command("suite") @click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, metavar="", diff --git a/python/setup.py b/python/setup.py index d26a151..de698d5 100644 --- a/python/setup.py +++ b/python/setup.py @@ -20,7 +20,7 @@ setup_kwargs = { 
"name": "fate-test", - "version": "2.1.0", + "version": "2.2.0", "description": "test tools for FATE", "long_description": 'FATE Test\n=========\n\nA collection of useful tools to running FATE\'s test.\n\n.. image:: images/tutorial.gif\n :align: center\n :alt: tutorial\n\nquick start\n-----------\n\n1. (optional) create virtual env\n\n .. code-block:: bash\n\n python -m venv venv\n source venv/bin/activate\n pip install -U pip\n\n\n2. install fate_test\n\n .. code-block:: bash\n\n pip install fate_test\n fate_test --help\n\n\n3. edit default fate_test_config.yaml\n\n .. code-block:: bash\n\n # edit priority config file with system default editor\n # filling some field according to comments\n fate_test config edit\n\n4. configure FATE-Pipeline and FATE-Flow Commandline server setting\n\n.. code-block:: bash\n\n # configure FATE-Pipeline server setting\n pipeline init --port 9380 --ip 127.0.0.1\n # configure FATE-Flow Commandline server setting\n flow init --port 9380 --ip 127.0.0.1\n\n5. run some fate_test suite\n\n .. code-block:: bash\n\n fate_test suite -i \n\n\n6. run some fate_test benchmark\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i \n\n7. useful logs or exception will be saved to logs dir with namespace shown in last step\n\ndevelop install\n---------------\nIt is more convenient to use the editable mode during development: replace step 2 with flowing steps\n\n.. code-block:: bash\n\n pip install -e ${FATE}/python/fate_client && pip install -e ${FATE}/python/fate_test\n\n\n\ncommand types\n-------------\n\n- suite: used for running testsuites, collection of FATE jobs\n\n .. code-block:: bash\n\n fate_test suite -i \n\n\n- benchmark-quality used for comparing modeling quality between FATE and other machine learning systems\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i \n\n\n\nconfiguration by examples\n--------------------------\n\n1. 
no need ssh tunnel:\n\n - 9999, service: service_a\n - 10000, service: service_b\n\n and both service_a, service_b can be requested directly:\n\n .. code-block:: yaml\n\n work_mode: 1 # 0 for standalone, 1 for cluster\n data_base_dir: \n parties:\n guest: [10000]\n host: [9999, 10000]\n arbiter: [9999]\n services:\n - flow_services:\n - {address: service_a, parties: [9999]}\n - {address: service_b, parties: [10000]}\n\n2. need ssh tunnel:\n\n - 9999, service: service_a\n - 10000, service: service_b\n\n service_a, can be requested directly while service_b don\'t,\n but you can request service_b in other node, say B:\n\n .. code-block:: yaml\n\n work_mode: 0 # 0 for standalone, 1 for cluster\n data_base_dir: \n parties:\n guest: [10000]\n host: [9999, 10000]\n arbiter: [9999]\n services:\n - flow_services:\n - {address: service_a, parties: [9999]}\n - flow_services:\n - {address: service_b, parties: [10000]}\n ssh_tunnel: # optional\n enable: true\n ssh_address: :\n ssh_username: \n ssh_password: # optional\n ssh_priv_key: "~/.ssh/id_rsa"\n\n\nTestsuite\n---------\n\nTestsuite is used for running a collection of jobs in sequence. Data used for jobs could be uploaded before jobs are\nsubmitted, and are cleaned when jobs finished. This tool is useful for FATE\'s release test.\n\ncommand options\n~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n fate_test suite --help\n\n1. include:\n\n .. code-block:: bash\n\n fate_test suite -i \n\n will run testsuites in *path1*\n\n2. exclude:\n\n .. code-block:: bash\n\n fate_test suite -i -e -e ...\n\n will run testsuites in *path1* but not in *path2* and *path3*\n\n3. glob:\n\n .. code-block:: bash\n\n fate_test suite -i -g "hetero*"\n\n will run testsuites in sub directory start with *hetero* of *path1*\n\n4. replace:\n\n .. code-block:: bash\n\n fate_test suite -i -r \'{"maxIter": 5}\'\n\n will find all key-value pair with key "maxIter" in `data conf` or `conf` or `dsl` and replace the value with 5\n\n\n5. skip-data:\n\n .. 
code-block:: bash\n\n fate_test suite -i --skip-data\n\n will run testsuites in *path1* without uploading data specified in *benchmark.json*.\n\n\n6. yes:\n\n .. code-block:: bash\n\n fate_test suite -i --yes\n\n will run testsuites in *path1* directly, skipping double check\n\n7. skip-dsl-jobs:\n\n .. code-block:: bash\n\n fate_test suite -i --skip-dsl-jobs\n\n will run testsuites in *path1* but skip all *tasks* in testsuites. It\'s would be useful when only pipeline tasks needed.\n\n8. skip-pipeline-jobs:\n\n .. code-block:: bash\n\n fate_test suite -i --skip-pipeline-jobs\n\n will run testsuites in *path1* but skip all *pipeline tasks* in testsuites. It\'s would be useful when only dsl tasks needed.\n\n\nBenchmark Quality\n------------------\n\nBenchmark-quality is used for comparing modeling quality between FATE\nand other machine learning systems. Benchmark produces a metrics comparison\nsummary for each benchmark job group.\n\n.. code-block:: bash\n\n fate_test benchmark-quality -i examples/benchmark_quality/hetero_linear_regression\n\n.. 
code-block:: bash\n\n +-------+--------------------------------------------------------------+\n | Data | Name |\n +-------+--------------------------------------------------------------+\n | train | {\'guest\': \'motor_hetero_guest\', \'host\': \'motor_hetero_host\'} |\n | test | {\'guest\': \'motor_hetero_guest\', \'host\': \'motor_hetero_host\'} |\n +-------+--------------------------------------------------------------+\n +------------------------------------+--------------------+--------------------+-------------------------+---------------------+\n | Model Name | explained_variance | r2_score | root_mean_squared_error | mean_squared_error |\n +------------------------------------+--------------------+--------------------+-------------------------+---------------------+\n | local-linear_regression-regression | 0.9035168452250094 | 0.9035070863155368 | 0.31340413289880553 | 0.09822215051805216 |\n | FATE-linear_regression-regression | 0.903146386539082 | 0.9031411831961411 | 0.3139977881119483 | 0.09859461093919596 |\n +------------------------------------+--------------------+--------------------+-------------------------+---------------------+\n +-------------------------+-----------+\n | Metric | All Match |\n +-------------------------+-----------+\n | explained_variance | True |\n | r2_score | True |\n | root_mean_squared_error | True |\n | mean_squared_error | True |\n +-------------------------+-----------+\n\ncommand options\n~~~~~~~~~~~~~~~\n\nuse the following command to show help message\n\n.. code-block:: bash\n\n fate_test benchmark-quality --help\n\n1. include:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i \n\n will run benchmark testsuites in *path1*\n\n2. exclude:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i -e -e ...\n\n will run benchmark testsuites in *path1* but not in *path2* and *path3*\n\n3. glob:\n\n .. 
code-block:: bash\n\n fate_test benchmark-quality -i -g "hetero*"\n\n will run benchmark testsuites in sub directory start with *hetero* of *path1*\n\n4. tol:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i -t 1e-3\n\n will run benchmark testsuites in *path1* with absolute tolerance of difference between metrics set to 0.001.\n If absolute difference between metrics is smaller than *tol*, then metrics are considered\n almost equal. Check benchmark testsuite `writing guide <#benchmark-testsuite>`_ on setting alternative tolerance.\n\n5. skip-data:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i --skip-data\n\n will run benchmark testsuites in *path1* without uploading data specified in *benchmark.json*.\n\n\n6. yes:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i --yes\n\n will run benchmark testsuites in *path1* directly, skipping double check\n\n\nbenchmark testsuite\n~~~~~~~~~~~~~~~~~~~\n\nConfiguration of jobs should be specified in a benchmark testsuite whose file name ends\nwith "\\*benchmark.json". For benchmark testsuite example,\nplease refer `here <../../examples/benchmark_quality>`_.\n\nA benchmark testsuite includes the following elements:\n\n- data: list of local data to be uploaded before running FATE jobs\n\n - file: path to original data file to be uploaded, should be relative to testsuite or FATE installation path\n - head: whether file includes header\n - partition: number of partition for data storage\n - table_name: table name in storage\n - namespace: table namespace in storage\n - role: which role to upload the data, as specified in fate_test.config;\n naming format is: "{role_type}_{role_index}", index starts at 0\n\n .. 
code-block:: json\n\n "data": [\n {\n "file": "examples/data/motor_hetero_host.csv",\n "head": 1,\n "partition": 8,\n "table_name": "motor_hetero_host",\n "namespace": "experiment",\n "role": "host_0"\n }\n ]\n\n- job group: each group includes arbitrary number of jobs with paths to corresponding script and configuration\n\n - job: name of job to be run, must be unique within each group list\n\n - script: path to `testing script <#testing-script>`_, should be relative to testsuite\n - conf: path to job configuration file for script, should be relative to testsuite\n\n .. code-block:: json\n\n "local": {\n "script": "./local-linr.py",\n "conf": "./linr_config.yaml"\n }\n\n - compare_setting: additional setting for quality metrics comparison, currently only takes ``relative_tol``\n\n If metrics *a* and *b* satisfy *abs(a-b) <= max(relative_tol \\* max(abs(a), abs(b)), absolute_tol)*\n (from `math module `_),\n they are considered almost equal. In the below example, metrics from "local" and "FATE" jobs are\n considered almost equal if their relative difference is smaller than\n *0.05 \\* max(abs(local_metric), abs(pipeline_metric)*.\n\n .. code-block:: json\n\n "linear_regression-regression": {\n "local": {\n "script": "./local-linr.py",\n "conf": "./linr_config.yaml"\n },\n "FATE": {\n "script": "./fate-linr.py",\n "conf": "./linr_config.yaml"\n },\n "compare_setting": {\n "relative_tol": 0.01\n }\n }\n\n\ntesting script\n~~~~~~~~~~~~~~\n\nAll job scripts need to have ``Main`` function as an entry point for executing jobs; scripts should\nreturn two dictionaries: first with data information key-value pairs: {data_type}: {data_name_dictionary};\nthe second contains {metric_name}: {metric_value} key-value pairs for metric comparison.\n\nBy default, the final data summary shows the output from the job named "FATE"; if no such job exists,\ndata information returned by the first job is shown. 
For clear presentation, we suggest that user follow\nthis general `guideline <../../examples/data/README.md#data-set-naming-rule>`_ for data set naming. In the case of multi-host\ntask, consider numbering host as such:\n\n::\n\n {\'guest\': \'default_credit_homo_guest\',\n \'host_1\': \'default_credit_homo_host_1\',\n \'host_2\': \'default_credit_homo_host_2\'}\n\nReturned quality metrics of the same key are to be compared.\nNote that only **real-value** metrics can be compared.\n\n- FATE script: ``Main`` always has three inputs:\n\n - config: job configuration, `JobConfig <../fate_client/pipeline/utils/tools.py#L64>`_ object loaded from "fate_test_config.yaml"\n - param: job parameter setting, dictionary loaded from "conf" file specified in benchmark testsuite\n - namespace: namespace suffix, user-given *namespace* or generated timestamp string when using *namespace-mangling*\n\n- non-FATE script: ``Main`` always has one input:\n\n - param: job parameter setting, dictionary loaded from "conf" file specified in benchmark testsuite\n\n\ndata\n----\n\n`Data` sub-command is used for upload or delete dataset in suite\'s.\n\ncommand options\n~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n fate_test data --help\n\n1. include:\n\n .. code-block:: bash\n\n fate_test data [upload|delete] -i \n\n will upload/delete dataset in testsuites in *path1*\n\n2. exclude:\n\n .. code-block:: bash\n\n fate_test data [upload|delete] -i -e -e ...\n\n will upload/delete dataset in testsuites in *path1* but not in *path2* and *path3*\n\n3. glob:\n\n .. code-block:: bash\n\n fate_test data [upload|delete] -i -g "hetero*"\n\n will upload/delete dataset in testsuites in sub directory start with *hetero* of *path1*\n\n\nfull command options\n---------------------\n\n.. 
click:: fate_test.scripts.cli:cli\n :prog: fate_test\n :show-nested:\n', "author": "FederatedAI", From dbd26783a0bb75ed587856565f88957dee8dc2ec Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 30 May 2024 17:06:49 +0800 Subject: [PATCH 17/30] clean up code, fix typo edit doc Signed-off-by: Yu Wu --- doc/fate_test.md | 2 +- doc/fate_test_command.md | 4 ++-- python/fate_test/scripts/cli.py | 12 ------------ python/fate_test/scripts/config_cli.py | 25 ++++++++++++------------ python/fate_test/scripts/llmsuite_cli.py | 1 - 5 files changed, 16 insertions(+), 28 deletions(-) diff --git a/doc/fate_test.md b/doc/fate_test.md index 071e9b7..c80efaf 100644 --- a/doc/fate_test.md +++ b/doc/fate_test.md @@ -90,7 +90,7 @@ shown in last step fate_test performance -i --skip-data ``` -- [llm-suite](./fate_test_command.md#fate-llmsuite): used for running FATE-Llm testsuites, collection of FATE-Llm jobs and/or evaluations +- [llm-suite](./fate_test_command.md#llmsuite): used for running FATE-Llm testsuites, collection of FATE-Llm jobs and/or evaluations Before running llmsuite for the first time, make sure to install FATE-Llm and allow its import in FATE-Test scripts: diff --git a/doc/fate_test_command.md b/doc/fate_test_command.md index 971511c..4e9e1fb 100644 --- a/doc/fate_test_command.md +++ b/doc/fate_test_command.md @@ -868,9 +868,9 @@ fate_test data --help *path1* -## FATE Llmsuite +## Llmsuite -FATE Llmsuite is used for running a collection of FATE-Llm jobs in sequence and then evaluate them on user-specified tasks. +Llmsuite is used for running a collection of FATE-Llm jobs in sequence and then evaluate them on user-specified tasks. It also allows users to compare the results of different llm jobs. 
### command options diff --git a/python/fate_test/scripts/cli.py b/python/fate_test/scripts/cli.py index 56df9ac..1c4358e 100644 --- a/python/fate_test/scripts/cli.py +++ b/python/fate_test/scripts/cli.py @@ -30,17 +30,6 @@ class MultiCLI(click.MultiCommand): def __init__(self, *args, **kwargs): super(MultiCLI, self).__init__(*args, **kwargs) self.plugin_folder = os.path.dirname(__file__) - """self._commands = { - "config": config_group, - "suite": run_suite, - "performance": run_task, - "benchmark-quality": run_benchmark, - "data": data_group} - self._load_extra_commands() - - def _load_extra_commands(self): - from fate_test.scripts.llmsuite_cli import run_llmsuite - self._commands["llmsuite"] = run_llmsuite""" def list_commands(self, ctx): rv = [] @@ -48,7 +37,6 @@ def list_commands(self, ctx): if filename.endswith("_cli.py"): rv.append(filename[:-7]) rv.sort() - print(f"rv: {rv}") return rv def get_command(self, ctx, name): diff --git a/python/fate_test/scripts/config_cli.py b/python/fate_test/scripts/config_cli.py index 87a86a7..1f07a3e 100644 --- a/python/fate_test/scripts/config_cli.py +++ b/python/fate_test/scripts/config_cli.py @@ -17,7 +17,6 @@ import click -from fate_test import utils from fate_test._client import Clients from fate_test._config import create_config, default_config, parse_config from fate_test.scripts._options import SharedOptions @@ -80,14 +79,16 @@ def _config(ctx, **kwargs): click.echo(f"[✓]connection {address} ok, fate version is {version}, role is {r}") -@config_group.command(name="enable") -@click.option('-i', '--include', required=True, type=str, multiple=True, - help="packages to be loaded in FATE-Test scripts") -def _enable(include): - """ - allow import of extra packages, currently only for FATE-Llm - """ - for p in include: - if isinstance(p, str) and p.lower() == "fate-llm": - utils.INCLUDE_FATE_LLM = '1' - click.echo(f"FATE-Test will allow import {include}.") +"""@config_group.command(name="set-extra-command") 
+@SharedOptions.get_shared_options(hidden=True) +@click.argument('enable', required=True, type=click.BOOL) +@click.pass_context +def _enable(ctx, enable, **kwargs): +""" +""" + allow extra commands, currently only FATE-Llm + + ctx.obj.update(**kwargs) + ctx.obj.update(include_fate_llm=enable) + os.environ["INCLUDE_FATE_LLM"] = '1' if enable else '0' + click.echo(f"Extra command {'enabled' if enable else 'disabled'}.")""" diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 1a6b0b1..2ae4531 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -98,7 +98,6 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co echo.stdout_newline() # with Clients(config_inst) as client: client = Clients(config_inst) - print(f"\n called import llm evaluator\n") from fate_llm.evaluate.utils import llm_evaluator llm_evaluator.init_tasks() for i, suite in enumerate(suites): From 81be3dc2260d65b3155384c6f5d01fd80066b791 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Mon, 3 Jun 2024 17:41:37 +0800 Subject: [PATCH 18/30] fix loading default config from invalid path Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 2ae4531..a4ba894 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -21,7 +21,6 @@ from inspect import signature import click -import yaml from fate_test._client import Clients from fate_test._config import Config @@ -107,15 +106,21 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') os.environ['enable_pipeline_job_info_callback'] = '1' try: + # eval_config_dict = {} if not eval_config: 
from fate_llm.evaluate.utils.config import default_eval_config eval_config = default_eval_config() - - eval_config_dict = {} - with eval_config.open("r") as f: - eval_config_dict.update(yaml.safe_load(f)) + if not os.path.exists(eval_config): + """eval_config = os.path.abspath(eval_config) + eval_config_dict = {} + with eval_config.open("r") as f: + eval_config_dict.update(yaml.safe_load(f))""" + eval_config = None + + """_run_llmsuite_pairs(config_inst, suite, namespace, data_namespace_mangling, client, + skip_evaluate, eval_config_dict)""" _run_llmsuite_pairs(config_inst, suite, namespace, data_namespace_mangling, client, - skip_evaluate, eval_config_dict) + skip_evaluate, eval_config) except Exception as e: raise RuntimeError(f"exception occur while running llmsuite jobs for {suite.path}") from e @@ -135,7 +140,7 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co @LOGGER.catch def _run_llmsuite_pairs(config: Config, suite, namespace: str, - data_namespace_mangling: bool, clients: Clients, skip_evaluate: bool, eval_conf: dict, + data_namespace_mangling: bool, clients: Clients, skip_evaluate: bool, eval_conf: str, output_path: str = None): from fate_llm.evaluate.scripts.eval_cli import run_job_eval client = clients['guest_0'] From 12ceea611cce7eb3e3c16529593a87e16879979e Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 6 Jun 2024 17:21:06 +0800 Subject: [PATCH 19/30] tidy up printout message Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index a4ba894..1f0a054 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -90,7 +90,7 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co suites = _load_testsuites(includes=include, excludes=exclude, glob=glob, provider=provider, 
suffix="llmsuite.yaml", suite_type="llmsuite") for suite in suites: - echo.echo(f"\tllm groups({len(suite.pairs)}) {suite.path}") + echo.echo(f"\tllm suite count: ({len(suite.pairs)}) from {suite.path}") if not yes and not click.confirm("running?"): return @@ -165,6 +165,7 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) echo.file(f"exception({exception_id}), error message:\n{err_msg}") # evaluate_only if job.evaluate_only and not skip_evaluate: + echo.echo(f"Evaluating job: {job.job_name} with tasks: {job.tasks}") job_results[job.job_name] = run_job_eval(job, eval_conf) # run pipeline job then evaluate else: @@ -209,6 +210,7 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) "guest", guest_party_id, model_task_name, "0", "output", "output_model", "model_directory")""" job.peft_path = peft_path + echo.echo(f"Evaluating job: {job.job_name} with tasks: {job.tasks}") try: result = run_job_eval(job, eval_conf) job_results[job_name] = result From 5e61c9fd9009a695754cf1b7a2186369ad694f6e Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 6 Jun 2024 17:49:33 +0800 Subject: [PATCH 20/30] redirect default data upload path to updated yaml file Signed-off-by: Yu Wu --- python/fate_test/_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/fate_test/_config.py b/python/fate_test/_config.py index e00b82d..ebf5479 100644 --- a/python/fate_test/_config.py +++ b/python/fate_test/_config.py @@ -36,11 +36,11 @@ # st_config_directory: examples/flow_test_template/hetero_lr/flow_test_config.yaml # directory stores testsuite file with min_test data sets to upload, -# default location={FATE}/examples/data/upload_config/min_test_data_testsuite.json -min_test_data_config: examples/data/upload_config/min_test_data_testsuite.json +# default location={FATE}/examples/data/upload_config/min_test_data_testsuite.yaml +min_test_data_config: examples/data/upload_config/min_test_data_testsuite.yaml 
# directory stores testsuite file with all example data sets to upload, -# default location={FATE}/examples/data/upload_config/all_examples_data_testsuite.json -all_examples_data_config: examples/data/upload_config/all_examples_data_testsuite.json +# default location={FATE}/examples/data/upload_config/all_examples_data_testsuite.yaml +all_examples_data_config: examples/data/upload_config/all_examples_data_testsuite.yaml # directory where FATE code locates, default installation location={FATE}/fate # python/ml -> $fate_base/python/ml From e2973b391dbc518268b75ae6fef93a8861b36ee7 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 6 Jun 2024 17:49:54 +0800 Subject: [PATCH 21/30] redirect default data upload path to updated yaml file Signed-off-by: Yu Wu --- python/fate_test/fate_test_config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/fate_test/fate_test_config.yaml b/python/fate_test/fate_test_config.yaml index f086391..92c1d63 100644 --- a/python/fate_test/fate_test_config.yaml +++ b/python/fate_test/fate_test_config.yaml @@ -11,11 +11,11 @@ performance_template_directory: examples/benchmark_performance/ flow_test_config_directory: examples/flow_test_template/hetero_lr/flow_test_config.yaml # directory stores testsuite file with min_test data sets to upload, -# default location={FATE}/examples/data/upload_config/min_test_data_testsuite.json -min_test_data_config: examples/data/upload_config/min_test_data_testsuite.json +# default location={FATE}/examples/data/upload_config/min_test_data_testsuite.yaml +min_test_data_config: examples/data/upload_config/min_test_data_testsuite.yaml # directory stores testsuite file with all example data sets to upload, -# default location={FATE}/examples/data/upload_config/all_examples_data_testsuite.json -all_examples_data_config: examples/data/upload_config/all_examples_data_testsuite.json +# default location={FATE}/examples/data/upload_config/all_examples_data_testsuite.yaml 
+all_examples_data_config: examples/data/upload_config/all_examples_data_testsuite.yaml # directory where FATE code locates, default installation location={FATE}/fate # python/ml -> $fate_base/python/ml From 85ec8f09e0050b1a86005bc32fd3560c63b0d895 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 7 Jun 2024 10:59:21 +0800 Subject: [PATCH 22/30] clean up code Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 10 ---------- python/fate_test/scripts/performance_cli.py | 2 -- python/fate_test/scripts/suite_cli.py | 2 -- 3 files changed, 14 deletions(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 1f0a054..2e74606 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -111,14 +111,7 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co from fate_llm.evaluate.utils.config import default_eval_config eval_config = default_eval_config() if not os.path.exists(eval_config): - """eval_config = os.path.abspath(eval_config) - eval_config_dict = {} - with eval_config.open("r") as f: - eval_config_dict.update(yaml.safe_load(f))""" eval_config = None - - """_run_llmsuite_pairs(config_inst, suite, namespace, data_namespace_mangling, client, - skip_evaluate, eval_config_dict)""" _run_llmsuite_pairs(config_inst, suite, namespace, data_namespace_mangling, client, skip_evaluate, eval_config) except Exception as e: @@ -206,9 +199,6 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) "party_id": guest_party_id, "model_task_name": model_task_name} ) - """peft_path = os.path.join(config.fate_base, "fate_flow", "model", job_id, - "guest", guest_party_id, model_task_name, - "0", "output", "output_model", "model_directory")""" job.peft_path = peft_path echo.echo(f"Evaluating job: {job.job_name} with tasks: {job.tasks}") try: diff --git a/python/fate_test/scripts/performance_cli.py 
b/python/fate_test/scripts/performance_cli.py index cc5afc7..ea550fb 100644 --- a/python/fate_test/scripts/performance_cli.py +++ b/python/fate_test/scripts/performance_cli.py @@ -70,8 +70,6 @@ def run_performance(ctx, job_type, include, timeout, epochs, config_inst.update_conf(timeout=timeout) if ctx.obj["engine_run"][0] is not None: config_inst.update_conf(engine_run=dict(ctx.obj["engine_run"])) - """if ctx.obj["auto_increasing_sid"] is not None: - config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" namespace = ctx.obj["namespace"] yes = ctx.obj["yes"] data_namespace_mangling = ctx.obj["namespace_mangling"] diff --git a/python/fate_test/scripts/suite_cli.py b/python/fate_test/scripts/suite_cli.py index 7235c7d..f7eaa4b 100644 --- a/python/fate_test/scripts/suite_cli.py +++ b/python/fate_test/scripts/suite_cli.py @@ -70,8 +70,6 @@ def run_suite(ctx, include, exclude, glob, if timeout is not None: config_inst.update_conf(timeout=timeout) - """if ctx.obj["auto_increasing_sid"] is not None: - config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]""" if clean_data is None: clean_data = config_inst.clean_data namespace = ctx.obj["namespace"] From 1afa00840ebd75feded89442a9282cfde446f44b Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Fri, 7 Jun 2024 16:37:52 +0800 Subject: [PATCH 23/30] allow algorithm option by making include optional Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 2e74606..f327767 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -32,7 +32,7 @@ @click.command("llmsuite") -@click.option('-i', '--include', required=True, type=click.Path(exists=True), multiple=True, +@click.option('-i', '--include', required=False, type=click.Path(exists=True), multiple=True, metavar="", help="include 
*llmsuite.yaml under these paths") @click.option('-e', '--exclude', type=click.Path(exists=True), multiple=True, @@ -76,7 +76,8 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co echo.echo(f"llmsuite namespace: {namespace}", fg='red') echo.echo("loading llmsuites:") if algorithm_suite: - algorithm_suite_path_dict = {"pellm": os.path.join(ctx.obj.get("fate_base"), "fate_llm", "examples")} + algorithm_suite_path_dict = {"pellm": os.path.join(config_inst.fate_base, "fate_llm", "examples", "pellm")} + # algorithm_suite_path_dict = {"pellm": os.path.join(config_inst.fate_base,"examples", "pellm")} suite_paths = [] for alg in algorithm_suite: algorithm_suite_path = algorithm_suite_path_dict.get(alg, None) @@ -84,14 +85,16 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co echo.echo(f"algorithm suite {alg} not found", fg='red') else: suite_paths.append(algorithm_suite_path) - suites = _load_testsuites(includes=suite_paths, excludes=None, glob=None, provider=provider, + suites = _load_testsuites(includes=suite_paths, excludes=[], glob=None, provider=provider, suffix="llmsuite.yaml", suite_type="llmsuite") - else: + elif len(include) > 0: suites = _load_testsuites(includes=include, excludes=exclude, glob=glob, provider=provider, suffix="llmsuite.yaml", suite_type="llmsuite") - for suite in suites: - echo.echo(f"\tllm suite count: ({len(suite.pairs)}) from {suite.path}") - if not yes and not click.confirm("running?"): + for suite in suites: + echo.echo(f"\tllm suite count: ({len(suite.pairs)}) from {suite.path}") + if not yes and not click.confirm("running?"): + return + else: return echo.stdout_newline() From 36966aef16f74b611b7d6a2de3208d85011695f8 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 11 Jun 2024 11:18:01 +0800 Subject: [PATCH 24/30] edit doc Signed-off-by: Yu Wu --- doc/fate_test.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/fate_test.md b/doc/fate_test.md index 
c80efaf..8428d41 100644 --- a/doc/fate_test.md +++ b/doc/fate_test.md @@ -9,7 +9,7 @@ A collection of useful tools to running FATE tests and PipeLine tasks. ```bash pip install -e python/fate_test ``` -2. edit default fate\_test\_config.yaml +2. edit default fate\_test\_config.yaml; edit path to fate base/data base accordingly ```bash # edit priority config file with system default editor From 0e4940824c13ce244d9e5dceb727c6bff775c579 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 11 Jun 2024 17:55:37 +0800 Subject: [PATCH 25/30] load binding table in llmsuite config; add bind table option to fate-test llmsuite subcommand Signed-off-by: Yu Wu --- python/fate_test/_flow_client.py | 85 +++++++----------------- python/fate_test/_parser.py | 16 ++--- python/fate_test/scripts/_utils.py | 52 ++++++++++++++- python/fate_test/scripts/llmsuite_cli.py | 16 ++++- 4 files changed, 97 insertions(+), 72 deletions(-) diff --git a/python/fate_test/_flow_client.py b/python/fate_test/_flow_client.py index 6a29d19..c997bcb 100644 --- a/python/fate_test/_flow_client.py +++ b/python/fate_test/_flow_client.py @@ -41,6 +41,29 @@ def __init__(self, def set_address(self, address): self.address = address + def bind_table(self, data: Data, callback=None): + conf = data.config + conf['file'] = os.path.join(str(self._data_base_dir), conf.get('file')) + path = Path(conf.get('file')) + if not path.exists(): + raise Exception('The file is obtained from the fate flow client machine, but it does not exist, ' + f'please check the path: {path}') + response = self._client.table.bind_path(file=str(path), + namespace=data.namespace, + name=data.table_name) + try: + if callback is not None: + callback(response) + status = str(response['message']).lower() + else: + status = response["message"] + code = response["code"] + if code != 0: + raise RuntimeError(f"Return code {code} != 0, bind path failed") + except BaseException: + raise ValueError(f"Bind path failed, response={response}") + return status + def 
transform_local_file_to_dataframe(self, data: Data, callback=None, output_path=None): #data_warehouse = self.upload_data(data, callback, output_path) #status = self.transform_to_dataframe(data.namespace, data.table_name, data_warehouse, callback) @@ -82,44 +105,6 @@ def upload_file_and_convert_to_dataframe(self, data: Data, callback=None, output self._awaiting(job_id, "local", 0) return status - """def upload_data(self, data: Data, callback=None, output_path=None): - response, file_path = self._upload_data(data, output_path=output_path) - try: - if callback is not None: - callback(response) - code = response["code"] - if code != 0: - raise ValueError(f"Return code {code}!=0") - - namespace = response["data"]["namespace"] - name = response["data"]["name"] - job_id = response["job_id"] - except BaseException: - raise ValueError(f"Upload data fails, response={response}") - # self.monitor_status(job_id, role=self.role, party_id=self.party_id) - self._awaiting(job_id, "local", 0) - - return dict(namespace=namespace, name=name) - - def transform_to_dataframe(self, namespace, table_name, data_warehouse, callback=None): - response = self._client.data.dataframe_transformer(namespace=namespace, - name=table_name, - data_warehouse=data_warehouse) - - try: - if callback is not None: - callback(response) - status = self._awaiting(response["job_id"], "local", 0) - status = str(status).lower() - else: - status = response["retmsg"] - - except Exception as e: - raise RuntimeError(f"upload data failed") from e - job_id = response["job_id"] - self._awaiting(job_id, "local", 0) - return status""" - def delete_data(self, data: Data): try: table_name = data.config['table_name'] if data.config.get( @@ -154,27 +139,6 @@ def _awaiting(self, job_id, role, party_id, callback=None): callback(response) time.sleep(1) - """def _upload_data(self, data, output_path=None, verbose=0, destroy=1): - conf = data.config - # if conf.get("engine", {}) != "PATH": - if output_path is not None: - 
conf['file'] = os.path.join(os.path.abspath(output_path), os.path.basename(conf.get('file'))) - else: - if _config.data_switch is not None: - conf['file'] = os.path.join(str(self._cache_directory), os.path.basename(conf.get('file'))) - else: - conf['file'] = os.path.join(str(self._data_base_dir), conf.get('file')) - path = Path(conf.get('file')) - if not path.exists(): - raise Exception('The file is obtained from the fate flow client machine, but it does not exist, ' - f'please check the path: {path}') - response = self._client.data.upload(file=str(path), - head=data.head, - meta=data.meta, - extend_sid=data.extend_sid, - partitions=data.partitions) - return response, conf["file"]""" - def _output_data_table(self, job_id, role, party_id, task_name): response = self._client.output.data_table(job_id, role=role, party_id=party_id, task_name=task_name) if response.get("code") is not None: @@ -223,7 +187,7 @@ def get_version(self): """def _add_notes(self, job_id, role, party_id, notes): data = dict(job_id=job_id, role=role, party_id=party_id, notes=notes) response = AddNotesResponse(self._post(url='job/update', json=data)) - return response""" + return response def _table_bind(self, data): response = self._post(url='table/bind', json=data) @@ -235,6 +199,7 @@ def _table_bind(self, data): except Exception as e: raise RuntimeError(f"table bind error: {response}") from e return response + """ class Status(object): diff --git a/python/fate_test/_parser.py b/python/fate_test/_parser.py index da4918c..901bd63 100644 --- a/python/fate_test/_parser.py +++ b/python/fate_test/_parser.py @@ -19,7 +19,6 @@ from pathlib import Path import prettytable -# import json from ruamel import yaml from fate_test import _config @@ -62,19 +61,20 @@ def _chain_hooks(hook_funcs, d): class Data(object): - def __init__(self, config: dict, role_str: str): + def __init__(self, config: dict, role_str: str, for_upload=True): self.config = config self.file = config.get("file", "") - self.meta = 
config.get("meta", {}) - self.partitions = config.get("partitions", 4) - self.head = config.get("head", True) - self.extend_sid = config.get("extend_sid", True) self.namespace = config.get("namespace", "") self.table_name = config.get("table_name", "") self.role_str = role_str + if for_upload: + self.meta = config.get("meta", {}) + self.partitions = config.get("partitions", 4) + self.head = config.get("head", True) + self.extend_sid = config.get("extend_sid", True) @staticmethod - def load(config, path: Path): + def load(config, path: Path, for_upload=True): kwargs = {} for field_name in config.keys(): if field_name not in ["file", "role"]: @@ -86,7 +86,7 @@ def load(config, path: Path): else: kwargs["file"] = file_path role_str = config.get("role") if config.get("role") != "guest" else "guest_0" - return Data(config=kwargs, role_str=role_str) + return Data(config=kwargs, role_str=role_str, for_upload=for_upload) def update(self, config: Config): if config.extend_sid is not None: diff --git a/python/fate_test/scripts/_utils.py b/python/fate_test/scripts/_utils.py index 56e7b28..50177fe 100644 --- a/python/fate_test/scripts/_utils.py +++ b/python/fate_test/scripts/_utils.py @@ -9,10 +9,10 @@ from fate_test._client import Clients from fate_test._config import Config -from fate_test._flow_client import DataProgress, UploadDataResponse, QueryJobResponse +from fate_test._flow_client import DataProgress, UploadDataResponse, QueryJobResponse, Status from fate_test._io import echo, LOGGER, set_logger from fate_test._parser import (Testsuite, BenchmarkSuite, PerformanceSuite, FinalStatus, - DATA_LOAD_HOOK, CONF_LOAD_HOOK, DSL_LOAD_HOOK) + DATA_LOAD_HOOK, CONF_LOAD_HOOK, DSL_LOAD_HOOK, Data) def _big_data_task(includes, guest_data_size, host_data_size, guest_feature_num, host_feature_num, host_data_type, @@ -87,8 +87,18 @@ def _find_testsuite_files(path): elif suite_type == "performance": suite = PerformanceSuite.load(suite_path.resolve()) elif suite_type == "llmsuite": + 
from ruamel import yaml from fate_llm.evaluate.utils import LlmSuite suite = LlmSuite.load(suite_path.resolve()) + # add data, if any provided + with suite_path.resolve().open("r") as f: + suite_config = yaml.safe_load(f) + dataset = [] + for d in suite_config.get("data"): + d = DATA_LOAD_HOOK.hook(d) + dataset.append(Data.load(d, suite_path, for_upload=False)) + suite.dataset = dataset + # add job status suite_status = {} for pair in suite.pairs: for job in pair.jobs: @@ -104,6 +114,44 @@ def _find_testsuite_files(path): return suites +@LOGGER.catch +def _bind_data(clients: Clients, suite, config: Config): + with click.progressbar(length=len(suite.dataset), + label="dataset", + show_eta=False, + show_pos=True, + width=24) as bar: + for i, data in enumerate(suite.dataset): + data.update(config) + data_progress = DataProgress(f"{data.role_str}<-{data.namespace}.{data.table_name}") + + def update_bar(n_step): + bar.item_show_func = lambda x: data_progress.show() + time.sleep(0.1) + bar.update(n_step) + + def _call_back(resp): + if isinstance(resp, Status): + echo.file(f"[table] bind: {resp}") + update_bar(0) + + try: + echo.stdout_newline() + status = clients[data.role_str].bind_table(data,_call_back) + time.sleep(1) + if status != 'success': + raise RuntimeError(f"binding {i + 1}th data for {suite.path} {status}") + bar.update(1) + + except Exception: + exception_id = str(uuid.uuid1()) + echo.file(f"exception({exception_id})") + LOGGER.exception(f"exception id: {exception_id}") + echo.echo(f"bind {i + 1}th data {data.config} to {data.role_str} fail, exception_id: {exception_id}") + # raise RuntimeError(f"exception uploading {i + 1}th data") from e + + + @LOGGER.catch def _upload_data(clients: Clients, suite, config: Config, output_path=None, **kwargs): if kwargs.get("partitions") is not None: diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index f327767..0f080df 100644 --- a/python/fate_test/scripts/llmsuite_cli.py 
+++ b/python/fate_test/scripts/llmsuite_cli.py @@ -27,7 +27,7 @@ from fate_test._io import LOGGER, echo from fate_test._parser import record_non_success_jobs, non_success_summary from fate_test.scripts._options import SharedOptions -from fate_test.scripts._utils import _load_testsuites, _load_module_from_script +from fate_test.scripts._utils import _load_testsuites, _load_module_from_script, _bind_data from fate_test.utils import extract_job_status @@ -50,9 +50,14 @@ help='Path to FATE Llm evaluation config. If none, use default config.') @click.option('--skip-evaluate', is_flag=True, default=False, help="skip evaluation after training model") +@click.option("--skip-data", is_flag=True, default=False, + help="skip binding table specified in llmsuite") +@click.option("--data-only", is_flag=True, default=False, + help="bind data only") @SharedOptions.get_shared_options(hidden=True) @click.pass_context -def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_cores, timeout, eval_config, skip_evaluate, **kwargs): +def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_cores, timeout, eval_config, skip_evaluate, + skip_data, data_only, **kwargs): """ process llmsuite """ @@ -108,6 +113,13 @@ def run_llmsuite(ctx, include, exclude, algorithm_suite, glob, provider, task_co start = time.time() echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red') os.environ['enable_pipeline_job_info_callback'] = '1' + if not skip_data: + try: + _bind_data(client, suite, config_inst) + except Exception as e: + raise RuntimeError(f"exception occur while uploading data for {suite.path}") from e + if data_only: + continue try: # eval_config_dict = {} if not eval_config: From 4f706ca20faf5f8495c3206a5226c0dd0fa219e8 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 11 Jun 2024 19:02:11 +0800 Subject: [PATCH 26/30] fix bind table api Signed-off-by: Yu Wu --- python/fate_test/_flow_client.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/python/fate_test/_flow_client.py b/python/fate_test/_flow_client.py index c997bcb..49f6bdd 100644 --- a/python/fate_test/_flow_client.py +++ b/python/fate_test/_flow_client.py @@ -48,7 +48,7 @@ def bind_table(self, data: Data, callback=None): if not path.exists(): raise Exception('The file is obtained from the fate flow client machine, but it does not exist, ' f'please check the path: {path}') - response = self._client.table.bind_path(file=str(path), + response = self._client.table.bind_path(path=str(path), namespace=data.namespace, name=data.table_name) try: From 64edd3ca3ebe943679e58a7f98597276bf403b8b Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 11 Jun 2024 19:39:01 +0800 Subject: [PATCH 27/30] fix empty dataset for llmsuite Signed-off-by: Yu Wu --- python/fate_test/scripts/_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/fate_test/scripts/_utils.py b/python/fate_test/scripts/_utils.py index 50177fe..1f0552f 100644 --- a/python/fate_test/scripts/_utils.py +++ b/python/fate_test/scripts/_utils.py @@ -94,7 +94,7 @@ def _find_testsuite_files(path): with suite_path.resolve().open("r") as f: suite_config = yaml.safe_load(f) dataset = [] - for d in suite_config.get("data"): + for d in suite_config.get("data", {}): d = DATA_LOAD_HOOK.hook(d) dataset.append(Data.load(d, suite_path, for_upload=False)) suite.dataset = dataset @@ -116,6 +116,8 @@ def _find_testsuite_files(path): @LOGGER.catch def _bind_data(clients: Clients, suite, config: Config): + if not suite.dataset: + return with click.progressbar(length=len(suite.dataset), label="dataset", show_eta=False, From 48e0e74e5241453251e5ac9f5f05075c44829a82 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Thu, 20 Jun 2024 20:13:34 +0800 Subject: [PATCH 28/30] only assign 'failed' status to job if error occurs when running pipeline module Signed-off-by: Yu Wu --- python/fate_test/scripts/llmsuite_cli.py | 78 +++++++++++++----------- 1 file 
changed, 41 insertions(+), 37 deletions(-) diff --git a/python/fate_test/scripts/llmsuite_cli.py b/python/fate_test/scripts/llmsuite_cli.py index 0f080df..1201151 100644 --- a/python/fate_test/scripts/llmsuite_cli.py +++ b/python/fate_test/scripts/llmsuite_cli.py @@ -168,8 +168,9 @@ def _run_llmsuite_pairs(config: Config, suite, namespace: str, def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None): exception_id = str(uuid.uuid1()) - suite.update_status(pair_name=pair.pair_name, job_name=job_name, job_id=job_id, exception_id=exception_id, status=status, - event=event, time_elapsed=time_elapsed) + if status is not None: + suite.update_status(pair_name=pair.pair_name, job_name=job_name, job_id=job_id, exception_id=exception_id, status=status, + event=event, time_elapsed=time_elapsed) echo.file(f"exception({exception_id}), error message:\n{err_msg}") # evaluate_only if job.evaluate_only and not skip_evaluate: @@ -177,50 +178,53 @@ def _raise(err_msg, status="failed", job_id=None, event=None, time_elapsed=None) job_results[job.job_name] = run_job_eval(job, eval_conf) # run pipeline job then evaluate else: - job_name, script_path, conf_path = job.job_name, job.script_path, job.conf_path - param = Config.load_from_file(conf_path) - mod = _load_module_from_script(script_path) - input_params = signature(mod.main).parameters - try: - # pipeline should return pretrained model path - pretrained_model_path = _run_mod(mod, input_params, config, param, - namespace, data_namespace_mangling) - job.pretrained_model_path = pretrained_model_path - job_info = os.environ.get("pipeline_job_info") - job_id, status, time_elapsed, event = extract_job_status(job_info, client, guest_party_id) - suite.update_status(pair_name=pair.pair_name, job_name=job_name, - job_id=job_id, status=status, - time_elapsed=time_elapsed, - event=event) + job_name, script_path, conf_path = job.job_name, job.script_path, job.conf_path + param = Config.load_from_file(conf_path) + mod = 
_load_module_from_script(script_path) + input_params = signature(mod.main).parameters - except Exception as e: - job_info = os.environ.get("pipeline_job_info") - if job_info is None: - job_id, status, time_elapsed, event = None, 'failed', None, None - else: + try: + # pipeline should return pretrained model path + pretrained_model_path = _run_mod(mod, input_params, config, param, + namespace, data_namespace_mangling) + job.pretrained_model_path = pretrained_model_path + job_info = os.environ.get("pipeline_job_info") job_id, status, time_elapsed, event = extract_job_status(job_info, client, guest_party_id) - _raise(e, job_id=job_id, status=status, event=event, time_elapsed=time_elapsed) - os.environ.pop("pipeline_job_info") + suite.update_status(pair_name=pair.pair_name, job_name=job_name, + job_id=job_id, status=status, + time_elapsed=time_elapsed, + event=event) + except Exception as e: + job_info = os.environ.get("pipeline_job_info") + if job_info is None: + job_id, status, time_elapsed, event = None, 'failed', None, None + else: + job_id, status, time_elapsed, event = extract_job_status(job_info, client, guest_party_id) + _raise(e, job_id=job_id, status=status, event=event, time_elapsed=time_elapsed) + os.environ.pop("pipeline_job_info") + continue + except Exception as e: + _raise(f"pipeline failed: {e}", status="not submitted") continue if not skip_evaluate: - model_task_name = "nn_0" - if job.model_task_name: - model_task_name = job.model_task_name - from lm_eval.utils import apply_template - peft_path = apply_template(job.peft_path_format, - {"fate_base": config.fate_base, - "job_id": job_id[0], - "party_id": guest_party_id, - "model_task_name": model_task_name} - ) - job.peft_path = peft_path - echo.echo(f"Evaluating job: {job.job_name} with tasks: {job.tasks}") try: + model_task_name = "nn_0" + if job.model_task_name: + model_task_name = job.model_task_name + from lm_eval.utils import apply_template + peft_path = apply_template(job.peft_path_format, + 
{"fate_base": config.fate_base, + "job_id": job_id[0], + "party_id": guest_party_id, + "model_task_name": model_task_name} + ) + job.peft_path = peft_path + echo.echo(f"Evaluating job: {job.job_name} with tasks: {job.tasks}") result = run_job_eval(job, eval_conf) job_results[job_name] = result except Exception as e: - _raise(f"evaluate failed: {e}") + _raise(f"evaluate failed: {e}", status=None) os.environ.pop("pipeline_job_info") suite_results[pair.pair_name] = job_results From 2233cd0aa55d8bd65a20b50a93bed9fe41502cd5 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Tue, 25 Jun 2024 10:24:34 +0800 Subject: [PATCH 29/30] update version Signed-off-by: Yu Wu --- python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index de698d5..4484593 100644 --- a/python/setup.py +++ b/python/setup.py @@ -20,7 +20,7 @@ setup_kwargs = { "name": "fate-test", - "version": "2.2.0", + "version": "2.1.1", "description": "test tools for FATE", "long_description": 'FATE Test\n=========\n\nA collection of useful tools to running FATE\'s test.\n\n.. image:: images/tutorial.gif\n :align: center\n :alt: tutorial\n\nquick start\n-----------\n\n1. (optional) create virtual env\n\n .. code-block:: bash\n\n python -m venv venv\n source venv/bin/activate\n pip install -U pip\n\n\n2. install fate_test\n\n .. code-block:: bash\n\n pip install fate_test\n fate_test --help\n\n\n3. edit default fate_test_config.yaml\n\n .. code-block:: bash\n\n # edit priority config file with system default editor\n # filling some field according to comments\n fate_test config edit\n\n4. configure FATE-Pipeline and FATE-Flow Commandline server setting\n\n.. code-block:: bash\n\n # configure FATE-Pipeline server setting\n pipeline init --port 9380 --ip 127.0.0.1\n # configure FATE-Flow Commandline server setting\n flow init --port 9380 --ip 127.0.0.1\n\n5. run some fate_test suite\n\n .. code-block:: bash\n\n fate_test suite -i \n\n\n6. 
run some fate_test benchmark\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i \n\n7. useful logs or exception will be saved to logs dir with namespace shown in last step\n\ndevelop install\n---------------\nIt is more convenient to use the editable mode during development: replace step 2 with flowing steps\n\n.. code-block:: bash\n\n pip install -e ${FATE}/python/fate_client && pip install -e ${FATE}/python/fate_test\n\n\n\ncommand types\n-------------\n\n- suite: used for running testsuites, collection of FATE jobs\n\n .. code-block:: bash\n\n fate_test suite -i \n\n\n- benchmark-quality used for comparing modeling quality between FATE and other machine learning systems\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i \n\n\n\nconfiguration by examples\n--------------------------\n\n1. no need ssh tunnel:\n\n - 9999, service: service_a\n - 10000, service: service_b\n\n and both service_a, service_b can be requested directly:\n\n .. code-block:: yaml\n\n work_mode: 1 # 0 for standalone, 1 for cluster\n data_base_dir: \n parties:\n guest: [10000]\n host: [9999, 10000]\n arbiter: [9999]\n services:\n - flow_services:\n - {address: service_a, parties: [9999]}\n - {address: service_b, parties: [10000]}\n\n2. need ssh tunnel:\n\n - 9999, service: service_a\n - 10000, service: service_b\n\n service_a, can be requested directly while service_b don\'t,\n but you can request service_b in other node, say B:\n\n .. code-block:: yaml\n\n work_mode: 0 # 0 for standalone, 1 for cluster\n data_base_dir: \n parties:\n guest: [10000]\n host: [9999, 10000]\n arbiter: [9999]\n services:\n - flow_services:\n - {address: service_a, parties: [9999]}\n - flow_services:\n - {address: service_b, parties: [10000]}\n ssh_tunnel: # optional\n enable: true\n ssh_address: :\n ssh_username: \n ssh_password: # optional\n ssh_priv_key: "~/.ssh/id_rsa"\n\n\nTestsuite\n---------\n\nTestsuite is used for running a collection of jobs in sequence. 
Data used for jobs could be uploaded before jobs are\nsubmitted, and are cleaned when jobs finished. This tool is useful for FATE\'s release test.\n\ncommand options\n~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n fate_test suite --help\n\n1. include:\n\n .. code-block:: bash\n\n fate_test suite -i \n\n will run testsuites in *path1*\n\n2. exclude:\n\n .. code-block:: bash\n\n fate_test suite -i -e -e ...\n\n will run testsuites in *path1* but not in *path2* and *path3*\n\n3. glob:\n\n .. code-block:: bash\n\n fate_test suite -i -g "hetero*"\n\n will run testsuites in sub directory start with *hetero* of *path1*\n\n4. replace:\n\n .. code-block:: bash\n\n fate_test suite -i -r \'{"maxIter": 5}\'\n\n will find all key-value pair with key "maxIter" in `data conf` or `conf` or `dsl` and replace the value with 5\n\n\n5. skip-data:\n\n .. code-block:: bash\n\n fate_test suite -i --skip-data\n\n will run testsuites in *path1* without uploading data specified in *benchmark.json*.\n\n\n6. yes:\n\n .. code-block:: bash\n\n fate_test suite -i --yes\n\n will run testsuites in *path1* directly, skipping double check\n\n7. skip-dsl-jobs:\n\n .. code-block:: bash\n\n fate_test suite -i --skip-dsl-jobs\n\n will run testsuites in *path1* but skip all *tasks* in testsuites. It\'s would be useful when only pipeline tasks needed.\n\n8. skip-pipeline-jobs:\n\n .. code-block:: bash\n\n fate_test suite -i --skip-pipeline-jobs\n\n will run testsuites in *path1* but skip all *pipeline tasks* in testsuites. It\'s would be useful when only dsl tasks needed.\n\n\nBenchmark Quality\n------------------\n\nBenchmark-quality is used for comparing modeling quality between FATE\nand other machine learning systems. Benchmark produces a metrics comparison\nsummary for each benchmark job group.\n\n.. code-block:: bash\n\n fate_test benchmark-quality -i examples/benchmark_quality/hetero_linear_regression\n\n.. 
code-block:: bash\n\n +-------+--------------------------------------------------------------+\n | Data | Name |\n +-------+--------------------------------------------------------------+\n | train | {\'guest\': \'motor_hetero_guest\', \'host\': \'motor_hetero_host\'} |\n | test | {\'guest\': \'motor_hetero_guest\', \'host\': \'motor_hetero_host\'} |\n +-------+--------------------------------------------------------------+\n +------------------------------------+--------------------+--------------------+-------------------------+---------------------+\n | Model Name | explained_variance | r2_score | root_mean_squared_error | mean_squared_error |\n +------------------------------------+--------------------+--------------------+-------------------------+---------------------+\n | local-linear_regression-regression | 0.9035168452250094 | 0.9035070863155368 | 0.31340413289880553 | 0.09822215051805216 |\n | FATE-linear_regression-regression | 0.903146386539082 | 0.9031411831961411 | 0.3139977881119483 | 0.09859461093919596 |\n +------------------------------------+--------------------+--------------------+-------------------------+---------------------+\n +-------------------------+-----------+\n | Metric | All Match |\n +-------------------------+-----------+\n | explained_variance | True |\n | r2_score | True |\n | root_mean_squared_error | True |\n | mean_squared_error | True |\n +-------------------------+-----------+\n\ncommand options\n~~~~~~~~~~~~~~~\n\nuse the following command to show help message\n\n.. code-block:: bash\n\n fate_test benchmark-quality --help\n\n1. include:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i \n\n will run benchmark testsuites in *path1*\n\n2. exclude:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i -e -e ...\n\n will run benchmark testsuites in *path1* but not in *path2* and *path3*\n\n3. glob:\n\n .. 
code-block:: bash\n\n fate_test benchmark-quality -i -g "hetero*"\n\n will run benchmark testsuites in sub directory start with *hetero* of *path1*\n\n4. tol:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i -t 1e-3\n\n will run benchmark testsuites in *path1* with absolute tolerance of difference between metrics set to 0.001.\n If absolute difference between metrics is smaller than *tol*, then metrics are considered\n almost equal. Check benchmark testsuite `writing guide <#benchmark-testsuite>`_ on setting alternative tolerance.\n\n5. skip-data:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i --skip-data\n\n will run benchmark testsuites in *path1* without uploading data specified in *benchmark.json*.\n\n\n6. yes:\n\n .. code-block:: bash\n\n fate_test benchmark-quality -i --yes\n\n will run benchmark testsuites in *path1* directly, skipping double check\n\n\nbenchmark testsuite\n~~~~~~~~~~~~~~~~~~~\n\nConfiguration of jobs should be specified in a benchmark testsuite whose file name ends\nwith "\\*benchmark.json". For benchmark testsuite example,\nplease refer `here <../../examples/benchmark_quality>`_.\n\nA benchmark testsuite includes the following elements:\n\n- data: list of local data to be uploaded before running FATE jobs\n\n - file: path to original data file to be uploaded, should be relative to testsuite or FATE installation path\n - head: whether file includes header\n - partition: number of partition for data storage\n - table_name: table name in storage\n - namespace: table namespace in storage\n - role: which role to upload the data, as specified in fate_test.config;\n naming format is: "{role_type}_{role_index}", index starts at 0\n\n .. 
code-block:: json\n\n "data": [\n {\n "file": "examples/data/motor_hetero_host.csv",\n "head": 1,\n "partition": 8,\n "table_name": "motor_hetero_host",\n "namespace": "experiment",\n "role": "host_0"\n }\n ]\n\n- job group: each group includes arbitrary number of jobs with paths to corresponding script and configuration\n\n - job: name of job to be run, must be unique within each group list\n\n - script: path to `testing script <#testing-script>`_, should be relative to testsuite\n - conf: path to job configuration file for script, should be relative to testsuite\n\n .. code-block:: json\n\n "local": {\n "script": "./local-linr.py",\n "conf": "./linr_config.yaml"\n }\n\n - compare_setting: additional setting for quality metrics comparison, currently only takes ``relative_tol``\n\n If metrics *a* and *b* satisfy *abs(a-b) <= max(relative_tol \\* max(abs(a), abs(b)), absolute_tol)*\n (from `math module `_),\n they are considered almost equal. In the below example, metrics from "local" and "FATE" jobs are\n considered almost equal if their relative difference is smaller than\n *0.05 \\* max(abs(local_metric), abs(pipeline_metric)*.\n\n .. code-block:: json\n\n "linear_regression-regression": {\n "local": {\n "script": "./local-linr.py",\n "conf": "./linr_config.yaml"\n },\n "FATE": {\n "script": "./fate-linr.py",\n "conf": "./linr_config.yaml"\n },\n "compare_setting": {\n "relative_tol": 0.01\n }\n }\n\n\ntesting script\n~~~~~~~~~~~~~~\n\nAll job scripts need to have ``Main`` function as an entry point for executing jobs; scripts should\nreturn two dictionaries: first with data information key-value pairs: {data_type}: {data_name_dictionary};\nthe second contains {metric_name}: {metric_value} key-value pairs for metric comparison.\n\nBy default, the final data summary shows the output from the job named "FATE"; if no such job exists,\ndata information returned by the first job is shown. 
For clear presentation, we suggest that user follow\nthis general `guideline <../../examples/data/README.md#data-set-naming-rule>`_ for data set naming. In the case of multi-host\ntask, consider numbering host as such:\n\n::\n\n {\'guest\': \'default_credit_homo_guest\',\n \'host_1\': \'default_credit_homo_host_1\',\n \'host_2\': \'default_credit_homo_host_2\'}\n\nReturned quality metrics of the same key are to be compared.\nNote that only **real-value** metrics can be compared.\n\n- FATE script: ``Main`` always has three inputs:\n\n - config: job configuration, `JobConfig <../fate_client/pipeline/utils/tools.py#L64>`_ object loaded from "fate_test_config.yaml"\n - param: job parameter setting, dictionary loaded from "conf" file specified in benchmark testsuite\n - namespace: namespace suffix, user-given *namespace* or generated timestamp string when using *namespace-mangling*\n\n- non-FATE script: ``Main`` always has one input:\n\n - param: job parameter setting, dictionary loaded from "conf" file specified in benchmark testsuite\n\n\ndata\n----\n\n`Data` sub-command is used for upload or delete dataset in suite\'s.\n\ncommand options\n~~~~~~~~~~~~~~~\n\n.. code-block:: bash\n\n fate_test data --help\n\n1. include:\n\n .. code-block:: bash\n\n fate_test data [upload|delete] -i \n\n will upload/delete dataset in testsuites in *path1*\n\n2. exclude:\n\n .. code-block:: bash\n\n fate_test data [upload|delete] -i -e -e ...\n\n will upload/delete dataset in testsuites in *path1* but not in *path2* and *path3*\n\n3. glob:\n\n .. code-block:: bash\n\n fate_test data [upload|delete] -i -g "hetero*"\n\n will upload/delete dataset in testsuites in sub directory start with *hetero* of *path1*\n\n\nfull command options\n---------------------\n\n.. 
click:: fate_test.scripts.cli:cli\n :prog: fate_test\n :show-nested:\n', "author": "FederatedAI", From c1069276c344e6f2e98a0a722641f6f3e45433f9 Mon Sep 17 00:00:00 2001 From: Yu Wu Date: Wed, 26 Jun 2024 14:57:20 +0800 Subject: [PATCH 30/30] edit doc Signed-off-by: Yu Wu --- RELEASE.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index edbd1b7..8567d14 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,8 @@ +## Release 2.1.1 +### Major Features and Improvements +> Fate-Test: FATE Automated Testing Tool +* Add new subcommand `llmsuite` for FATE-LLM training and evaluation + ## Release 2.1.0 ### Major Features and Improvements > Fate-Test: FATE Automated Testing Tool