From ea1501ac3bd7a331ee370f738ccfbbb2c137bfce Mon Sep 17 00:00:00 2001 From: changwangss Date: Thu, 15 Aug 2024 05:16:20 +0000 Subject: [PATCH 1/9] add finetuning job protocol. --- comps/cores/proto/api_protocol.py | 181 ++++++++++++++++++++++++++++++ comps/finetuning/env.py | 11 ++ comps/finetuning/finetuning.py | 14 +++ comps/finetuning/handlers.py | 76 +++++++++++++ 4 files changed, 282 insertions(+) create mode 100644 comps/finetuning/env.py create mode 100644 comps/finetuning/finetuning.py create mode 100644 comps/finetuning/handlers.py diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 382982d27..d6e1e6ae3 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -463,3 +463,184 @@ def check_requests(request) -> Optional[JSONResponse]: ) return None + + +class Hyperparameters(BaseModel): + batch_size: Optional[Literal["auto"], int] = "auto" + """Number of examples in each batch. + A larger batch size means that model parameters are updated less frequently, but with lower variance.""" + + learning_rate_multiplier: Optional[Literal["auto"], float] = "auto" + """Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting.""" + + n_epochs: Optional[Literal["auto"], int] = "auto" + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. "auto" decides + the optimal number of epochs based on the size of the dataset. If setting the + number manually, we support any number between 1 and 50 epochs. + """ + +class FineTuningJobWandbIntegration(BaseModel): + project: str + """The name of the project that the new run will be created under.""" + + entity: Optional[str] = None + """The entity to use for the run. + + This allows you to set the team or username of the WandB user that you would + like associated with the run. If not set, the default entity for the registered + WandB API key is used. + """ + + name: Optional[str] = None + """A display name to set for the run. + + If not set, we will use the Job ID as the name. + """ + + tags: Optional[List[str]] = None + """A list of tags to be attached to the newly created run. + + These tags are passed through directly to WandB. Some default tags are generated + by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + """ + + +class FineTuningJobWandbIntegrationObject(BaseModel): + type: Literal["wandb"] + """The type of the integration being enabled for the fine-tuning job""" + + wandb: FineTuningJobWandbIntegration + """The settings for your integration with Weights and Biases. + + This payload specifies the project that metrics will be sent to. Optionally, you + can set an explicit display name for your run, add tags to your run, and set a + default entity (team, username, etc) to be associated with your run. 
+ """ + +class FineTuningJobsRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/create + model: str + """The name of the model to fine-tune.""" + + training_file: str + """The ID of an uploaded file that contains training data.""" + + hyperparameters: Optional[Hyperparameters] + """The hyperparameters used for the fine-tuning job.""" + + suffix: Optional[str, None] = None + """A string of up to 64 characters that will be added to your fine-tuned model name.""" + + validation_file: Optional[str, None] = None + """The ID of an uploaded file that contains validation data.""" + + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + """A list of integrations to enable for your fine-tuning job.""" + + seed: Optional[str, None] = None + + +class Error(BaseModel): + code: str + """A machine-readable error code.""" + + message: str + """A human-readable error message.""" + + param: Optional[str] = None + """The parameter that was invalid, usually `training_file` or `validation_file`. + + This field will be null if the failure was not parameter-specific. + """ + + +class FineTuningJob(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/object + id: str + """The object identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" + + error: Optional[Error] = None + """ + For fine-tuning jobs that have `failed`, this will contain more information on + the cause of the failure. + """ + + fine_tuned_model: Optional[str] = None + """The name of the fine-tuned model that is being created. + + The value will be null if the fine-tuning job is still running. + """ + + finished_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the fine-tuning job was finished. + + The value will be null if the fine-tuning job is still running. + """ + + hyperparameters: Hyperparameters + """The hyperparameters used for the fine-tuning job. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + """ + + model: str + """The base model that is being fine-tuned.""" + + object: Literal["fine_tuning.job"] + """The object type, which is always "fine_tuning.job".""" + + organization_id: str + """The organization that owns the fine-tuning job.""" + + result_files: List[str] + """The compiled results file ID(s) for the fine-tuning job. + + You can retrieve the results with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"] + """ + The current status of the fine-tuning job, which can be either + `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. + """ + + trained_tokens: Optional[int] = None + """The total number of billable tokens processed by this fine-tuning job. + + The value will be null if the fine-tuning job is still running. + """ + + training_file: str + """The file ID used for training. + + You can retrieve the training data with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + validation_file: Optional[str] = None + """The file ID used for validation. 
+ + You can retrieve the validation results with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + """A list of integrations to enable for this fine-tuning job.""" + + seed: int + """The seed used for the fine-tuning job.""" + + estimated_finish: Optional[int] = None + """ + The Unix timestamp (in seconds) for when the fine-tuning job is estimated to + finish. The value will be null if the fine-tuning job is not running. + """ diff --git a/comps/finetuning/env.py b/comps/finetuning/env.py new file mode 100644 index 000000000..2f24f7b3e --- /dev/null +++ b/comps/finetuning/env.py @@ -0,0 +1,11 @@ + +MODEL_CONFIG_FILE_MAP = { + "meta-llama/Llama-2-7b-chat-hf": "./models/llama-2-7b-chat-hf.yaml", + "mistralai/Mistral-7B-v0.1": "./models/mistral-7b-v0.1.yaml", +} + +DATASET_BASE_PATH = "datasets" + +CHECK_JOB_STATUS_INTERVAL = 5 # Check every 5 secs + +ray_client = None diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py new file mode 100644 index 000000000..b26331282 --- /dev/null +++ b/comps/finetuning/finetuning.py @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from comps import opea_microservices, register_microservice + +from comps.cores.proto.api_protocol import FineTuningJobsRequest + +from handlers import ( + handle_create_finetuning_jobs, +) + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001) +def create_finetuning_jobs(request: FineTuningJobsRequest): + return handle_create_finetuning_jobs(request) diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py new file mode 100644 index 000000000..df3dc1aef --- /dev/null +++ b/comps/finetuning/handlers.py @@ -0,0 +1,76 @@ + +import os +import random +import time +import uuid +from typing import Any, Dict, List, Set +from pydantic_yaml import parse_yaml_raw_as, to_yaml_file + + +from comps.cores.proto.api_protocol import FineTuningJobsRequest, FineTuningJob + +from envs import ( + DATASET_BASE_PATH, + MODEL_CONFIG_FILE_MAP, + CHECK_JOB_STATUS_INTERVAL, + ray_client +) + +FineTuningJobID = str +running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} +finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} + + +def handle_create_finetuning_jobs(request: FineTuningJobsRequest): + base_model = request.model + train_file = request.training_file + train_file_path = os.path.join(DATASET_BASE_PATH, train_file) + + model_config_file = MODEL_CONFIG_FILE_MAP.get(base_model) + if not model_config_file: + raise HTTPException(status_code=404, detail=f"Base model '{base_model}' not supported!") + + if not os.path.exists(train_file_path): + raise HTTPException(status_code=404, detail=f"Training file '{train_file}' not found!") + + with open(model_config_file) as f: + finetune_config = parse_yaml_raw_as(FinetuneConfig, f) + + finetune_config.Dataset.train_file = train_file_path + + job = FineTuningJob( + id=f"ft-job-{uuid.uuid4()}", + model=base_model, + created_at=int(time.time()), + training_file=train_file, + hyperparameters={ + "n_epochs": finetune_config.Training.epochs, + "batch_size": finetune_config.Training.batch_size, + "learning_rate_multiplier": finetune_config.Training.learning_rate, + }, + status="running", + # TODO: Add seed in finetune config + seed=random.randint(0, 1000), + ) + + finetune_config_file = f"jobs/{job.id}.yaml" + 
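+    # Persist the per-job fine-tuning config to YAML so the Ray entrypoint
+    # (finetune_runner.py, invoked below) can load it back, then submit the
+    # job to the Ray cluster from the current working directory.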
to_yaml_file(finetune_config_file, finetune_config) + + global ray_client + ray_client = JobSubmissionClient() if ray_client is None else ray_client + + ray_job_id = ray_client.submit_job( + # Entrypoint shell command to execute + entrypoint=f"python finetune_runner.py --config_file {finetune_config_file}", + # Path to the local directory that contains the script.py file + runtime_env={"working_dir": "./"}, + ) + print(f"Submitted Ray job: {ray_job_id} ...") + + running_finetuning_jobs[job.id] = job + finetuning_job_to_ray_job[job.id] = ray_job_id + + # background_tasks.add_task(update_job_status, job.id) + + return job + From 2346ead2b26b3de18a358e2fba9a390f836d4eb7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 05:17:33 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/cores/proto/api_protocol.py | 34 +++++++++++++++++-------------- comps/finetuning/env.py | 2 ++ comps/finetuning/finetuning.py | 6 ++---- comps/finetuning/handlers.py | 15 +++++--------- 4 files changed, 28 insertions(+), 29 deletions(-) diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index d6e1e6ae3..24af09ed8 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -468,10 +468,15 @@ def check_requests(request) -> Optional[JSONResponse]: class Hyperparameters(BaseModel): batch_size: Optional[Literal["auto"], int] = "auto" """Number of examples in each batch. - A larger batch size means that model parameters are updated less frequently, but with lower variance.""" + + A larger batch size means that model parameters are updated less frequently, but with lower variance. + """ learning_rate_multiplier: Optional[Literal["auto"], float] = "auto" - """Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting.""" + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ n_epochs: Optional[Literal["auto"], int] = "auto" """The number of epochs to train the model for. @@ -481,6 +486,7 @@ class Hyperparameters(BaseModel): number manually, we support any number between 1 and 50 epochs. """ + class FineTuningJobWandbIntegration(BaseModel): project: str """The name of the project that the new run will be created under.""" @@ -509,7 +515,7 @@ class FineTuningJobWandbIntegration(BaseModel): class FineTuningJobWandbIntegrationObject(BaseModel): type: Literal["wandb"] - """The type of the integration being enabled for the fine-tuning job""" + """The type of the integration being enabled for the fine-tuning job.""" wandb: FineTuningJobWandbIntegration """The settings for your integration with Weights and Biases. @@ -519,6 +525,7 @@ class FineTuningJobWandbIntegrationObject(BaseModel): default entity (team, username, etc) to be associated with your run. 
""" + class FineTuningJobsRequest(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/fine-tuning/create @@ -537,7 +544,7 @@ class FineTuningJobsRequest(BaseModel): validation_file: Optional[str, None] = None """The ID of an uploaded file that contains validation data.""" - integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for your fine-tuning job.""" seed: Optional[str, None] = None @@ -567,10 +574,8 @@ class FineTuningJob(BaseModel): """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" error: Optional[Error] = None - """ - For fine-tuning jobs that have `failed`, this will contain more information on - the cause of the failure. - """ + """For fine-tuning jobs that have `failed`, this will contain more information on + the cause of the failure.""" fine_tuned_model: Optional[str] = None """The name of the fine-tuned model that is being created. @@ -608,10 +613,8 @@ class FineTuningJob(BaseModel): """ status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"] - """ - The current status of the fine-tuning job, which can be either - `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. - """ + """The current status of the fine-tuning job, which can be either + `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`.""" trained_tokens: Optional[int] = None """The total number of billable tokens processed by this fine-tuning job. @@ -640,7 +643,8 @@ class FineTuningJob(BaseModel): """The seed used for the fine-tuning job.""" estimated_finish: Optional[int] = None - """ - The Unix timestamp (in seconds) for when the fine-tuning job is estimated to - finish. The value will be null if the fine-tuning job is not running. + """The Unix timestamp (in seconds) for when the fine-tuning job is estimated to + finish. + + The value will be null if the fine-tuning job is not running. 
""" diff --git a/comps/finetuning/env.py b/comps/finetuning/env.py index 2f24f7b3e..b4ea5c6b5 100644 --- a/comps/finetuning/env.py +++ b/comps/finetuning/env.py @@ -1,3 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 MODEL_CONFIG_FILE_MAP = { "meta-llama/Llama-2-7b-chat-hf": "./models/llama-2-7b-chat-hf.yaml", diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index b26331282..47dbed8e6 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -1,13 +1,11 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from comps import opea_microservices, register_microservice +from handlers import handle_create_finetuning_jobs +from comps import opea_microservices, register_microservice from comps.cores.proto.api_protocol import FineTuningJobsRequest -from handlers import ( - handle_create_finetuning_jobs, -) @register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001) def create_finetuning_jobs(request: FineTuningJobsRequest): diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index df3dc1aef..893f81809 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -1,20 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 import os import random import time import uuid from typing import Any, Dict, List, Set -from pydantic_yaml import parse_yaml_raw_as, to_yaml_file - -from comps.cores.proto.api_protocol import FineTuningJobsRequest, FineTuningJob +from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client +from pydantic_yaml import parse_yaml_raw_as, to_yaml_file -from envs import ( - DATASET_BASE_PATH, - MODEL_CONFIG_FILE_MAP, - CHECK_JOB_STATUS_INTERVAL, - ray_client -) +from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} @@ -73,4 +69,3 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): # background_tasks.add_task(update_job_status, job.id) return job - From 003acaa687ab80c08288cfdb97396c4eec39c6ce Mon Sep 17 00:00:00 2001 From: changwangss Date: Thu, 15 Aug 2024 08:47:26 +0000 Subject: [PATCH 3/9] update create finetuning job code. --- comps/cores/proto/api_protocol.py | 22 +++++----- comps/finetuning/{env.py => envs.py} | 8 ++++ comps/finetuning/finetuning.py | 3 ++ comps/finetuning/handlers.py | 16 +++++++- .../finetuning/models/llama-2-7b-chat-hf.yaml | 40 +++++++++++++++++++ 5 files changed, 76 insertions(+), 13 deletions(-) rename comps/finetuning/{env.py => envs.py} (66%) create mode 100644 comps/finetuning/models/llama-2-7b-chat-hf.yaml diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 24af09ed8..b533e04c2 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -466,19 +466,19 @@ def check_requests(request) -> Optional[JSONResponse]: class Hyperparameters(BaseModel): - batch_size: Optional[Literal["auto"], int] = "auto" + batch_size: Optional[Union[Literal["auto"], int]] = "auto" """Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. 
""" - learning_rate_multiplier: Optional[Literal["auto"], float] = "auto" + learning_rate_multiplier: Optional[Union[Literal["auto"], float]] = "auto" """Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. """ - n_epochs: Optional[Literal["auto"], int] = "auto" + n_epochs: Optional[Union[Literal["auto"], int]] = "auto" """The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. "auto" decides @@ -535,19 +535,19 @@ class FineTuningJobsRequest(BaseModel): training_file: str """The ID of an uploaded file that contains training data.""" - hyperparameters: Optional[Hyperparameters] + hyperparameters: Optional[Hyperparameters] = Hyperparameters """The hyperparameters used for the fine-tuning job.""" - suffix: Optional[str, None] = None + suffix: Optional[str] = None """A string of up to 64 characters that will be added to your fine-tuned model name.""" - validation_file: Optional[str, None] = None + validation_file: Optional[str] = None """The ID of an uploaded file that contains validation data.""" integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for your fine-tuning job.""" - seed: Optional[str, None] = None + seed: Optional[str] = None class Error(BaseModel): @@ -599,13 +599,13 @@ class FineTuningJob(BaseModel): model: str """The base model that is being fine-tuned.""" - object: Literal["fine_tuning.job"] + object: Literal["fine_tuning.job"] = "fine_tuning.job" """The object type, which is always "fine_tuning.job".""" - organization_id: str + organization_id: Optional[str] = None """The organization that owns the fine-tuning job.""" - result_files: List[str] + result_files: List[str] = None """The compiled results file ID(s) for the fine-tuning job. 
You can retrieve the results with the @@ -639,7 +639,7 @@ class FineTuningJob(BaseModel): integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for this fine-tuning job.""" - seed: int + seed: Optional[int] = None """The seed used for the fine-tuning job.""" estimated_finish: Optional[int] = None diff --git a/comps/finetuning/env.py b/comps/finetuning/envs.py similarity index 66% rename from comps/finetuning/env.py rename to comps/finetuning/envs.py index b4ea5c6b5..0d4d41cd5 100644 --- a/comps/finetuning/env.py +++ b/comps/finetuning/envs.py @@ -1,12 +1,20 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os + MODEL_CONFIG_FILE_MAP = { "meta-llama/Llama-2-7b-chat-hf": "./models/llama-2-7b-chat-hf.yaml", "mistralai/Mistral-7B-v0.1": "./models/mistral-7b-v0.1.yaml", } DATASET_BASE_PATH = "datasets" +JOBS_PATH = "jobs" +if not os.path.exists(DATASET_BASE_PATH): + os.path.mkdir(DATASET_BASE_PATH) + +if not os.path.exists(JOBS_PATH): + os.path.mkdir(JOBS_PATH) CHECK_JOB_STATUS_INTERVAL = 5 # Check every 5 secs diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index 47dbed8e6..36d439bef 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -10,3 +10,6 @@ @register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001) def create_finetuning_jobs(request: FineTuningJobsRequest): return handle_create_finetuning_jobs(request) + +if __name__ == "__main__": + opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index 893f81809..13160acb8 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -8,10 +8,14 @@ from typing import Any, Dict, List, Set from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client +from finetune_config import FinetuneConfig + from pydantic_yaml import parse_yaml_raw_as, to_yaml_file from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest +from ray.job_submission import JobSubmissionClient + FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} @@ -23,6 +27,7 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): train_file_path = os.path.join(DATASET_BASE_PATH, train_file) model_config_file = MODEL_CONFIG_FILE_MAP.get(base_model) + if not model_config_file: raise HTTPException(status_code=404, detail=f"Base model '{base_model}' not supported!") @@ -34,6 +39,15 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): finetune_config.Dataset.train_file = train_file_path + if request.hyperparameters.epochs != "auto": + finetune_config.Training.epochs = request.hyperparameters.epochs + + if request.hyperparameters.batch_size != "auto": + finetune_config.Training.batch_size = request.hyperparameters.batch_size + + if request.hyperparameters.learning_rate_multiplier != "auto": + finetune_config.Training.learning_rate = request.hyperparameters.learning_rate_multiplier + job = FineTuningJob( id=f"ft-job-{uuid.uuid4()}", model=base_model, @@ -66,6 +80,4 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): running_finetuning_jobs[job.id] = job finetuning_job_to_ray_job[job.id] = ray_job_id - # background_tasks.add_task(update_job_status, job.id) - return job diff --git 
a/comps/finetuning/models/llama-2-7b-chat-hf.yaml b/comps/finetuning/models/llama-2-7b-chat-hf.yaml new file mode 100644 index 000000000..ab62383d2 --- /dev/null +++ b/comps/finetuning/models/llama-2-7b-chat-hf.yaml @@ -0,0 +1,40 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +General: + base_model: meta-llama/Llama-2-7b-chat-hf + gpt_base_model: false + output_dir: /tmp/llm-ray/output + save_strategy: no + config: + trust_remote_code: false + use_auth_token: null + lora_config: + task_type: CAUSAL_LM + r: 8 + lora_alpha: 32 + lora_dropout: 0.1 + target_modules: + - q_proj + - v_proj + enable_gradient_checkpointing: false +Dataset: + train_file: examples/data/sample_finetune_data_small.jsonl + group: false + validation_file: null + validation_split_percentage: 5 +Training: + optimizer: adamw_torch + batch_size: 2 + epochs: 3 + learning_rate: 1.0e-05 + lr_scheduler: linear + weight_decay: 0.0 + mixed_precision: bf16 + device: cpu + num_training_workers: 2 + resources_per_worker: + CPU: 32 + accelerate_mode: DDP + gradient_accumulation_steps: 1 + logging_steps: 10 From 93c9f5ea6bccdf54530b905dc714699d9a685d68 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 08:47:59 +0000 Subject: [PATCH 4/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/finetuning/finetuning.py | 1 + comps/finetuning/handlers.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index 36d439bef..4c3945ce3 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -11,5 +11,6 @@ def create_finetuning_jobs(request: FineTuningJobsRequest): return handle_create_finetuning_jobs(request) + if __name__ == "__main__": opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index 13160acb8..bfb12fc33 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -9,13 +9,11 @@ from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client from finetune_config import FinetuneConfig - from pydantic_yaml import parse_yaml_raw_as, to_yaml_file +from ray.job_submission import JobSubmissionClient from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest -from ray.job_submission import JobSubmissionClient - FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} From 8a7061b49a7ed16083c2f64b8421ac792ea1495f Mon Sep 17 00:00:00 2001 From: changwangss Date: Thu, 15 Aug 2024 10:23:49 +0000 Subject: [PATCH 5/9] update creating finetuning job code. 
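
The request's optional hyperparameters block, when present, now overrides the
per-model YAML defaults (epochs, batch size, learning rate). A request body
along the lines of the sketch below exercises the new path; the training file
name is a placeholder and must exist under DATASET_BASE_PATH:

    payload = {
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "training_file": "sample_finetune_data_small.jsonl",
        "hyperparameters": {"n_epochs": 2, "batch_size": 4, "learning_rate_multiplier": 1e-5},
    }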
--- comps/cores/proto/api_protocol.py | 2 +- comps/finetuning/finetune_config.py | 156 ++++++++++++++++++++++++++++ comps/finetuning/handlers.py | 19 ++-- 3 files changed, 168 insertions(+), 9 deletions(-) create mode 100644 comps/finetuning/finetune_config.py diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index b533e04c2..8db7251d1 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -535,7 +535,7 @@ class FineTuningJobsRequest(BaseModel): training_file: str """The ID of an uploaded file that contains training data.""" - hyperparameters: Optional[Hyperparameters] = Hyperparameters + hyperparameters: Optional[Hyperparameters] = None """The hyperparameters used for the fine-tuning job.""" suffix: Optional[str] = None diff --git a/comps/finetuning/finetune_config.py b/comps/finetuning/finetune_config.py new file mode 100644 index 000000000..c53b36131 --- /dev/null +++ b/comps/finetuning/finetune_config.py @@ -0,0 +1,156 @@ +# Copyright 2023 The LLM-on-Ray Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import List, Optional + +from pydantic import BaseModel, validator + +PRECISION_BF16 = "bf16" +PRECISION_FP16 = "fp16" +PRECISION_NO = "no" + +DEVICE_CPU = "cpu" +DEVICE_HPU = "hpu" +DEVICE_GPU = "gpu" + +ACCELERATE_STRATEGY_DDP = "DDP" +ACCELERATE_STRATEGY_FSDP = "FSDP" +ACCELERATE_STRATEGY_DEEPSPEED = "DEEPSPEED" + + +class GeneralConfig(BaseModel): + trust_remote_code: bool + use_auth_token: Optional[str] + + +class LoraConfig(BaseModel): + task_type: str + r: int + lora_alpha: int + lora_dropout: float + target_modules: Optional[List[str]] = None + + +class DeltatunerConfig(BaseModel): + algo: str + denas: bool + best_model_structure: str + + +class General(BaseModel): + base_model: str + tokenizer_name: Optional[str] = None + gaudi_config_name: Optional[str] = None + gpt_base_model: bool + output_dir: str + resume_from_checkpoint: Optional[str] = None + save_strategy: str = "no" + config: GeneralConfig + lora_config: Optional[LoraConfig] = None + deltatuner_config: Optional[DeltatunerConfig] = None + enable_gradient_checkpointing: bool = False + + +class Dataset(BaseModel): + train_file: str + validation_file: Optional[str] + validation_split_percentage: int + max_length: int = 512 + group: bool = True + block_size: int = 512 + shuffle: bool = False + + +class RayResourceConfig(BaseModel): + CPU: int + GPU: int = 0 + HPU: int = 0 + + +class Training(BaseModel): + optimizer: str + batch_size: int + epochs: int + max_train_steps: Optional[int] = None + learning_rate: float + lr_scheduler: str + weight_decay: float + device: str = DEVICE_CPU + hpu_execution_mode: str = "lazy" + num_training_workers: int + resources_per_worker: RayResourceConfig + accelerate_mode: str = ACCELERATE_STRATEGY_DDP + mixed_precision: str = PRECISION_NO + gradient_accumulation_steps: int = 1 + logging_steps: int = 10 + deepspeed_config_file: str = "" + + @validator("device") + def check_device(cls, v: str): + # will 
convert to lower case + if v: + assert v.lower() in [DEVICE_CPU, DEVICE_GPU, DEVICE_HPU] + return v.lower() + + @validator("hpu_execution_mode") + def check_hpu_execution_mode(cls, v: str): + if v: + assert v in ["lazy", "eager", "eager.compile"] + return v + + @validator("accelerate_mode") + def check_accelerate_mode(cls, v: str): + if v: + assert v in [ + ACCELERATE_STRATEGY_DDP, + ACCELERATE_STRATEGY_FSDP, + ACCELERATE_STRATEGY_DEEPSPEED, + ] + return v + + @validator("mixed_precision") + def check_mixed_precision(cls, v: str): + if v: + assert v in [PRECISION_BF16, PRECISION_FP16, PRECISION_NO] + return v + + @validator("logging_steps") + def check_logging_steps(cls, v: int): + assert v > 0 + return v + + # @model_validator(mode='after') + # def check_device_and_accelerate_mode(self) -> "Training": + # dev = self.device + # res = self.resources_per_worker + # mode = self.accelerate_mode + # if dev == "CPU": + # if res.GPU is not None and res.GPU > 0: + # raise ValueError("Please not specified GPU resource when use CPU only in Ray.") + # if mode != "CPU_DDP": + # raise ValueError("Please specified CPU related accelerate mode when use CPU only in Ray.") + # elif dev == "GPU": + # if res.GPU is None or res.GPU == 0: + # raise ValueError("Please specified GPU resource when use GPU to fine tune in Ray.") + # if mode not in ["GPU_DDP", "GPU_FSDP"]: + # raise ValueError("Please speicifed GPU related accelerate mode when use GPU to fine tune in Ray.") + + # return self + + +class FinetuneConfig(BaseModel): + General: General + Dataset: Dataset + Training: Training diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index bfb12fc33..08a0f9a59 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -9,11 +9,13 @@ from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client from finetune_config import FinetuneConfig + from pydantic_yaml import parse_yaml_raw_as, to_yaml_file -from ray.job_submission import JobSubmissionClient from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest +from ray.job_submission import JobSubmissionClient + FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} @@ -36,15 +38,16 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): finetune_config = parse_yaml_raw_as(FinetuneConfig, f) finetune_config.Dataset.train_file = train_file_path + + if request.hyperparameters is not None: + if request.hyperparameters.epochs != "auto": + finetune_config.Training.epochs = request.hyperparameters.epochs - if request.hyperparameters.epochs != "auto": - finetune_config.Training.epochs = request.hyperparameters.epochs - - if request.hyperparameters.batch_size != "auto": - finetune_config.Training.batch_size = request.hyperparameters.batch_size + if request.hyperparameters.batch_size != "auto": + finetune_config.Training.batch_size = request.hyperparameters.batch_size - if request.hyperparameters.learning_rate_multiplier != "auto": - finetune_config.Training.learning_rate = request.hyperparameters.learning_rate_multiplier + if request.hyperparameters.learning_rate_multiplier != "auto": + finetune_config.Training.learning_rate = request.hyperparameters.learning_rate_multiplier job = FineTuningJob( id=f"ft-job-{uuid.uuid4()}", From da99a1022e0c43913df3e91fe142e1b17130ab8b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> 
Date: Thu, 15 Aug 2024 10:23:36 +0000 Subject: [PATCH 6/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/finetuning/handlers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index 08a0f9a59..41e95942e 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -9,13 +9,11 @@ from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client from finetune_config import FinetuneConfig - from pydantic_yaml import parse_yaml_raw_as, to_yaml_file +from ray.job_submission import JobSubmissionClient from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest -from ray.job_submission import JobSubmissionClient - FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} @@ -38,7 +36,7 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): finetune_config = parse_yaml_raw_as(FinetuneConfig, f) finetune_config.Dataset.train_file = train_file_path - + if request.hyperparameters is not None: if request.hyperparameters.epochs != "auto": finetune_config.Training.epochs = request.hyperparameters.epochs From 93546a9dda35559e58eaf730f596f2307b9dcaac Mon Sep 17 00:00:00 2001 From: "Yue, Wenjiao" Date: Mon, 19 Aug 2024 12:33:00 +0800 Subject: [PATCH 7/9] add finetuning list job, cancel job, retrieve feature Signed-off-by: Yue, Wenjiao --- comps/cores/mega/micro_service.py | 5 ++-- comps/cores/proto/api_protocol.py | 35 ++++++++++++++++++++++- comps/finetuning/finetuning.py | 22 +++++++++++++-- comps/finetuning/handlers.py | 46 ++++++++++++++++++++++++++++++- 4 files changed, 102 insertions(+), 6 deletions(-) diff --git a/comps/cores/mega/micro_service.py b/comps/cores/mega/micro_service.py index e1276716c..285bc34a6 100644 --- a/comps/cores/mega/micro_service.py +++ b/comps/cores/mega/micro_service.py @@ -3,7 +3,7 @@ import asyncio import multiprocessing -from typing import Any, Optional, Type +from typing import Any, List, Optional, Type from ..proto.docarray import TextDoc from .constants import ServiceRoleType, ServiceType @@ -154,6 +154,7 @@ def register_microservice( output_datatype: Type[Any] = TextDoc, provider: Optional[str] = None, provider_endpoint: Optional[str] = None, + methods: List[str] = ["POST"], ): def decorator(func): if name not in opea_microservices: @@ -173,7 +174,7 @@ def decorator(func): provider_endpoint=provider_endpoint, ) opea_microservices[name] = micro_service - opea_microservices[name].app.router.add_api_route(endpoint, func, methods=["POST"]) + opea_microservices[name].app.router.add_api_route(endpoint, func, methods=methods) return func return decorator diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 8db7251d1..ca302922d 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -79,7 +79,6 @@ class TokenCheckResponseItem(BaseModel): class TokenCheckResponse(BaseModel): prompts: List[TokenCheckResponseItem] - class EmbeddingRequest(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/embeddings @@ -648,3 +647,37 @@ class FineTuningJob(BaseModel): The value will be null if the fine-tuning job is not running. 
""" + +class FineTuningJobIDRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/list + fine_tuning_job_id: str + """The ID of the fine-tuning job.""" + + +class FineTuningJobListRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/list + after: Optional[str] = None + """Identifier for the last job from the previous pagination request.""" + + limit: Optional[int] = 20 + """Number of fine-tuning jobs to retrieve.""" + + +class FineTuningJobList(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/list + object: str = "list" + """The object type, which is always "list". This indicates that the returned data is a list of fine-tuning jobs.""" + + data: List[FineTuningJob] + """A list containing FineTuningJob objects.""" + + has_more: bool + """Indicates whether there are more fine-tuning jobs beyond the current list. + + If true, additional requests can be made to retrieve more jobs. + """ + + diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index 4c3945ce3..7796bdf7e 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -1,16 +1,34 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from handlers import handle_create_finetuning_jobs +from handlers import ( + handle_cancel_finetuning_job, + handle_create_finetuning_jobs, + handle_list_finetuning_jobs, + handle_retrieve_finetuning_job, +) from comps import opea_microservices, register_microservice -from comps.cores.proto.api_protocol import FineTuningJobsRequest +from comps.cores.proto.api_protocol import FineTuningJobIDRequest, FineTuningJobListRequest, FineTuningJobsRequest @register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001) def create_finetuning_jobs(request: FineTuningJobsRequest): return handle_create_finetuning_jobs(request) +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001, methods=["GET"]) +def list_finetuning_jobs(request: FineTuningJobListRequest): + return handle_list_finetuning_jobs(request) + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}", host="0.0.0.0", port=8001, methods=["GET"]) +def retrieve_finetuning_job(request: FineTuningJobIDRequest): + job = handle_retrieve_finetuning_job(request) + return job + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel", host="0.0.0.0", port=8001) +def cancel_finetuning_job(request: FineTuningJobIDRequest): + job = handle_cancel_finetuning_job(request) + return job if __name__ == "__main__": opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index 41e95942e..e3b6a4c27 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -12,7 +12,7 @@ from pydantic_yaml import parse_yaml_raw_as, to_yaml_file from ray.job_submission import JobSubmissionClient -from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest +from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobIDRequest, FineTuningJobList, FineTuningJobsRequest FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} @@ 
-80,3 +80,47 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): finetuning_job_to_ray_job[job.id] = ray_job_id return job + + +def handle_list_finetuning_jobs(): + finetuning_jobs_list = FineTuningJobList(data=list(running_finetuning_jobs.values()), has_more=False) + + return finetuning_jobs_list + + +def handle_retrieve_finetuning_job(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + return job + + +def handle_cancel_finetuning_job(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + ray_job_id = finetuning_job_to_ray_job.get(fine_tuning_job_id) + if ray_job_id is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + + global ray_client + ray_client = JobSubmissionClient() if ray_client is None else ray_client + ray_client.stop_job(ray_job_id) + + job = running_finetuning_jobs.get(fine_tuning_job_id) + + if job is None: + raise HTTPException(status_code=404, detail=f"Job with ID '{fine_tuning_job_id}' not found in running jobs!") + + # Check the job status before attempting to cancel + if job.status == "running": + # Stop the Ray job + ray_client.stop_job(ray_job_id) + # Update job status to cancelled + job.status = "cancelled" + else: + # If the job is not running, return a message indicating it cannot be cancelled + raise HTTPException(status_code=400, detail=f"Job with ID '{fine_tuning_job_id}' is not running and cannot be cancelled.") + + return job \ No newline at end of file From 931e5327f6c89508581bed8ad19bb260ba6cdefe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 04:36:51 +0000 Subject: [PATCH 8/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/cores/mega/micro_service.py | 2 +- comps/cores/proto/api_protocol.py | 17 ++++++++++------- comps/finetuning/finetuning.py | 25 +++++++++++++++++++++---- comps/finetuning/handlers.py | 15 +++++++++++---- 4 files changed, 43 insertions(+), 16 deletions(-) diff --git a/comps/cores/mega/micro_service.py b/comps/cores/mega/micro_service.py index 285bc34a6..689fff9dd 100644 --- a/comps/cores/mega/micro_service.py +++ b/comps/cores/mega/micro_service.py @@ -154,7 +154,7 @@ def register_microservice( output_datatype: Type[Any] = TextDoc, provider: Optional[str] = None, provider_endpoint: Optional[str] = None, - methods: List[str] = ["POST"], + methods: List[str] = ["POST"], ): def decorator(func): if name not in opea_microservices: diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index ca302922d..c02e29f48 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -79,6 +79,7 @@ class TokenCheckResponseItem(BaseModel): class TokenCheckResponse(BaseModel): prompts: List[TokenCheckResponseItem] + class EmbeddingRequest(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/embeddings @@ -648,12 +649,13 @@ class FineTuningJob(BaseModel): The value will be null if the fine-tuning job is not running. 
""" + class FineTuningJobIDRequest(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/fine-tuning/list fine_tuning_job_id: str """The ID of the fine-tuning job.""" - + class FineTuningJobListRequest(BaseModel): # Ordered by official OpenAI API documentation @@ -663,21 +665,22 @@ class FineTuningJobListRequest(BaseModel): limit: Optional[int] = 20 """Number of fine-tuning jobs to retrieve.""" - + class FineTuningJobList(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/fine-tuning/list object: str = "list" - """The object type, which is always "list". This indicates that the returned data is a list of fine-tuning jobs.""" - + """The object type, which is always "list". + + This indicates that the returned data is a list of fine-tuning jobs. + """ + data: List[FineTuningJob] """A list containing FineTuningJob objects.""" has_more: bool """Indicates whether there are more fine-tuning jobs beyond the current list. - + If true, additional requests can be made to retrieve more jobs. """ - - diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index 7796bdf7e..8e79e5642 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -16,19 +16,36 @@ def create_finetuning_jobs(request: FineTuningJobsRequest): return handle_create_finetuning_jobs(request) -@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001, methods=["GET"]) + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001, methods=["GET"] +) def list_finetuning_jobs(request: FineTuningJobListRequest): return handle_list_finetuning_jobs(request) - -@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}", host="0.0.0.0", port=8001, methods=["GET"]) + + +@register_microservice( + name="opea_service@finetuning", + endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}", + host="0.0.0.0", + port=8001, + methods=["GET"], +) def retrieve_finetuning_job(request: FineTuningJobIDRequest): job = handle_retrieve_finetuning_job(request) return job -@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel", host="0.0.0.0", port=8001) + +@register_microservice( + name="opea_service@finetuning", + endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel", + host="0.0.0.0", + port=8001, +) def cancel_finetuning_job(request: FineTuningJobIDRequest): job = handle_cancel_finetuning_job(request) return job + if __name__ == "__main__": opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index e3b6a4c27..7721b52bc 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -12,7 +12,12 @@ from pydantic_yaml import parse_yaml_raw_as, to_yaml_file from ray.job_submission import JobSubmissionClient -from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobIDRequest, FineTuningJobList, FineTuningJobsRequest +from comps.cores.proto.api_protocol import ( + FineTuningJob, + FineTuningJobIDRequest, + FineTuningJobList, + FineTuningJobsRequest, +) FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} @@ -121,6 +126,8 @@ def handle_cancel_finetuning_job(request: FineTuningJobIDRequest): job.status = "cancelled" else: # If the job is not running, 
return a message indicating it cannot be cancelled - raise HTTPException(status_code=400, detail=f"Job with ID '{fine_tuning_job_id}' is not running and cannot be cancelled.") - - return job \ No newline at end of file + raise HTTPException( + status_code=400, detail=f"Job with ID '{fine_tuning_job_id}' is not running and cannot be cancelled." + ) + + return job From 09bea718a7fea38ffd5d4572c48e67fdc45b2ca6 Mon Sep 17 00:00:00 2001 From: WenjiaoYue Date: Mon, 19 Aug 2024 14:12:32 +0800 Subject: [PATCH 9/9] Update api_protocol.py --- comps/cores/proto/api_protocol.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index c02e29f48..1a1901d5d 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -652,7 +652,8 @@ class FineTuningJob(BaseModel): class FineTuningJobIDRequest(BaseModel): # Ordered by official OpenAI API documentation - # https://platform.openai.com/docs/api-reference/fine-tuning/list + # https://platform.openai.com/docs/api-reference/fine-tuning/retrieve + # https://platform.openai.com/docs/api-reference/fine-tuning/cancel fine_tuning_job_id: str """The ID of the fine-tuning job."""
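
A minimal client-side sketch of the endpoints added by this series. The paths
and the 8001 port come from finetuning.py; the host name and the training file
are placeholders, and a reachable Ray cluster plus a dataset already placed
under DATASET_BASE_PATH are assumed:

    import requests

    base = "http://localhost:8001/v1/fine_tuning"

    # Create a fine-tuning job.
    job = requests.post(
        f"{base}/jobs",
        json={"model": "meta-llama/Llama-2-7b-chat-hf", "training_file": "train.jsonl"},
    ).json()

    # List submitted jobs (GET route added in PATCH 7/9).
    print(requests.get(f"{base}/jobs").json())

    # Retrieve or cancel an individual job via the id-specific routes,
    # /v1/fine_tuning/jobs/{fine_tuning_job_id} and
    # /v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel.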