From ea1501ac3bd7a331ee370f738ccfbbb2c137bfce Mon Sep 17 00:00:00 2001 From: changwangss Date: Thu, 15 Aug 2024 05:16:20 +0000 Subject: [PATCH 1/9] add finetuning job protocol. --- comps/cores/proto/api_protocol.py | 181 ++++++++++++++++++++++++++++++ comps/finetuning/env.py | 11 ++ comps/finetuning/finetuning.py | 14 +++ comps/finetuning/handlers.py | 76 +++++++++++++ 4 files changed, 282 insertions(+) create mode 100644 comps/finetuning/env.py create mode 100644 comps/finetuning/finetuning.py create mode 100644 comps/finetuning/handlers.py diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 382982d27..d6e1e6ae3 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -463,3 +463,184 @@ def check_requests(request) -> Optional[JSONResponse]: ) return None + + +class Hyperparameters(BaseModel): + batch_size: Optional[Literal["auto"], int] = "auto" + """Number of examples in each batch. + A larger batch size means that model parameters are updated less frequently, but with lower variance.""" + + learning_rate_multiplier: Optional[Literal["auto"], float] = "auto" + """Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting.""" + + n_epochs: Optional[Literal["auto"], int] = "auto" + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. "auto" decides + the optimal number of epochs based on the size of the dataset. If setting the + number manually, we support any number between 1 and 50 epochs. + """ + +class FineTuningJobWandbIntegration(BaseModel): + project: str + """The name of the project that the new run will be created under.""" + + entity: Optional[str] = None + """The entity to use for the run. + + This allows you to set the team or username of the WandB user that you would + like associated with the run. If not set, the default entity for the registered + WandB API key is used. + """ + + name: Optional[str] = None + """A display name to set for the run. + + If not set, we will use the Job ID as the name. + """ + + tags: Optional[List[str]] = None + """A list of tags to be attached to the newly created run. + + These tags are passed through directly to WandB. Some default tags are generated + by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}". + """ + + +class FineTuningJobWandbIntegrationObject(BaseModel): + type: Literal["wandb"] + """The type of the integration being enabled for the fine-tuning job""" + + wandb: FineTuningJobWandbIntegration + """The settings for your integration with Weights and Biases. + + This payload specifies the project that metrics will be sent to. Optionally, you + can set an explicit display name for your run, add tags to your run, and set a + default entity (team, username, etc) to be associated with your run. 
+ """ + +class FineTuningJobsRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/create + model: str + """The name of the model to fine-tune.""" + + training_file: str + """The ID of an uploaded file that contains training data.""" + + hyperparameters: Optional[Hyperparameters] + """The hyperparameters used for the fine-tuning job.""" + + suffix: Optional[str, None] = None + """A string of up to 64 characters that will be added to your fine-tuned model name.""" + + validation_file: Optional[str, None] = None + """The ID of an uploaded file that contains validation data.""" + + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + """A list of integrations to enable for your fine-tuning job.""" + + seed: Optional[str, None] = None + + +class Error(BaseModel): + code: str + """A machine-readable error code.""" + + message: str + """A human-readable error message.""" + + param: Optional[str] = None + """The parameter that was invalid, usually `training_file` or `validation_file`. + + This field will be null if the failure was not parameter-specific. + """ + + +class FineTuningJob(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/object + id: str + """The object identifier, which can be referenced in the API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" + + error: Optional[Error] = None + """ + For fine-tuning jobs that have `failed`, this will contain more information on + the cause of the failure. + """ + + fine_tuned_model: Optional[str] = None + """The name of the fine-tuned model that is being created. + + The value will be null if the fine-tuning job is still running. + """ + + finished_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the fine-tuning job was finished. + + The value will be null if the fine-tuning job is still running. + """ + + hyperparameters: Hyperparameters + """The hyperparameters used for the fine-tuning job. + + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) + for more details. + """ + + model: str + """The base model that is being fine-tuned.""" + + object: Literal["fine_tuning.job"] + """The object type, which is always "fine_tuning.job".""" + + organization_id: str + """The organization that owns the fine-tuning job.""" + + result_files: List[str] + """The compiled results file ID(s) for the fine-tuning job. + + You can retrieve the results with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"] + """ + The current status of the fine-tuning job, which can be either + `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. + """ + + trained_tokens: Optional[int] = None + """The total number of billable tokens processed by this fine-tuning job. + + The value will be null if the fine-tuning job is still running. + """ + + training_file: str + """The file ID used for training. + + You can retrieve the training data with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + validation_file: Optional[str] = None + """The file ID used for validation. 
+ + You can retrieve the validation results with the + [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents). + """ + + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + """A list of integrations to enable for this fine-tuning job.""" + + seed: int + """The seed used for the fine-tuning job.""" + + estimated_finish: Optional[int] = None + """ + The Unix timestamp (in seconds) for when the fine-tuning job is estimated to + finish. The value will be null if the fine-tuning job is not running. + """ diff --git a/comps/finetuning/env.py b/comps/finetuning/env.py new file mode 100644 index 000000000..2f24f7b3e --- /dev/null +++ b/comps/finetuning/env.py @@ -0,0 +1,11 @@ + +MODEL_CONFIG_FILE_MAP = { + "meta-llama/Llama-2-7b-chat-hf": "./models/llama-2-7b-chat-hf.yaml", + "mistralai/Mistral-7B-v0.1": "./models/mistral-7b-v0.1.yaml", +} + +DATASET_BASE_PATH = "datasets" + +CHECK_JOB_STATUS_INTERVAL = 5 # Check every 5 secs + +ray_client = None diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py new file mode 100644 index 000000000..b26331282 --- /dev/null +++ b/comps/finetuning/finetuning.py @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from comps import opea_microservices, register_microservice + +from comps.cores.proto.api_protocol import FineTuningJobsRequest + +from handlers import ( + handle_create_finetuning_jobs, +) + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001) +def create_finetuning_jobs(request: FineTuningJobsRequest): + return handle_create_finetuning_jobs(request) diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py new file mode 100644 index 000000000..df3dc1aef --- /dev/null +++ b/comps/finetuning/handlers.py @@ -0,0 +1,76 @@ + +import os +import random +import time +import uuid +from typing import Any, Dict, List, Set +from pydantic_yaml import parse_yaml_raw_as, to_yaml_file + + +from comps.cores.proto.api_protocol import FineTuningJobsRequest, FineTuningJob + +from envs import ( + DATASET_BASE_PATH, + MODEL_CONFIG_FILE_MAP, + CHECK_JOB_STATUS_INTERVAL, + ray_client +) + +FineTuningJobID = str +running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} +finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} + + +def handle_create_finetuning_jobs(request: FineTuningJobsRequest): + base_model = request.model + train_file = request.training_file + train_file_path = os.path.join(DATASET_BASE_PATH, train_file) + + model_config_file = MODEL_CONFIG_FILE_MAP.get(base_model) + if not model_config_file: + raise HTTPException(status_code=404, detail=f"Base model '{base_model}' not supported!") + + if not os.path.exists(train_file_path): + raise HTTPException(status_code=404, detail=f"Training file '{train_file}' not found!") + + with open(model_config_file) as f: + finetune_config = parse_yaml_raw_as(FinetuneConfig, f) + + finetune_config.Dataset.train_file = train_file_path + + job = FineTuningJob( + id=f"ft-job-{uuid.uuid4()}", + model=base_model, + created_at=int(time.time()), + training_file=train_file, + hyperparameters={ + "n_epochs": finetune_config.Training.epochs, + "batch_size": finetune_config.Training.batch_size, + "learning_rate_multiplier": finetune_config.Training.learning_rate, + }, + status="running", + # TODO: Add seed in finetune config + seed=random.randint(0, 1000), + ) + + finetune_config_file = f"jobs/{job.id}.yaml" + 
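+    # Persist the per-job fine-tuning config to YAML so the Ray entrypoint
+    # (finetune_runner.py, invoked below) can load it back, then submit the
+    # job to the Ray cluster from the current working directory.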
to_yaml_file(finetune_config_file, finetune_config) + + global ray_client + ray_client = JobSubmissionClient() if ray_client is None else ray_client + + ray_job_id = ray_client.submit_job( + # Entrypoint shell command to execute + entrypoint=f"python finetune_runner.py --config_file {finetune_config_file}", + # Path to the local directory that contains the script.py file + runtime_env={"working_dir": "./"}, + ) + print(f"Submitted Ray job: {ray_job_id} ...") + + running_finetuning_jobs[job.id] = job + finetuning_job_to_ray_job[job.id] = ray_job_id + + # background_tasks.add_task(update_job_status, job.id) + + return job + From 2346ead2b26b3de18a358e2fba9a390f836d4eb7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 05:17:33 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/cores/proto/api_protocol.py | 34 +++++++++++++++++-------------- comps/finetuning/env.py | 2 ++ comps/finetuning/finetuning.py | 6 ++---- comps/finetuning/handlers.py | 15 +++++--------- 4 files changed, 28 insertions(+), 29 deletions(-) diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index d6e1e6ae3..24af09ed8 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -468,10 +468,15 @@ def check_requests(request) -> Optional[JSONResponse]: class Hyperparameters(BaseModel): batch_size: Optional[Literal["auto"], int] = "auto" """Number of examples in each batch. - A larger batch size means that model parameters are updated less frequently, but with lower variance.""" + + A larger batch size means that model parameters are updated less frequently, but with lower variance. + """ learning_rate_multiplier: Optional[Literal["auto"], float] = "auto" - """Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting.""" + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ n_epochs: Optional[Literal["auto"], int] = "auto" """The number of epochs to train the model for. @@ -481,6 +486,7 @@ class Hyperparameters(BaseModel): number manually, we support any number between 1 and 50 epochs. """ + class FineTuningJobWandbIntegration(BaseModel): project: str """The name of the project that the new run will be created under.""" @@ -509,7 +515,7 @@ class FineTuningJobWandbIntegration(BaseModel): class FineTuningJobWandbIntegrationObject(BaseModel): type: Literal["wandb"] - """The type of the integration being enabled for the fine-tuning job""" + """The type of the integration being enabled for the fine-tuning job.""" wandb: FineTuningJobWandbIntegration """The settings for your integration with Weights and Biases. @@ -519,6 +525,7 @@ class FineTuningJobWandbIntegrationObject(BaseModel): default entity (team, username, etc) to be associated with your run. 
""" + class FineTuningJobsRequest(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/fine-tuning/create @@ -537,7 +544,7 @@ class FineTuningJobsRequest(BaseModel): validation_file: Optional[str, None] = None """The ID of an uploaded file that contains validation data.""" - integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None + integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for your fine-tuning job.""" seed: Optional[str, None] = None @@ -567,10 +574,8 @@ class FineTuningJob(BaseModel): """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" error: Optional[Error] = None - """ - For fine-tuning jobs that have `failed`, this will contain more information on - the cause of the failure. - """ + """For fine-tuning jobs that have `failed`, this will contain more information on + the cause of the failure.""" fine_tuned_model: Optional[str] = None """The name of the fine-tuned model that is being created. @@ -608,10 +613,8 @@ class FineTuningJob(BaseModel): """ status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"] - """ - The current status of the fine-tuning job, which can be either - `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`. - """ + """The current status of the fine-tuning job, which can be either + `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`.""" trained_tokens: Optional[int] = None """The total number of billable tokens processed by this fine-tuning job. @@ -640,7 +643,8 @@ class FineTuningJob(BaseModel): """The seed used for the fine-tuning job.""" estimated_finish: Optional[int] = None - """ - The Unix timestamp (in seconds) for when the fine-tuning job is estimated to - finish. The value will be null if the fine-tuning job is not running. + """The Unix timestamp (in seconds) for when the fine-tuning job is estimated to + finish. + + The value will be null if the fine-tuning job is not running. 
""" diff --git a/comps/finetuning/env.py b/comps/finetuning/env.py index 2f24f7b3e..b4ea5c6b5 100644 --- a/comps/finetuning/env.py +++ b/comps/finetuning/env.py @@ -1,3 +1,5 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 MODEL_CONFIG_FILE_MAP = { "meta-llama/Llama-2-7b-chat-hf": "./models/llama-2-7b-chat-hf.yaml", diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index b26331282..47dbed8e6 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -1,13 +1,11 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from comps import opea_microservices, register_microservice +from handlers import handle_create_finetuning_jobs +from comps import opea_microservices, register_microservice from comps.cores.proto.api_protocol import FineTuningJobsRequest -from handlers import ( - handle_create_finetuning_jobs, -) @register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001) def create_finetuning_jobs(request: FineTuningJobsRequest): diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index df3dc1aef..893f81809 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -1,20 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 import os import random import time import uuid from typing import Any, Dict, List, Set -from pydantic_yaml import parse_yaml_raw_as, to_yaml_file - -from comps.cores.proto.api_protocol import FineTuningJobsRequest, FineTuningJob +from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client +from pydantic_yaml import parse_yaml_raw_as, to_yaml_file -from envs import ( - DATASET_BASE_PATH, - MODEL_CONFIG_FILE_MAP, - CHECK_JOB_STATUS_INTERVAL, - ray_client -) +from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} @@ -73,4 +69,3 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): # background_tasks.add_task(update_job_status, job.id) return job - From 003acaa687ab80c08288cfdb97396c4eec39c6ce Mon Sep 17 00:00:00 2001 From: changwangss Date: Thu, 15 Aug 2024 08:47:26 +0000 Subject: [PATCH 3/9] update create finetuning job code. --- comps/cores/proto/api_protocol.py | 22 +++++----- comps/finetuning/{env.py => envs.py} | 8 ++++ comps/finetuning/finetuning.py | 3 ++ comps/finetuning/handlers.py | 16 +++++++- .../finetuning/models/llama-2-7b-chat-hf.yaml | 40 +++++++++++++++++++ 5 files changed, 76 insertions(+), 13 deletions(-) rename comps/finetuning/{env.py => envs.py} (66%) create mode 100644 comps/finetuning/models/llama-2-7b-chat-hf.yaml diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 24af09ed8..b533e04c2 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -466,19 +466,19 @@ def check_requests(request) -> Optional[JSONResponse]: class Hyperparameters(BaseModel): - batch_size: Optional[Literal["auto"], int] = "auto" + batch_size: Optional[Union[Literal["auto"], int]] = "auto" """Number of examples in each batch. A larger batch size means that model parameters are updated less frequently, but with lower variance. 
""" - learning_rate_multiplier: Optional[Literal["auto"], float] = "auto" + learning_rate_multiplier: Optional[Union[Literal["auto"], float]] = "auto" """Scaling factor for the learning rate. A smaller learning rate may be useful to avoid overfitting. """ - n_epochs: Optional[Literal["auto"], int] = "auto" + n_epochs: Optional[Union[Literal["auto"], int]] = "auto" """The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. "auto" decides @@ -535,19 +535,19 @@ class FineTuningJobsRequest(BaseModel): training_file: str """The ID of an uploaded file that contains training data.""" - hyperparameters: Optional[Hyperparameters] + hyperparameters: Optional[Hyperparameters] = Hyperparameters """The hyperparameters used for the fine-tuning job.""" - suffix: Optional[str, None] = None + suffix: Optional[str] = None """A string of up to 64 characters that will be added to your fine-tuned model name.""" - validation_file: Optional[str, None] = None + validation_file: Optional[str] = None """The ID of an uploaded file that contains validation data.""" integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for your fine-tuning job.""" - seed: Optional[str, None] = None + seed: Optional[str] = None class Error(BaseModel): @@ -599,13 +599,13 @@ class FineTuningJob(BaseModel): model: str """The base model that is being fine-tuned.""" - object: Literal["fine_tuning.job"] + object: Literal["fine_tuning.job"] = "fine_tuning.job" """The object type, which is always "fine_tuning.job".""" - organization_id: str + organization_id: Optional[str] = None """The organization that owns the fine-tuning job.""" - result_files: List[str] + result_files: List[str] = None """The compiled results file ID(s) for the fine-tuning job. 
You can retrieve the results with the @@ -639,7 +639,7 @@ class FineTuningJob(BaseModel): integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for this fine-tuning job.""" - seed: int + seed: Optional[int] = None """The seed used for the fine-tuning job.""" estimated_finish: Optional[int] = None diff --git a/comps/finetuning/env.py b/comps/finetuning/envs.py similarity index 66% rename from comps/finetuning/env.py rename to comps/finetuning/envs.py index b4ea5c6b5..0d4d41cd5 100644 --- a/comps/finetuning/env.py +++ b/comps/finetuning/envs.py @@ -1,12 +1,20 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os + MODEL_CONFIG_FILE_MAP = { "meta-llama/Llama-2-7b-chat-hf": "./models/llama-2-7b-chat-hf.yaml", "mistralai/Mistral-7B-v0.1": "./models/mistral-7b-v0.1.yaml", } DATASET_BASE_PATH = "datasets" +JOBS_PATH = "jobs" +if not os.path.exists(DATASET_BASE_PATH): + os.path.mkdir(DATASET_BASE_PATH) + +if not os.path.exists(JOBS_PATH): + os.path.mkdir(JOBS_PATH) CHECK_JOB_STATUS_INTERVAL = 5 # Check every 5 secs diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index 47dbed8e6..36d439bef 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -10,3 +10,6 @@ @register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001) def create_finetuning_jobs(request: FineTuningJobsRequest): return handle_create_finetuning_jobs(request) + +if __name__ == "__main__": + opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index 893f81809..13160acb8 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -8,10 +8,14 @@ from typing import Any, Dict, List, Set from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client +from finetune_config import FinetuneConfig + from pydantic_yaml import parse_yaml_raw_as, to_yaml_file from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest +from ray.job_submission import JobSubmissionClient + FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} @@ -23,6 +27,7 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): train_file_path = os.path.join(DATASET_BASE_PATH, train_file) model_config_file = MODEL_CONFIG_FILE_MAP.get(base_model) + if not model_config_file: raise HTTPException(status_code=404, detail=f"Base model '{base_model}' not supported!") @@ -34,6 +39,15 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): finetune_config.Dataset.train_file = train_file_path + if request.hyperparameters.epochs != "auto": + finetune_config.Training.epochs = request.hyperparameters.epochs + + if request.hyperparameters.batch_size != "auto": + finetune_config.Training.batch_size = request.hyperparameters.batch_size + + if request.hyperparameters.learning_rate_multiplier != "auto": + finetune_config.Training.learning_rate = request.hyperparameters.learning_rate_multiplier + job = FineTuningJob( id=f"ft-job-{uuid.uuid4()}", model=base_model, @@ -66,6 +80,4 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): running_finetuning_jobs[job.id] = job finetuning_job_to_ray_job[job.id] = ray_job_id - # background_tasks.add_task(update_job_status, job.id) - return job diff --git 
a/comps/finetuning/models/llama-2-7b-chat-hf.yaml b/comps/finetuning/models/llama-2-7b-chat-hf.yaml new file mode 100644 index 000000000..ab62383d2 --- /dev/null +++ b/comps/finetuning/models/llama-2-7b-chat-hf.yaml @@ -0,0 +1,40 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +General: + base_model: meta-llama/Llama-2-7b-chat-hf + gpt_base_model: false + output_dir: /tmp/llm-ray/output + save_strategy: no + config: + trust_remote_code: false + use_auth_token: null + lora_config: + task_type: CAUSAL_LM + r: 8 + lora_alpha: 32 + lora_dropout: 0.1 + target_modules: + - q_proj + - v_proj + enable_gradient_checkpointing: false +Dataset: + train_file: examples/data/sample_finetune_data_small.jsonl + group: false + validation_file: null + validation_split_percentage: 5 +Training: + optimizer: adamw_torch + batch_size: 2 + epochs: 3 + learning_rate: 1.0e-05 + lr_scheduler: linear + weight_decay: 0.0 + mixed_precision: bf16 + device: cpu + num_training_workers: 2 + resources_per_worker: + CPU: 32 + accelerate_mode: DDP + gradient_accumulation_steps: 1 + logging_steps: 10 From 93c9f5ea6bccdf54530b905dc714699d9a685d68 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 15 Aug 2024 08:47:59 +0000 Subject: [PATCH 4/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/finetuning/finetuning.py | 1 + comps/finetuning/handlers.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index 36d439bef..4c3945ce3 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -11,5 +11,6 @@ def create_finetuning_jobs(request: FineTuningJobsRequest): return handle_create_finetuning_jobs(request) + if __name__ == "__main__": opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index 13160acb8..bfb12fc33 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -9,13 +9,11 @@ from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client from finetune_config import FinetuneConfig - from pydantic_yaml import parse_yaml_raw_as, to_yaml_file +from ray.job_submission import JobSubmissionClient from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest -from ray.job_submission import JobSubmissionClient - FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} From 8a7061b49a7ed16083c2f64b8421ac792ea1495f Mon Sep 17 00:00:00 2001 From: changwangss Date: Thu, 15 Aug 2024 10:23:49 +0000 Subject: [PATCH 5/9] update creating finetuning job code. 
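
The request's optional hyperparameters block, when present, now overrides the
per-model YAML defaults (epochs, batch size, learning rate). A request body
along the lines of the sketch below exercises the new path; the training file
name is a placeholder and must exist under DATASET_BASE_PATH:

    payload = {
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "training_file": "sample_finetune_data_small.jsonl",
        "hyperparameters": {"n_epochs": 2, "batch_size": 4, "learning_rate_multiplier": 1e-5},
    }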
--- comps/cores/proto/api_protocol.py | 2 +- comps/finetuning/finetune_config.py | 156 ++++++++++++++++++++++++++++ comps/finetuning/handlers.py | 19 ++-- 3 files changed, 168 insertions(+), 9 deletions(-) create mode 100644 comps/finetuning/finetune_config.py diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index b533e04c2..8db7251d1 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -535,7 +535,7 @@ class FineTuningJobsRequest(BaseModel): training_file: str """The ID of an uploaded file that contains training data.""" - hyperparameters: Optional[Hyperparameters] = Hyperparameters + hyperparameters: Optional[Hyperparameters] = None """The hyperparameters used for the fine-tuning job.""" suffix: Optional[str] = None diff --git a/comps/finetuning/finetune_config.py b/comps/finetuning/finetune_config.py new file mode 100644 index 000000000..c53b36131 --- /dev/null +++ b/comps/finetuning/finetune_config.py @@ -0,0 +1,156 @@ +# Copyright 2023 The LLM-on-Ray Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import List, Optional + +from pydantic import BaseModel, validator + +PRECISION_BF16 = "bf16" +PRECISION_FP16 = "fp16" +PRECISION_NO = "no" + +DEVICE_CPU = "cpu" +DEVICE_HPU = "hpu" +DEVICE_GPU = "gpu" + +ACCELERATE_STRATEGY_DDP = "DDP" +ACCELERATE_STRATEGY_FSDP = "FSDP" +ACCELERATE_STRATEGY_DEEPSPEED = "DEEPSPEED" + + +class GeneralConfig(BaseModel): + trust_remote_code: bool + use_auth_token: Optional[str] + + +class LoraConfig(BaseModel): + task_type: str + r: int + lora_alpha: int + lora_dropout: float + target_modules: Optional[List[str]] = None + + +class DeltatunerConfig(BaseModel): + algo: str + denas: bool + best_model_structure: str + + +class General(BaseModel): + base_model: str + tokenizer_name: Optional[str] = None + gaudi_config_name: Optional[str] = None + gpt_base_model: bool + output_dir: str + resume_from_checkpoint: Optional[str] = None + save_strategy: str = "no" + config: GeneralConfig + lora_config: Optional[LoraConfig] = None + deltatuner_config: Optional[DeltatunerConfig] = None + enable_gradient_checkpointing: bool = False + + +class Dataset(BaseModel): + train_file: str + validation_file: Optional[str] + validation_split_percentage: int + max_length: int = 512 + group: bool = True + block_size: int = 512 + shuffle: bool = False + + +class RayResourceConfig(BaseModel): + CPU: int + GPU: int = 0 + HPU: int = 0 + + +class Training(BaseModel): + optimizer: str + batch_size: int + epochs: int + max_train_steps: Optional[int] = None + learning_rate: float + lr_scheduler: str + weight_decay: float + device: str = DEVICE_CPU + hpu_execution_mode: str = "lazy" + num_training_workers: int + resources_per_worker: RayResourceConfig + accelerate_mode: str = ACCELERATE_STRATEGY_DDP + mixed_precision: str = PRECISION_NO + gradient_accumulation_steps: int = 1 + logging_steps: int = 10 + deepspeed_config_file: str = "" + + @validator("device") + def check_device(cls, v: str): + # will 
convert to lower case + if v: + assert v.lower() in [DEVICE_CPU, DEVICE_GPU, DEVICE_HPU] + return v.lower() + + @validator("hpu_execution_mode") + def check_hpu_execution_mode(cls, v: str): + if v: + assert v in ["lazy", "eager", "eager.compile"] + return v + + @validator("accelerate_mode") + def check_accelerate_mode(cls, v: str): + if v: + assert v in [ + ACCELERATE_STRATEGY_DDP, + ACCELERATE_STRATEGY_FSDP, + ACCELERATE_STRATEGY_DEEPSPEED, + ] + return v + + @validator("mixed_precision") + def check_mixed_precision(cls, v: str): + if v: + assert v in [PRECISION_BF16, PRECISION_FP16, PRECISION_NO] + return v + + @validator("logging_steps") + def check_logging_steps(cls, v: int): + assert v > 0 + return v + + # @model_validator(mode='after') + # def check_device_and_accelerate_mode(self) -> "Training": + # dev = self.device + # res = self.resources_per_worker + # mode = self.accelerate_mode + # if dev == "CPU": + # if res.GPU is not None and res.GPU > 0: + # raise ValueError("Please not specified GPU resource when use CPU only in Ray.") + # if mode != "CPU_DDP": + # raise ValueError("Please specified CPU related accelerate mode when use CPU only in Ray.") + # elif dev == "GPU": + # if res.GPU is None or res.GPU == 0: + # raise ValueError("Please specified GPU resource when use GPU to fine tune in Ray.") + # if mode not in ["GPU_DDP", "GPU_FSDP"]: + # raise ValueError("Please speicifed GPU related accelerate mode when use GPU to fine tune in Ray.") + + # return self + + +class FinetuneConfig(BaseModel): + General: General + Dataset: Dataset + Training: Training diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index bfb12fc33..08a0f9a59 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -9,11 +9,13 @@ from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client from finetune_config import FinetuneConfig + from pydantic_yaml import parse_yaml_raw_as, to_yaml_file -from ray.job_submission import JobSubmissionClient from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest +from ray.job_submission import JobSubmissionClient + FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} @@ -36,15 +38,16 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): finetune_config = parse_yaml_raw_as(FinetuneConfig, f) finetune_config.Dataset.train_file = train_file_path + + if request.hyperparameters is not None: + if request.hyperparameters.epochs != "auto": + finetune_config.Training.epochs = request.hyperparameters.epochs - if request.hyperparameters.epochs != "auto": - finetune_config.Training.epochs = request.hyperparameters.epochs - - if request.hyperparameters.batch_size != "auto": - finetune_config.Training.batch_size = request.hyperparameters.batch_size + if request.hyperparameters.batch_size != "auto": + finetune_config.Training.batch_size = request.hyperparameters.batch_size - if request.hyperparameters.learning_rate_multiplier != "auto": - finetune_config.Training.learning_rate = request.hyperparameters.learning_rate_multiplier + if request.hyperparameters.learning_rate_multiplier != "auto": + finetune_config.Training.learning_rate = request.hyperparameters.learning_rate_multiplier job = FineTuningJob( id=f"ft-job-{uuid.uuid4()}", From da99a1022e0c43913df3e91fe142e1b17130ab8b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> 
Date: Thu, 15 Aug 2024 10:23:36 +0000 Subject: [PATCH 6/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/finetuning/handlers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index 08a0f9a59..41e95942e 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -9,13 +9,11 @@ from envs import CHECK_JOB_STATUS_INTERVAL, DATASET_BASE_PATH, MODEL_CONFIG_FILE_MAP, ray_client from finetune_config import FinetuneConfig - from pydantic_yaml import parse_yaml_raw_as, to_yaml_file +from ray.job_submission import JobSubmissionClient from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest -from ray.job_submission import JobSubmissionClient - FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} finetuning_job_to_ray_job: Dict[FineTuningJobID, str] = {} @@ -38,7 +36,7 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): finetune_config = parse_yaml_raw_as(FinetuneConfig, f) finetune_config.Dataset.train_file = train_file_path - + if request.hyperparameters is not None: if request.hyperparameters.epochs != "auto": finetune_config.Training.epochs = request.hyperparameters.epochs From 93546a9dda35559e58eaf730f596f2307b9dcaac Mon Sep 17 00:00:00 2001 From: "Yue, Wenjiao" Date: Mon, 19 Aug 2024 12:33:00 +0800 Subject: [PATCH 7/9] add finetuning list job, cancel job, retrieve feature Signed-off-by: Yue, Wenjiao --- comps/cores/mega/micro_service.py | 5 ++-- comps/cores/proto/api_protocol.py | 35 ++++++++++++++++++++++- comps/finetuning/finetuning.py | 22 +++++++++++++-- comps/finetuning/handlers.py | 46 ++++++++++++++++++++++++++++++- 4 files changed, 102 insertions(+), 6 deletions(-) diff --git a/comps/cores/mega/micro_service.py b/comps/cores/mega/micro_service.py index e1276716c..285bc34a6 100644 --- a/comps/cores/mega/micro_service.py +++ b/comps/cores/mega/micro_service.py @@ -3,7 +3,7 @@ import asyncio import multiprocessing -from typing import Any, Optional, Type +from typing import Any, List, Optional, Type from ..proto.docarray import TextDoc from .constants import ServiceRoleType, ServiceType @@ -154,6 +154,7 @@ def register_microservice( output_datatype: Type[Any] = TextDoc, provider: Optional[str] = None, provider_endpoint: Optional[str] = None, + methods: List[str] = ["POST"], ): def decorator(func): if name not in opea_microservices: @@ -173,7 +174,7 @@ def decorator(func): provider_endpoint=provider_endpoint, ) opea_microservices[name] = micro_service - opea_microservices[name].app.router.add_api_route(endpoint, func, methods=["POST"]) + opea_microservices[name].app.router.add_api_route(endpoint, func, methods=methods) return func return decorator diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 8db7251d1..ca302922d 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -79,7 +79,6 @@ class TokenCheckResponseItem(BaseModel): class TokenCheckResponse(BaseModel): prompts: List[TokenCheckResponseItem] - class EmbeddingRequest(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/embeddings @@ -648,3 +647,37 @@ class FineTuningJob(BaseModel): The value will be null if the fine-tuning job is not running. 
""" + +class FineTuningJobIDRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/list + fine_tuning_job_id: str + """The ID of the fine-tuning job.""" + + +class FineTuningJobListRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/list + after: Optional[str] = None + """Identifier for the last job from the previous pagination request.""" + + limit: Optional[int] = 20 + """Number of fine-tuning jobs to retrieve.""" + + +class FineTuningJobList(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/fine-tuning/list + object: str = "list" + """The object type, which is always "list". This indicates that the returned data is a list of fine-tuning jobs.""" + + data: List[FineTuningJob] + """A list containing FineTuningJob objects.""" + + has_more: bool + """Indicates whether there are more fine-tuning jobs beyond the current list. + + If true, additional requests can be made to retrieve more jobs. + """ + + diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index 4c3945ce3..7796bdf7e 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -1,16 +1,34 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from handlers import handle_create_finetuning_jobs +from handlers import ( + handle_cancel_finetuning_job, + handle_create_finetuning_jobs, + handle_list_finetuning_jobs, + handle_retrieve_finetuning_job, +) from comps import opea_microservices, register_microservice -from comps.cores.proto.api_protocol import FineTuningJobsRequest +from comps.cores.proto.api_protocol import FineTuningJobIDRequest, FineTuningJobListRequest, FineTuningJobsRequest @register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001) def create_finetuning_jobs(request: FineTuningJobsRequest): return handle_create_finetuning_jobs(request) +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001, methods=["GET"]) +def list_finetuning_jobs(request: FineTuningJobListRequest): + return handle_list_finetuning_jobs(request) + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}", host="0.0.0.0", port=8001, methods=["GET"]) +def retrieve_finetuning_job(request: FineTuningJobIDRequest): + job = handle_retrieve_finetuning_job(request) + return job + +@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel", host="0.0.0.0", port=8001) +def cancel_finetuning_job(request: FineTuningJobIDRequest): + job = handle_cancel_finetuning_job(request) + return job if __name__ == "__main__": opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index 41e95942e..e3b6a4c27 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -12,7 +12,7 @@ from pydantic_yaml import parse_yaml_raw_as, to_yaml_file from ray.job_submission import JobSubmissionClient -from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobsRequest +from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobIDRequest, FineTuningJobList, FineTuningJobsRequest FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} @@ 
-80,3 +80,47 @@ def handle_create_finetuning_jobs(request: FineTuningJobsRequest): finetuning_job_to_ray_job[job.id] = ray_job_id return job + + +def handle_list_finetuning_jobs(): + finetuning_jobs_list = FineTuningJobList(data=list(running_finetuning_jobs.values()), has_more=False) + + return finetuning_jobs_list + + +def handle_retrieve_finetuning_job(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + job = running_finetuning_jobs.get(fine_tuning_job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + return job + + +def handle_cancel_finetuning_job(request: FineTuningJobIDRequest): + fine_tuning_job_id = request.fine_tuning_job_id + + ray_job_id = finetuning_job_to_ray_job.get(fine_tuning_job_id) + if ray_job_id is None: + raise HTTPException(status_code=404, detail=f"Fine-tuning job '{fine_tuning_job_id}' not found!") + + global ray_client + ray_client = JobSubmissionClient() if ray_client is None else ray_client + ray_client.stop_job(ray_job_id) + + job = running_finetuning_jobs.get(fine_tuning_job_id) + + if job is None: + raise HTTPException(status_code=404, detail=f"Job with ID '{fine_tuning_job_id}' not found in running jobs!") + + # Check the job status before attempting to cancel + if job.status == "running": + # Stop the Ray job + ray_client.stop_job(ray_job_id) + # Update job status to cancelled + job.status = "cancelled" + else: + # If the job is not running, return a message indicating it cannot be cancelled + raise HTTPException(status_code=400, detail=f"Job with ID '{fine_tuning_job_id}' is not running and cannot be cancelled.") + + return job \ No newline at end of file From 931e5327f6c89508581bed8ad19bb260ba6cdefe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 04:36:51 +0000 Subject: [PATCH 8/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- comps/cores/mega/micro_service.py | 2 +- comps/cores/proto/api_protocol.py | 17 ++++++++++------- comps/finetuning/finetuning.py | 25 +++++++++++++++++++++---- comps/finetuning/handlers.py | 15 +++++++++++---- 4 files changed, 43 insertions(+), 16 deletions(-) diff --git a/comps/cores/mega/micro_service.py b/comps/cores/mega/micro_service.py index 285bc34a6..689fff9dd 100644 --- a/comps/cores/mega/micro_service.py +++ b/comps/cores/mega/micro_service.py @@ -154,7 +154,7 @@ def register_microservice( output_datatype: Type[Any] = TextDoc, provider: Optional[str] = None, provider_endpoint: Optional[str] = None, - methods: List[str] = ["POST"], + methods: List[str] = ["POST"], ): def decorator(func): if name not in opea_microservices: diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index ca302922d..c02e29f48 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -79,6 +79,7 @@ class TokenCheckResponseItem(BaseModel): class TokenCheckResponse(BaseModel): prompts: List[TokenCheckResponseItem] + class EmbeddingRequest(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/embeddings @@ -648,12 +649,13 @@ class FineTuningJob(BaseModel): The value will be null if the fine-tuning job is not running. 
""" + class FineTuningJobIDRequest(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/fine-tuning/list fine_tuning_job_id: str """The ID of the fine-tuning job.""" - + class FineTuningJobListRequest(BaseModel): # Ordered by official OpenAI API documentation @@ -663,21 +665,22 @@ class FineTuningJobListRequest(BaseModel): limit: Optional[int] = 20 """Number of fine-tuning jobs to retrieve.""" - + class FineTuningJobList(BaseModel): # Ordered by official OpenAI API documentation # https://platform.openai.com/docs/api-reference/fine-tuning/list object: str = "list" - """The object type, which is always "list". This indicates that the returned data is a list of fine-tuning jobs.""" - + """The object type, which is always "list". + + This indicates that the returned data is a list of fine-tuning jobs. + """ + data: List[FineTuningJob] """A list containing FineTuningJob objects.""" has_more: bool """Indicates whether there are more fine-tuning jobs beyond the current list. - + If true, additional requests can be made to retrieve more jobs. """ - - diff --git a/comps/finetuning/finetuning.py b/comps/finetuning/finetuning.py index 7796bdf7e..8e79e5642 100644 --- a/comps/finetuning/finetuning.py +++ b/comps/finetuning/finetuning.py @@ -16,19 +16,36 @@ def create_finetuning_jobs(request: FineTuningJobsRequest): return handle_create_finetuning_jobs(request) -@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001, methods=["GET"]) + +@register_microservice( + name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs", host="0.0.0.0", port=8001, methods=["GET"] +) def list_finetuning_jobs(request: FineTuningJobListRequest): return handle_list_finetuning_jobs(request) - -@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}", host="0.0.0.0", port=8001, methods=["GET"]) + + +@register_microservice( + name="opea_service@finetuning", + endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}", + host="0.0.0.0", + port=8001, + methods=["GET"], +) def retrieve_finetuning_job(request: FineTuningJobIDRequest): job = handle_retrieve_finetuning_job(request) return job -@register_microservice(name="opea_service@finetuning", endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel", host="0.0.0.0", port=8001) + +@register_microservice( + name="opea_service@finetuning", + endpoint="/v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel", + host="0.0.0.0", + port=8001, +) def cancel_finetuning_job(request: FineTuningJobIDRequest): job = handle_cancel_finetuning_job(request) return job + if __name__ == "__main__": opea_microservices["opea_service@finetuning"].start() diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index e3b6a4c27..7721b52bc 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -12,7 +12,12 @@ from pydantic_yaml import parse_yaml_raw_as, to_yaml_file from ray.job_submission import JobSubmissionClient -from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobIDRequest, FineTuningJobList, FineTuningJobsRequest +from comps.cores.proto.api_protocol import ( + FineTuningJob, + FineTuningJobIDRequest, + FineTuningJobList, + FineTuningJobsRequest, +) FineTuningJobID = str running_finetuning_jobs: Dict[FineTuningJobID, FineTuningJob] = {} @@ -121,6 +126,8 @@ def handle_cancel_finetuning_job(request: FineTuningJobIDRequest): job.status = "cancelled" else: # If the job is not running, 
return a message indicating it cannot be cancelled - raise HTTPException(status_code=400, detail=f"Job with ID '{fine_tuning_job_id}' is not running and cannot be cancelled.") - - return job \ No newline at end of file + raise HTTPException( + status_code=400, detail=f"Job with ID '{fine_tuning_job_id}' is not running and cannot be cancelled." + ) + + return job From 09bea718a7fea38ffd5d4572c48e67fdc45b2ca6 Mon Sep 17 00:00:00 2001 From: WenjiaoYue Date: Mon, 19 Aug 2024 14:12:32 +0800 Subject: [PATCH 9/9] Update api_protocol.py --- comps/cores/proto/api_protocol.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index c02e29f48..1a1901d5d 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -652,7 +652,8 @@ class FineTuningJob(BaseModel): class FineTuningJobIDRequest(BaseModel): # Ordered by official OpenAI API documentation - # https://platform.openai.com/docs/api-reference/fine-tuning/list + # https://platform.openai.com/docs/api-reference/fine-tuning/retrieve + # https://platform.openai.com/docs/api-reference/fine-tuning/cancel fine_tuning_job_id: str """The ID of the fine-tuning job."""
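
A minimal client-side sketch of the endpoints added by this series. The paths
and the 8001 port come from finetuning.py; the host name and the training file
are placeholders, and a reachable Ray cluster plus a dataset already placed
under DATASET_BASE_PATH are assumed:

    import requests

    base = "http://localhost:8001/v1/fine_tuning"

    # Create a fine-tuning job.
    job = requests.post(
        f"{base}/jobs",
        json={"model": "meta-llama/Llama-2-7b-chat-hf", "training_file": "train.jsonl"},
    ).json()

    # List submitted jobs (GET route added in PATCH 7/9).
    print(requests.get(f"{base}/jobs").json())

    # Retrieve or cancel an individual job via the id-specific routes,
    # /v1/fine_tuning/jobs/{fine_tuning_job_id} and
    # /v1/fine_tuning/jobs/{fine_tuning_job_id}/cancel.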