Skip to content
This repository has been archived by the owner on May 10, 2024. It is now read-only.

Commit

Permalink
pin fastchat version
Browse files Browse the repository at this point in the history
fix serving mounts for finetuned models

fix tests for fine tune serving

fix comma
  • Loading branch information
asaiacai committed Oct 27, 2023
1 parent 37a765a commit 4d94979
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 23 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/pytest-smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ jobs:
python-version: ["3.10"]
test-path:
- tests/test_cli.py::test_hf_serve
- tests/test_cli.py::test_llmatc_serve
- tests/test_launch.py
- tests/test_runtracker.py
- tests/test_serve.py
Expand Down Expand Up @@ -52,4 +53,5 @@ jobs:
name: error-log
path: |
/tmp/serve_huggingface-*.log
/tmp/serve_llmatc-*.log
/home/runner/sky_logs/sky-*/*.log
17 changes: 13 additions & 4 deletions docs/source/quickstart/serving.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ Deployment
----------

Model deployments are referenced by their HuggingFace modelhub name. Finetuned models trained through LLM-ATC are referenced
by using the :code:`llm-atc/` prefix.
by passing :code:`--name llm-atc`.

.. code-block:: console
# serve an llm-atc finetuned model, requires `llm-atc/` prefix and grabs model checkpoint from object store
$ llm-atc serve --name llm-atc/myvicuna --source s3://my-bucket/my_vicuna/ --accelerator A100:1 -c servecluster --cloud gcp --region asia-southeast1 --envs "HF_TOKEN=<HuggingFace_token>"
# serve an llm-atc fine-tuned model: requires :code:`--name llm-atc` and a :code:`--source` object store path from which the model checkpoint is fetched
$ llm-atc serve --name llm-atc --source s3://my-bucket/my_vicuna/ --accelerator A100:1 -c servecluster --cloud gcp --region asia-southeast1 --envs "HF_TOKEN=<HuggingFace_token>"
# serve a HuggingFace model, e.g. `lmsys/vicuna-13b-v1.3`
$ llm-atc serve --name lmsys/vicuna-13b-v1.3 --accelerator A100:1 -c servecluster --cloud gcp --region asia-southeast1 --envs "HF_TOKEN=<HuggingFace_token>"
Expand All @@ -33,11 +33,20 @@ from your laptop.
.. code-block:: console
# get the ip address of the OpenAI API endpoint
$ ip=$(grep -A1 "Host servecluster" ~/.ssh/config | grep "HostName" | awk '{print $2}')
$ ip=$(sky status --ip servecluster)
# test which models are available
$ curl http://$ip:8000/v1/models
# chat completion
$ curl http://$ip:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [{"role": "user", "content": "Hello! What is your name?"}]
}'
# shutdown when done
$ sky stop servecluster
Expand Down
12 changes: 0 additions & 12 deletions llm_atc/config/serve/serve.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,6 @@ setup: |
pip install git+https://github.com/huggingface/transformers.git
sudo apt update
sudo apt install -y rclone
# copy files from object store onto disk
if [[ $MODEL_NAME == llm-atc/* ]];
then
CHECKPOINT="/$MODEL_NAME/"
LOCAL_CHKPT="./$MODEL_NAME/"
mkdir -p $LOCAL_CHKPT
rclone sync --progress --exclude "train*" $CHECKPOINT $LOCAL_CHKPT
fi
run: |
master_addr=`echo "$SKYPILOT_NODE_IPS" | head -n1`
Expand Down
1 change: 1 addition & 0 deletions llm_atc/config/train/vicuna.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ setup: |
pip install torch==2.0.1 --extra-index-url https://download.pytorch.org/whl/cu116
git clone https://github.com/lm-sys/FastChat.git
cd FastChat
git checkout cbf285360e8e809a316c88a8377c1bb0f0c770bc
pip install -e .
if [ $USE_FLASH_ATTN -eq 1 ]; then
pip install packaging
Expand Down
10 changes: 4 additions & 6 deletions llm_atc/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ def serve_route(model_name: str, source: Optional[str] = None, **serve_kwargs):
raise ValueError(
"Attempting to use a finetuned model without a corresponding object store location"
)
elif not source is None and not model_name.startswith("llm-atc/"):
logging.warning(
"Specified object store mount but model is not an llm-atc model. Skipping mounting."
)
return Serve(model_name, source, **serve_kwargs).serve()


Expand Down Expand Up @@ -69,6 +65,10 @@ def default_serve_task(self) -> sky.Task:
def serve(self) -> sky.Task:
"""Deploy fastchat.serve.openai_api_server with vllm_worker"""
serve_task = self.default_serve_task
if self.source and self.names == "llm-atc":
logging.info(f"Using a fine tuned model at {self.source}")
serve_task.update_file_mounts({"/llm-atc": self.source})
self.names = "/llm-atc"
self.envs["MODEL_NAME"] = self.names
if "HF_TOKEN" not in self.envs:
logging.warning(
Expand All @@ -80,6 +80,4 @@ def serve(self) -> sky.Task:
resource._cloud = sky.clouds.CLOUD_REGISTRY.from_str(self.cloud)
resource._set_region_zone(self.region, self.zone)
serve_task.set_resources(resource)
if self.source and self.names.startswith("llm-atc/"):
serve_task.update_file_mounts({"/" + self.names: self.source})
return serve_task
24 changes: 23 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,29 @@ def test_hf_serve():
+ """awk '{print $2}'); echo $ip; curl http://"$ip":8000/v1/models | grep vicuna""",
],
f"sky stop -y {name} ; sleep 300 ; sky down --purge -y {name}",
timeout=45 * 60,
timeout=30 * 60,
)
run_one_test(test)


@pytest.mark.cli
def test_llmatc_serve():
    """Smoke-test serving an llm-atc fine-tuned model end to end.

    Launches ``llm-atc serve`` with ``--name llm-atc`` and an S3 ``--source``
    checkpoint, waits for the cluster to provision, then probes the
    ``/v1/models`` endpoint on port 8000 and greps the response for the
    served model name. The teardown command stops the cluster and then
    force-downs it (``--purge``) regardless of test outcome.
    """

    # Cluster name used for launch, IP lookup, and teardown below.
    name = "test_fine_tune"
    ssh_config = os.path.expanduser("~/.ssh/config")
    test = Test(
        "serve_llmatc",
        [
            # --name llm-atc marks this as a fine-tuned model; the checkpoint
            # is mounted from the --source object store path.
            f"llm-atc serve --detach_run --name llm-atc --source s3://my-trainy-bucket/mymistral --accelerator V100:1 -c {name} --cloud aws --region us-east-2",
            # Give the cluster time to provision and start the server.
            "sleep 300",
            # Resolve the head-node IP from ~/.ssh/config, then query the
            # models endpoint and require the fine-tuned name in the reply.
            f"""ip=$(grep -A1 "Host {name}" {ssh_config} | grep "HostName" | """
            + """awk '{print $2}'); echo $ip; curl http://"$ip":8000/v1/models | grep llm-atc""",
        ],
        # Teardown: stop, wait, then purge-down so billing resources are freed
        # even if stop partially fails.
        f"sky stop -y {name} ; sleep 300 ; sky down --purge -y {name}",
        timeout=30 * 60,
    )
    run_one_test(test)

Expand Down

0 comments on commit 4d94979

Please sign in to comment.