diff --git a/manifests/FineTuning/README.md b/manifests/FineTuning/README.md
new file mode 100644
index 00000000..d5433276
--- /dev/null
+++ b/manifests/FineTuning/README.md
@@ -0,0 +1,34 @@
+# Deploy the KubeRay operator with the Helm chart
+
+```bash
+helm repo add kuberay https://ray-project.github.io/kuberay-helm/
+helm repo update
+
+# Install both CRDs and KubeRay operator v1.1.1.
+helm install kuberay-operator kuberay/kuberay-operator --version 1.1.1
+
+# Confirm that the operator is running in the namespace `default`.
+kubectl get pods
+# NAME                                READY   STATUS    RESTARTS   AGE
+# kuberay-operator-7fbdbf8c89-pt8bk   1/1     Running   0          27s
+```
+
+# Deploy On Xeon
+
+```bash
+cd GenAIInfra/manifests/FineTuning/xeon
+kubectl apply -f finetuning-ray-cluster-autoscaler.yaml
+kubectl apply -f finetuning.yaml
+```
+
+# Deploy On Gaudi
+
+TBD
+
+# Verify LLM Fine-tuning Service
+
+Make sure all the pods are running.
+
+```bash
+kubectl get pods
+```
diff --git a/manifests/FineTuning/gaudi/.gitkeep b/manifests/FineTuning/gaudi/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/manifests/FineTuning/xeon/finetuning-ray-cluster-autoscaler.yaml b/manifests/FineTuning/xeon/finetuning-ray-cluster-autoscaler.yaml
new file mode 100644
index 00000000..da46ab80
--- /dev/null
+++ b/manifests/FineTuning/xeon/finetuning-ray-cluster-autoscaler.yaml
@@ -0,0 +1,140 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: ray.io/v1
+kind: RayCluster
+metadata:
+  name: raycluster-autoscaler
+spec:
+  # The version of Ray you are using. Make sure all Ray containers are running this version of Ray.
+  rayVersion: '2.23.0'
+  # If `enableInTreeAutoscaling` is true, the Autoscaler sidecar will be added to the Ray head pod.
+  # Ray Autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0.
+  enableInTreeAutoscaling: true
+  # `autoscalerOptions` is an OPTIONAL field specifying configuration overrides for the Ray Autoscaler.
+  # The example configuration shown below represents the DEFAULT values.
+  # (You may delete autoscalerOptions if the defaults are suitable.)
+  autoscalerOptions:
+    # `upscalingMode` is "Default" or "Aggressive."
+    # Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster.
+    # Default: Upscaling is not rate-limited.
+    # Aggressive: An alias for Default; upscaling is not rate-limited.
+    upscalingMode: Default
+    # `idleTimeoutSeconds` is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
+    idleTimeoutSeconds: 60
+    # `image` optionally overrides the Autoscaler's container image. The Autoscaler uses the same image as the Ray container by default.
+    ## image: "my-repo/my-custom-autoscaler-image:tag"
+    # `imagePullPolicy` optionally overrides the Autoscaler container's default image pull policy (IfNotPresent).
+    image: opea-finetune:latest
+    imagePullPolicy: IfNotPresent
+    # Optionally specify the Autoscaler container's securityContext.
+    securityContext: {}
+    env: []
+    envFrom: []
+    # `resources` specifies optional resource request and limit overrides for the Autoscaler container.
+    # The default Autoscaler resource limits and requests should be sufficient for production use-cases.
+    # However, for large Ray clusters, we recommend monitoring container resource usage to determine if overriding the defaults is required.
+    resources:
+      limits:
+        cpu: "500m"
+        memory: "512Mi"
+      requests:
+        cpu: "500m"
+        memory: "512Mi"
+  # Ray head pod template
+  headGroupSpec:
+    # The `rayStartParams` are used to configure the `ray start` command.
+    # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay.
+    # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`.
+    rayStartParams:
+      # Setting "num-cpus: 0" to avoid any Ray actors or tasks being scheduled on the Ray head Pod.
+      num-cpus: "0"
+      # Use `resources` to optionally specify custom resource annotations for the Ray node.
+      # The value of `resources` is a string-integer mapping.
+      # Currently, `resources` must be provided in the specific format demonstrated below:
+      # resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
+    # Pod template
+    template:
+      spec:
+        containers:
+          # The Ray head container
+          - name: ray-head
+            image: opea-finetune:latest
+            imagePullPolicy: IfNotPresent
+            ports:
+              - containerPort: 6379
+                name: gcs
+              - containerPort: 8265
+                name: dashboard
+              - containerPort: 10001
+                name: client
+            lifecycle:
+              preStop:
+                exec:
+                  command: ["/bin/sh","-c","ray stop"]
+            resources:
+              limits:
+                cpu: "1"
+                memory: "2G"
+              requests:
+                cpu: "1"
+                memory: "2G"
+            volumeMounts:
+              - name: k8s-data
+                mountPath: /root/k8s-data
+              - name: hf-cache
+                mountPath: /root/.cache/huggingface/
+        volumes:
+          - name: k8s-data
+            hostPath:
+              path: /root/k8s-data
+          - name: hf-cache
+            hostPath:
+              path: /root/.cache/huggingface/
+
+  workerGroupSpecs:
+    # The number of Pod replicas in this worker group.
+    - replicas: 0
+      minReplicas: 0
+      maxReplicas: 10
+      # Logical group name; this one is called small-group, but any descriptive name works.
+      groupName: small-group
+      # If worker pods need to be added, the Ray Autoscaler increments `replicas`.
+      # If worker pods need to be removed, the Ray Autoscaler decrements `replicas` and populates the `workersToDelete` list.
+      # The KubeRay operator will remove Pods from the list until the desired number of replicas is satisfied.
+      #scaleStrategy:
+      #  workersToDelete:
+      #    - raycluster-complete-worker-small-group-bdtwh
+      #    - raycluster-complete-worker-small-group-hv457
+      #    - raycluster-complete-worker-small-group-k8tj7
+      rayStartParams: {}
+      # Pod template
+      template:
+        spec:
+          containers:
+            - name: ray-worker
+              image: opea-finetune:latest
+              imagePullPolicy: IfNotPresent
+              lifecycle:
+                preStop:
+                  exec:
+                    command: ["/bin/sh","-c","ray stop"]
+              resources:
+                limits:
+                  cpu: "35"
+                  memory: "32G"
+                requests:
+                  cpu: "35"
+                  memory: "32G"
+              volumeMounts:
+                - name: k8s-data
+                  mountPath: /root/k8s-data
+                - name: hf-cache
+                  mountPath: /root/.cache/huggingface/
+          volumes:
+            - name: k8s-data
+              hostPath:
+                path: /root/k8s-data
+            - name: hf-cache
+              hostPath:
+                path: /root/.cache/huggingface/
diff --git a/manifests/FineTuning/xeon/finetuning.yaml b/manifests/FineTuning/xeon/finetuning.yaml
new file mode 100644
index 00000000..b6e29b7c
--- /dev/null
+++ b/manifests/FineTuning/xeon/finetuning.yaml
@@ -0,0 +1,36 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: opea-finetune
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      service: opea-finetune
+  template:
+    metadata:
+      labels:
+        service: opea-finetune
+    spec:
+      containers:
+        - name: opea-finetune
+          image: opea-finetune:latest
+          imagePullPolicy: IfNotPresent
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: opea-finetune-entrypoint
+  namespace: default
+spec:
+  type: NodePort
+  selector:
+    service: opea-finetune
+  ports:
+    - port: 8000
+      targetPort: 8000
+      nodePort: 30001
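
Once the pods are running, the fine-tuning service is reachable through the NodePort defined in `finetuning.yaml` (node port 30001 forwarding to container port 8000). The snippet below is a minimal sketch of exercising the deployed service from outside the cluster; it assumes the `opea-finetune` image serves an OpenAI-style `/v1/fine_tuning/jobs` endpoint, and the endpoint path, payload fields, and model name are illustrative assumptions rather than part of these manifests.

```bash
# Resolve a node address for the NodePort service (30001 -> container port 8000).
NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}')

# Hypothetical request: the path, payload fields, and model name below are
# assumptions; replace them with the API actually served by opea-finetune.
curl -X POST "http://${NODE_IP}:30001/v1/fine_tuning/jobs" \
  -H "Content-Type: application/json" \
  -d '{"training_file": "test_data.json", "model": "meta-llama/Llama-2-7b-chat-hf"}'
```

If the node IP is not directly reachable, `kubectl port-forward svc/opea-finetune-entrypoint 8000:8000` is an alternative way to reach the same container port locally.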