Commit d0561b6
add openai client
truskovskiyk committed Sep 18, 2024
1 parent acd587e commit d0561b6
Showing 3 changed files with 50 additions and 4 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -162,4 +162,5 @@ taskfile.yml
 TODO.md
 README.p.md
 .DS_Store
-README.private.md
+README.private.md
+vllm
7 changes: 4 additions & 3 deletions module-5/README.md
@@ -13,7 +13,6 @@
 ***
 
 
-
 # Setup
 
 Create kind cluster
@@ -50,7 +49,7 @@ Deploy k8s:
 
 ```
 kubectl create -f k8s/app-streamlit.yaml
-kubectl port-forward --address 0.0.0.0 svc/app-streamlit 8081:8080
+kubectl port-forward --address 0.0.0.0 svc/app-streamlit 8080:8080
 ```
 
 # Fast API
@@ -65,7 +64,7 @@ Deploy k8s:
 
 ```
 kubectl create -f k8s/app-fastapi.yaml
-kubectl port-forward --address 0.0.0.0 svc/app-fastapi 8081:8080
+kubectl port-forward --address 0.0.0.0 svc/app-fastapi 8080:8080
 ```
 
 
@@ -160,6 +159,8 @@ Create deployment
 ```
 kubectl create -f ./k8s/vllm-inference.yaml
 kubectl port-forward --address 0.0.0.0 svc/app-vllm 8000:8000
+kubectl logs <POD> -c model-loader
+kubectl logs <POD> -c app-vllm
 ```
 
 
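The commit title names an OpenAI client: vLLM exposes an OpenAI-compatible HTTP API, so once `svc/app-vllm` is port-forwarded as in the hunk above, the served model can be queried with the standard `openai` package. A minimal sketch, not part of this commit's diff; it assumes `pip install openai` and that the adapter is registered under the name `sql-default-model`, as in the model-loader script in the YAML below:

```python
# Minimal sketch (not part of this commit): query the port-forwarded
# vLLM server through its OpenAI-compatible /v1 API.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",  # svc/app-vllm port-forward from the README
    api_key="not-used",  # vLLM ignores the key unless the server sets --api-key
)

# "sql-default-model" is the adapter name the model-loader container registers;
# the prompt is purely illustrative.
response = client.completions.create(
    model="sql-default-model",
    prompt="Generate a SQL query that counts rows in the users table:",
    max_tokens=64,
)
print(response.choices[0].text)
```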
44 changes: 44 additions & 0 deletions module-5/k8s/vllm-inference.yaml
@@ -52,6 +52,50 @@ spec:
         volumeMounts:
         - name: vllm-storage
           mountPath: /vllm-storage
+
+      - name: model-loader
+        image: ghcr.io/kyryl-opens-ml/app-fastapi:latest
+        env:
+        - name: WANDB_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: wandb
+              key: WANDB_API_KEY
+        command: ["/bin/sh", "-c"]
+        args:
+        - |
+          echo "Model Loader: Waiting for vllm server to be available on port 8000..."
+          while ! curl -s http://localhost:8000/health >/dev/null; do
+            echo "Model Loader: vllm server not available yet. Retrying in 5 seconds..."
+            sleep 5
+          done
+          echo "Model Loader: vllm server is now available. Starting model loading..."
+          # Execute the required Python commands
+          python serving-llm/client.py load-from-registry truskovskiyk/ml-in-production-practice/modal_generative_example:latest sql-default-model
+          if [ $? -ne 0 ]; then
+            echo "Model Loader: Failed to load model from registry."
+            exit 1
+          fi
+          python serving-llm/client.py load-adapter sql-default-model ./sql-default-model
+          if [ $? -ne 0 ]; then
+            echo "Model Loader: Failed to load adapter."
+            exit 1
+          fi
+          echo "Model Loader: Model loading completed successfully."
+        volumeMounts:
+        - name: vllm-storage
+          mountPath: /vllm-storage
+        resources:
+          limits:
+            cpu: "500m"
+            memory: "512Mi"
+          requests:
+            cpu: "250m"
+            memory: "256Mi"
+
       volumes:
       - name: vllm-storage
         persistentVolumeClaim:
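A note on the pattern (not stated in the diff itself): model-loader is a second container in the same pod as the vLLM server, so it can poll http://localhost:8000/health over the pod's shared network namespace, and mounting the same vllm-storage volume lets the downloaded weights land where the server can read them. The client sketch above assumes this sidecar has finished registering the adapter before any queries are sent.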
