ci
truskovskiyk committed Sep 14, 2024
1 parent e928821 commit 757bb68
Showing 25 changed files with 268 additions and 609 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/module-5.yaml
@@ -59,4 +59,4 @@ jobs:
context: module-5/
push: true
target: app-kserve
tags: ghcr.io/kyryl-opens-ml/app-kserve:latest
tags: ghcr.io/kyryl-opens-ml/app-kserve:latest
55 changes: 21 additions & 34 deletions .github/workflows/module-6.yaml
@@ -1,42 +1,29 @@
name: Module 6

on:
workflow_dispatch:
push:
branches:
- main

pull_request:
branches:
- main
# paths:
# - 'module-6/**'

jobs:
build:
docker-builds:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Build app fastapi
uses: docker/build-push-action@v2
- name: Checkout repository
uses: actions/checkout@v4

- name: Log in to the Container registry
uses: docker/login-action@v3
with:
context: week-6/
file: week-6/Dockerfile
push: true
target: app-fastapi
tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi-week-6:latest
cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi-week-6:buildcache
cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi-week-6:buildcache,mode=max

# - name: Build app seldon
# uses: docker/build-push-action@v2
# with:
# context: week-5/
# file: week-5/Dockerfile
# push: true
# target: app-seldon
# tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:latest
# cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache
# cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache,mode=max
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
46 changes: 0 additions & 46 deletions module-5/README.md
@@ -34,10 +34,6 @@ k9s -A

```
export WANDB_API_KEY='your key here'
```


```
kubectl create secret generic wandb --from-literal=WANDB_API_KEY=$WANDB_API_KEY
```

@@ -119,45 +115,3 @@ Call API
```
curl -v -H "Host: custom-model.default.example.com" -H "Content-Type: application/json" "http://localhost:8080/v1/models/custom-model:predict" -d @data-samples/kserve-input.json
```
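The curl call above speaks the KServe v1 prediction protocol, routing to the in-cluster model via the `Host` header. A stdlib-only Python sketch of the same request; the payload shape is an assumption and should match `data-samples/kserve-input.json`:

```python
import json
import urllib.request


def build_predict_request(host: str, model: str, payload: dict) -> urllib.request.Request:
    """Build a KServe v1 ':predict' request routed via the Host header."""
    return urllib.request.Request(
        f"http://localhost:8080/v1/models/{model}:predict",
        data=json.dumps(payload).encode(),
        headers={"Host": host, "Content-Type": "application/json"},
    )


if __name__ == "__main__":
    # Payload shape is a placeholder, not taken from the course repo.
    payload = {"instances": [{"text": "this is an example"}]}
    req = build_predict_request("custom-model.default.example.com", "custom-model", payload)
    with urllib.request.urlopen(req) as resp:
        print(json.load(resp))
```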


# Seldon V2

```
git clone https://github.com/SeldonIO/seldon-core --branch=v2
```


# Seldon V1


## Install with helm

```
kubectl apply -f https://github.com/datawire/ambassador-operator/releases/latest/download/ambassador-operator-crds.yaml
kubectl apply -n ambassador -f https://github.com/datawire/ambassador-operator/releases/latest/download/ambassador-operator-kind.yaml
kubectl wait --timeout=180s -n ambassador --for=condition=deployed ambassadorinstallations/ambassador
kubectl create namespace seldon-system
helm install seldon-core seldon-core-operator --version 1.15.1 --repo https://storage.googleapis.com/seldon-charts --set usageMetrics.enabled=true --set ambassador.enabled=true --namespace seldon-system
```

## Port forward

```
kubectl port-forward --address 0.0.0.0 -n ambassador svc/ambassador 7777:80
```


## Custom example
```
kubectl create -f k8s/seldon-custom.yaml
open http://IP:7777/seldon/default/nlp-sample/api/v1.0/doc/#/
{ "data": { "ndarray": ["this is an example"] } }
curl -X POST "http://IP:7777/seldon/default/nlp-sample/api/v1.0/predictions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"data\":{\"ndarray\":[\"this is an example\"]}}"
```
2 changes: 1 addition & 1 deletion module-5/k8s/app-triton.yaml
@@ -15,7 +15,7 @@ spec:
spec:
containers:
- name: app-triton
image: ghcr.io/kyryl-opens-ml/app-triton:latest
image: ghcr.io/kyryl-opens-ml/app-pytriton:latest
env:
- name: WANDB_API_KEY
valueFrom:
1 change: 1 addition & 0 deletions module-6/.gitignore
@@ -0,0 +1 @@
node_modules
129 changes: 70 additions & 59 deletions module-6/README.md
@@ -14,53 +14,81 @@



# Setup
# Setup

Create kind cluster
Create kind cluster

```bash
kind create cluster --name ml-in-production
```
export WANDB_API_KEY="cb86168a2e8db7edb905da69307450f5e7867d66"
kind create cluster --name ml-in-production-course-week-6
kubectl create secret generic wandb --from-literal=WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66

Run k9s

```bash
k9s -A
```

Run k9s

# Setup


```
k9s -A
export WANDB_API_KEY='your key here'
kubectl create secret generic wandb --from-literal=WANDB_API_KEY=$WANDB_API_KEY
```


# Load test
# Benchmarking

NOTE: **Premature optimization is the root of all evil!**

Deploy API
Deploy API from module 5

```
kubectl create -f ./k8s/app-fastapi.yaml
kubectl create -f ./k8s/app-triton.yaml
kubectl create -f ./k8s/app-streamlit.yaml
kubectl create -f ./k8s/kserve-inferenceserver.yaml
```

```
kubectl create -f ./k8s/fastapi-app.yaml
kubectl port-forward --address 0.0.0.0 svc/app-fastapi 8080:8080
kubectl port-forward --address 0.0.0.0 svc/app-streamlit 8080:8080
```

Run test
Run load test via locust

```
locust -f load-testing/locustfile.py --host=http://app-fastapi.default.svc.cluster.local:8080 --users 50 --spawn-rate 10 --autostart --run-time 600s
locust -f load-testing/locustfile.py --host=http://0.0.0.0:8080 --users 50 --spawn-rate 10 --autostart --run-time 600s
```
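Under the hood a run like the locust command above boils down to firing concurrent requests and summarizing latency percentiles. A minimal stdlib sketch of that loop, assuming a hypothetical health endpoint (the URL and counts are placeholders, not course values):

```python
import math
import time
import urllib.request
from concurrent.futures import ThreadPoolExecutor


def percentile(samples: list, p: float) -> float:
    """Nearest-rank percentile over a list of latency samples (seconds)."""
    ranked = sorted(samples)
    rank = max(1, math.ceil(p / 100 * len(ranked)))
    return ranked[rank - 1]


def time_request(url: str) -> float:
    """Return wall-clock seconds for one GET request."""
    start = time.perf_counter()
    with urllib.request.urlopen(url) as resp:
        resp.read()
    return time.perf_counter() - start


def run_load_test(url: str, requests: int = 100, concurrency: int = 10) -> dict:
    """Fan out `requests` GETs over `concurrency` threads, report p50/p95."""
    with ThreadPoolExecutor(max_workers=concurrency) as pool:
        latencies = list(pool.map(time_request, [url] * requests))
    return {"p50": percentile(latencies, 50), "p95": percentile(latencies, 95)}


if __name__ == "__main__":
    # Endpoint is an assumption; point it at the port-forwarded service.
    print(run_load_test("http://0.0.0.0:8080/health"))
```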

Run on k8s
Run load test via k6

```
K6_WEB_DASHBOARD=true k6 run ./load-testing/load_test.js
```

Run on k8s

```
kubectl create -f ./k8s/fastapi-locust.yaml
kubectl port-forward --address 0.0.0.0 pod/load-fastapi-naive 8089:8089
kubectl create -f ./k8s/vegeta-job.yaml
```

- https://github.com/locustio/locust
- https://github.com/grafana/k6
- https://github.com/gatling/gatling
- https://ghz.sh/
- https://github.com/tsenart/vegeta


# HPA
# Vertical scaling

- https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
- https://docs.railway.app/reference/scaling

# Horizontal scaling

- https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/

Install metric server

@@ -69,6 +97,13 @@ kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/late
kubectl patch -n kube-system deployment metrics-server --type=json -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'
```

Update deployment

```
kubectl apply -f k8s/app-fastapi-resources.yaml
```


Create from cli

```
@@ -84,65 +119,38 @@ kubectl create -f ./k8s/fastapi-hpa.yaml

- https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/
- https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/
- https://kserve.github.io/website/master/modelserving/autoscaling/autoscaling/
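The HPA walkthrough linked above scales by the ratio of observed to target metric: desiredReplicas = ceil(currentReplicas * currentMetric / targetMetric), clamped to the configured bounds. A sketch of that rule:

```python
import math


def hpa_desired_replicas(current_replicas: int, current_metric: float,
                         target_metric: float, min_replicas: int = 1,
                         max_replicas: int = 10) -> int:
    """Kubernetes HPA rule: scale by the observed/target ratio, then clamp."""
    desired = math.ceil(current_replicas * current_metric / target_metric)
    return max(min_replicas, min(max_replicas, desired))


# e.g. 2 pods at 90% CPU against a 45% target doubles to 4 pods
```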


# Async inference

## Install KServe

Install kserve
KNative autoscaling: https://kserve.github.io/website/master/modelserving/autoscaling/autoscaling/

```
curl -s "https://raw.githubusercontent.com/kserve/kserve/release-0.10/hack/quick_install.sh" | bash
```

## Test single model

kubectl create -f ./k8s/kserve-inferenceserver-autoscaling.yaml
```
kubectl create namespace kserve-test
kubectl create -n kserve-test -f ./k8s/kserve-iris.yaml
kubectl get inferenceservices sklearn-iris -n kserve-test
kubectl get svc istio-ingressgateway -n istio-system
kubectl port-forward --address 0.0.0.0 svc/istio-ingressgateway -n istio-system 8080:80

```

```
curl -v -H "Host: sklearn-iris.kserve-test.example.com" "http://0.0.0.0:8080/v1/models/sklearn-iris:predict" -d @data/iris-input.json
seq 1 1000 | xargs -n1 -P10 -I {} curl -v -H "Host: custom-model-autoscaling.default.example.com" \
-H "Content-Type: application/json" \
"http://localhost:8080/v1/models/custom-model:predict" \
-d @data-samples/kserve-input.json
```


```
kubectl create -f load-testing/perf.yaml -n kserve-test
```


## Test custom model
# Async inference


Run locally
Simple example

```
docker build -t kyrylprojector/kserve-custom:latest -f Dockerfile --target app-kserve .
docker build -t kyrylprojector/kserve-custom:latest -f Dockerfile --target app-kserve . && docker push kyrylprojector/kserve-custom:latest
docker run -e PORT=8080 -e WANDB_API_KEY=******* -p 8080:8080 kyrylprojector/kserve-custom:latest
curl localhost:8080/v1/models/kserve-custom:predict -d @data/text-input.json
modal deploy ./queue/simple_queue.py
python queue/simple_queue.py
```
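The queue example above decouples request submission from inference: work goes onto a queue and a worker drains it. A stdlib stand-in for that pattern; the `predict` function is a placeholder, not the course's model:

```python
import queue
import threading


def predict(text: str) -> str:
    # Placeholder model: real code would call the deployed model here.
    return text.upper()


def worker(tasks: queue.Queue, results: dict) -> None:
    """Drain the queue until a None sentinel arrives."""
    while True:
        item = tasks.get()
        if item is None:
            break
        job_id, text = item
        results[job_id] = predict(text)


def run_async_inference(payloads: list) -> dict:
    """Enqueue payloads, let a background worker process them, collect results."""
    tasks: queue.Queue = queue.Queue()
    results: dict = {}
    t = threading.Thread(target=worker, args=(tasks, results))
    t.start()
    for i, text in enumerate(payloads):
        tasks.put((i, text))
    tasks.put(None)  # sentinel: no more work
    t.join()
    return results
```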

Run on k8s

```
kubectl apply -f k8s/kserve-custom.yaml
Seldon V2 Examples: https://docs.seldon.io/projects/seldon-core/en/v2/contents/architecture/index.html
SQS: https://github.com/poundifdef/smoothmq

kubectl port-forward --namespace istio-system svc/istio-ingressgateway 8080:80
curl -v -H "Host: custom-model.default.example.com" "http://0.0.0.0:8080/v1/models/kserve-custom:predict" -d @data/text-input.json
```

- https://kserve.github.io/website/0.10/modelserving/v1beta1/custom/custom_model/#implement-custom-model-using-kserve-api


## Kafka
@@ -152,8 +160,12 @@ Install kafka

```
helm repo add bitnami https://charts.bitnami.com/bitnami
helm install zookeeper bitnami/zookeeper --set replicaCount=1 --set auth.enabled=false --set allowAnonymousLogin=true --set persistance.enabled=false --version 11.0.0
helm install kafka bitnami/kafka --set zookeeper.enabled=false --set replicaCount=1 --set persistance.enabled=false --set logPersistance.enabled=false --set externalZookeeper.servers=zookeeper-headless.default.svc.cluster.local --version 21.0.0
helm install zookeeper bitnami/zookeeper --set replicaCount=1 --set auth.enabled=false --set allowAnonymousLogin=true \
  --set persistence.enabled=false --version 11.0.0
helm install kafka bitnami/kafka --set zookeeper.enabled=false --set replicaCount=1 --set persistence.enabled=false \
  --set logPersistence.enabled=false --set externalZookeeper.servers=zookeeper-headless.default.svc.cluster.local \
  --version 21.0.0
```

Install eventing
@@ -224,7 +236,6 @@ mc cp data/text-input.json myminio/input
- https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation
- https://github.com/huggingface/distil-whisper/


- https://github.com/intel/neural-compressor
- https://github.com/neuralmagic/sparseml

6 changes: 0 additions & 6 deletions module-6/data/iris-input.json

This file was deleted.

24 changes: 0 additions & 24 deletions module-6/data/text-input.json

This file was deleted.

File renamed without changes.
