Skip to content

Commit

Permalink
Module 8 webinar (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
truskovskiyk authored Oct 5, 2024
1 parent 1949a2c commit 237079d
Show file tree
Hide file tree
Showing 9 changed files with 441 additions and 0 deletions.
66 changes: 66 additions & 0 deletions module-8/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,69 @@
# Reference implementation

***

## Buy vs Build

![alt text](./docs/build-vs-buy.png)

## AWS example

- Infra: AWS
- Data: S3 + RDS
- Experiments: EC2 + [SageMaker Processing Jobs](https://sagemaker-examples.readthedocs.io/en/latest/sagemaker_processing/scikit_learn_data_processing_and_model_evaluation/scikit_learn_data_processing_and_model_evaluation.html)
- Pipelines: [MWAA](https://aws.amazon.com/managed-workflows-for-apache-airflow/)
- Basic deployment: [SageMaker Inference Toolkit](https://github.com/aws/sagemaker-inference-toolkit)
- Advanced deployment: [Asynchronous inference](https://docs.aws.amazon.com/sagemaker/latest/dg/async-inference.html) + [Multi-model endpoints](https://docs.aws.amazon.com/sagemaker/latest/dg/multi-model-endpoints.html)
- Monitoring: [SageMaker Model Monitor](https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor.html)


## SageMaker Advanced: Multi-model endpoints

![alt text](./docs/mme.png)

Create

```
python cli.py create-endpoint
```

Add

```
python cli.py add-model ./model_registry/add_sub/ add_sub_v0.tar.gz
python cli.py add-model ./model_registry/triton-serve-pt/ triton-serve-pt_v0.tar.gz
```

Note: make sure you have a local `model_registry` directory containing the models before running `add-model` (see https://github.com/triton-inference-server/python_backend).


Check

```
aws s3 ls s3://sagemaker-us-east-1-469651751916/models/
```

Call

```
python cli.py call-model-image triton-serve-pt_v0.tar.gz
python cli.py call-model-vector add_sub_v0.tar.gz
```



Clean all
```
bash ./sagemaker-multimodal/clean.sh
```


## Tech radar

```
python -m http.server 9000
```

- https://github.com/zalando/tech-radar
- https://tech-radar.preply.com/

Binary file added module-8/docs/build-vs-buy.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added module-8/docs/mme.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions module-8/sagemaker-multimodal/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
model_registry
*.tar.gz
21 changes: 21 additions & 0 deletions module-8/sagemaker-multimodal/clean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
#
# Tear down the SageMaker resources returned by the `aws sagemaker list-*`
# calls below (endpoints, endpoint configurations, models) and then empty the
# S3 bucket.
#
# Deletion is best-effort: a failed delete is reported on stderr and the
# script carries on with the next resource instead of aborting.

# Bucket to empty. Override with SAGEMAKER_BUCKET; the default is the bucket
# this script originally had hard-coded.
BUCKET="${SAGEMAKER_BUCKET:-sagemaker-us-east-1-469651751916}"

# Step 1: Delete Endpoints
for endpoint in $(aws sagemaker list-endpoints --query "Endpoints[*].EndpointName" --output text); do
    # Only claim success when the delete call actually succeeded.
    if aws sagemaker delete-endpoint --endpoint-name "$endpoint"; then
        echo "Deleted endpoint: $endpoint"
    else
        echo "Failed to delete endpoint: $endpoint" >&2
    fi
done

# Step 2: Delete Endpoint Configurations
for endpoint_config in $(aws sagemaker list-endpoint-configs --query "EndpointConfigs[*].EndpointConfigName" --output text); do
    if aws sagemaker delete-endpoint-config --endpoint-config-name "$endpoint_config"; then
        echo "Deleted endpoint config: $endpoint_config"
    else
        echo "Failed to delete endpoint config: $endpoint_config" >&2
    fi
done

# Step 3: Delete Models
for model in $(aws sagemaker list-models --query "Models[*].ModelName" --output text); do
    if aws sagemaker delete-model --model-name "$model"; then
        echo "Deleted model: $model"
    else
        echo "Failed to delete model: $model" >&2
    fi
done

# Step 4: Remove every object from the bucket (the bucket itself is kept).
aws s3 rm "s3://${BUCKET}" --recursive
133 changes: 133 additions & 0 deletions module-8/sagemaker-multimodal/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import typer

import boto3, json, sagemaker, time
from sagemaker import get_execution_role
import numpy as np
from PIL import Image
import tritonclient.http as httpclient
from pydantic_settings import BaseSettings, SettingsConfigDict
from rich.console import Console
from rich.panel import Panel
import tarfile
import os
from typing import Any


class Settings(BaseSettings):
    """Default configuration for the SageMaker multi-model endpoint demo.

    Subclasses pydantic-settings ``BaseSettings``; every field has a
    hard-coded default pointing at the demo AWS account and region.
    """

    # IAM role ARN passed as ExecutionRoleArn when the model is created.
    role: str = "arn:aws:iam::469651751916:role/sagemaker-execution-role"
    # S3 prefix passed as ModelDataUrl for the multi-model container.
    model_data_url: str = "s3://sagemaker-us-east-1-469651751916/models/"
    # Bucket that model tarballs are uploaded to.
    bucket_name: str = "sagemaker-us-east-1-469651751916"
    # Container image used as the model's primary container.
    mme_triton_image_uri: str = (
        "785573368785.dkr.ecr.us-east-1.amazonaws.com/"
        "sagemaker-tritonserver:22.07-py3"
    )
    # Names given to the SageMaker model, endpoint config and endpoint.
    model_name: str = "sagemaker-poc"
    endpoint_config_name: str = "sagemaker-poc"
    endpoint_name: str = "sagemaker-poc"

# Module-level singletons: the configuration and the rich console that the
# command functions in this file read from and print to.
settings = Settings()
console = Console()

def create_endpoint():
    """Create the SageMaker model, endpoint config and endpoint.

    Issues ``create_model``, ``create_endpoint_config`` and
    ``create_endpoint`` in that order, using the names and URIs from
    ``settings``, and prints each API response. The function returns as soon
    as the last call returns; it does not wait for the endpoint afterwards.
    """
    sagemaker_api = boto3.client(service_name="sagemaker")

    # "MultiModel" mode: the container serves models found under ModelDataUrl.
    primary_container = {
        "Image": settings.mme_triton_image_uri,
        "ModelDataUrl": settings.model_data_url,
        "Mode": "MultiModel",
    }
    model_response = sagemaker_api.create_model(
        ModelName=settings.model_name,
        ExecutionRoleArn=settings.role,
        PrimaryContainer=primary_container,
    )
    console.print(model_response)

    # A single variant receiving all traffic on one GPU instance.
    all_traffic_variant = {
        "InstanceType": "ml.g5.xlarge",
        "InitialVariantWeight": 1,
        "InitialInstanceCount": 1,
        "ModelName": settings.model_name,
        "VariantName": "AllTraffic",
    }
    config_response = sagemaker_api.create_endpoint_config(
        EndpointConfigName=settings.endpoint_config_name,
        ProductionVariants=[all_traffic_variant],
    )
    console.print(config_response)

    endpoint_response = sagemaker_api.create_endpoint(
        EndpointName=settings.endpoint_name,
        EndpointConfigName=settings.endpoint_config_name,
    )
    console.print(endpoint_response)

def add_model(model_directory: str, tarball_name: str):
    """Package a model directory as a ``.tar.gz`` and upload it to S3.

    Args:
        model_directory: Path of the directory to archive. A trailing path
            separator (e.g. ``./model_registry/add_sub/``) is accepted.
        tarball_name: Path the archive is written to locally; the same value
            is appended to ``models/`` to form the S3 object key.

    Returns:
        The ``s3://`` URI of the uploaded archive.
    """
    s3_key = f"models/{tarball_name}"

    # os.path.basename("some/dir/") is "", which would place the directory's
    # contents at the archive root. Normalising first strips the trailing
    # separator so the archive always has one top-level folder named after
    # the model directory.
    archive_root = os.path.basename(os.path.normpath(model_directory))

    with tarfile.open(tarball_name, "w:gz") as tar:
        tar.add(model_directory, arcname=archive_root)
    console.print(f"Created tarball: {tarball_name}")

    s3_client = boto3.client("s3")
    # Reuse the key computed above so the upload and the reported URI agree.
    s3_client.upload_file(tarball_name, settings.bucket_name, s3_key)

    s3_uri = f"s3://{settings.bucket_name}/{s3_key}"
    console.print(f"Uploaded model to: {s3_uri}")
    return s3_uri

def _call_model(target_model: str, payload: Any):
    """Invoke one model behind the endpoint and print its first output.

    Args:
        target_model: Value sent as ``TargetModel`` in the invoke request.
        payload: Object serialised with ``json.dumps`` and sent as the
            request body.

    The response body is parsed as JSON and the ``data`` field of the first
    entry in ``outputs`` is printed; nothing is returned.
    """
    runtime = boto3.client("sagemaker-runtime")
    invoke_result = runtime.invoke_endpoint(
        EndpointName=settings.endpoint_name,
        ContentType="application/octet-stream",
        Body=json.dumps(payload),
        TargetModel=target_model,
    )

    # The body is a stream: read it fully, decode, then parse.
    body_text = invoke_result["Body"].read().decode("utf8")
    parsed = json.loads(body_text)
    first_output = parsed["outputs"][0]

    console.print(first_output["data"])


def call_model_image(target_model: str):
    """Send a random, normalised image tensor to ``target_model``.

    Builds a single ``INPUT__0`` FP32 input from random pixel values and
    forwards it through ``_call_model``.
    """

    def _random_normalised_image():
        # Per-channel mean/std, shaped to broadcast over an (H, W, C) array.
        channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, 3)
        channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, 3)

        # Random 224x224 image with 3 colour channels.
        pixels = np.random.rand(224, 224, 3).astype(np.float32)
        normalised = (pixels - channel_mean) / channel_std

        # (H, W, C) -> (C, H, W), then to nested Python lists for JSON.
        return np.transpose(normalised, (2, 0, 1)).tolist()

    # The data is a nested (3, 224, 224) list; the declared shape adds a
    # leading batch dimension of 1.
    image_input = {
        "name": "INPUT__0",
        "shape": [1, 3, 224, 224],
        "datatype": "FP32",
        "data": _random_normalised_image(),
    }
    _call_model(target_model=target_model, payload={"inputs": [image_input]})




def call_model_vector(target_model: str):
    """Send two four-element FP32 vectors to ``target_model``.

    The request carries inputs named ``INPUT0`` and ``INPUT1``, each holding
    ``[1, 2, 3, 4]``, and is forwarded through ``_call_model``.
    """
    # Both inputs have the same shape, datatype and values; only the name
    # differs. A fresh data list is built for each entry.
    vector_inputs = [
        {
            "name": input_name,
            "shape": [4],
            "datatype": "FP32",
            "data": [1, 2, 3, 4],
        }
        for input_name in ("INPUT0", "INPUT1")
    ]
    _call_model(target_model=target_model, payload={"inputs": vector_inputs})

if __name__ == "__main__":
    # Register each function as a Typer sub-command (in this order), then
    # hand control to Typer to parse the command line and dispatch.
    app = typer.Typer()
    for command in (
        create_endpoint,
        add_model,
        call_model_image,
        call_model_vector,
    ):
        app.command()(command)
    app()
128 changes: 128 additions & 0 deletions module-8/tech-radar/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
{
"date": "2024.10",
"entries": [
{
"quadrant": 2,
"ring": 0,
"label": "Modal",
"link": "https://kyrylai.com/2024/07/29/exploring-gpu-jobs-serverless-solutions-for-efficient-ml-experiments-part-1-2/",
"active": true,
"moved": 0
},
{
"quadrant": 2,
"ring": 0,
"label": "SageMaker Processing",
"link": "https://kyrylai.com/2024/07/29/exploring-gpu-jobs-serverless-solutions-for-efficient-ml-experiments-part-1-2/",
"active": true,
"moved": 0
},
{
"quadrant": 2,
"ring": 3,
"label": "SageMaker Training",
"link": "https://kyrylai.com/2024/07/29/exploring-gpu-jobs-serverless-solutions-for-efficient-ml-experiments-part-1-2/",
"active": true,
"moved": 0
},

{
"quadrant": 2,
"ring": 3,
"label": "VertexAI Training",
"link": "https://kyrylai.com/2024/07/29/exploring-gpu-jobs-serverless-solutions-for-efficient-ml-experiments-part-1-2/",
"active": true,
"moved": 0
},

{
"quadrant": 3,
"ring": 0,
"label": "S3",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
},

{
"quadrant": 3,
"ring": 0,
"label": "PostgreSQL",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
},
{
"quadrant": 3,
"ring": 1,
"label": "Argilla",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
},

{
"quadrant": 3,
"ring": 0,
"label": "Supabase",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
},

{
"quadrant": 3,
"ring": 2,
"label": "Qdrant",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
},

{
"quadrant": 3,
"ring": 2,
"label": "Weaviate",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
},

{
"quadrant": 3,
"ring": 1,
"label": "LanceDB",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
},

{
"quadrant": 1,
"ring": 0,
"label": "Triton",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
},

{
"quadrant": 1,
"ring": 3,
"label": "Seldon",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
},

{
"quadrant": 0,
"ring": 0,
"label": "W&D",
"link": "https://kyrylai.com/2024/09/09/remove-complexity-from-your-rag-applications/",
"active": true,
"moved": 0
}

]
}
Binary file added module-8/tech-radar/favicon.ico
Binary file not shown.
Loading

0 comments on commit 237079d

Please sign in to comment.