Merge branch 'main' into kserve-0.11
Arief Rahmansyah committed Sep 5, 2023
2 parents fa90df5 + 62d30af commit 3e862c7
Showing 38 changed files with 1,412 additions and 515 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/codesee-arch-diagram.yml
@@ -0,0 +1,23 @@
# This workflow was added by CodeSee. Learn more at https://codesee.io/
# This is v2.0 of this workflow file
on:
push:
branches:
- main
pull_request_target:
types: [opened, synchronize, reopened]

name: CodeSee

permissions: read-all

jobs:
codesee:
runs-on: ubuntu-latest
continue-on-error: true
name: Analyze the repo with CodeSee
steps:
- uses: Codesee-io/codesee-action@v2
with:
codesee-token: ${{ secrets.CODESEE_ARCH_DIAG_API_TOKEN }}
codesee-url: https://app.codesee.io
6 changes: 5 additions & 1 deletion api/api/version_endpoints_api.go
@@ -392,9 +392,13 @@ func validateUpdateRequest(prev *models.VersionEndpoint, new *models.VersionEndp
return fmt.Errorf("Updating environment is not allowed, previous: %s, new: %s", prev.EnvironmentName, new.EnvironmentName)
}

+ if prev.Status == models.EndpointPending {
+ return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is currently in the pending state", new.Status)
+ }

if new.Status != prev.Status {
if prev.Status == models.EndpointServing {
return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is in serving state", new.Status)
return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is currently in the serving state", new.Status)
}

if new.Status != models.EndpointRunning && new.Status != models.EndpointTerminated {
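As context for the test changes below, here is a minimal, self-contained sketch of the two status guards above. The EndpointStatus type and constants are simplified stand-ins, not the actual merlin models package:

package main

import "fmt"

// Simplified stand-ins for the status constants used in validateUpdateRequest.
type EndpointStatus string

const (
	EndpointPending EndpointStatus = "pending"
	EndpointRunning EndpointStatus = "running"
	EndpointServing EndpointStatus = "serving"
)

// validateStatusUpdate condenses the two guards: no update is allowed while
// the endpoint is pending, and a serving endpoint's status may not change.
func validateStatusUpdate(prev, next EndpointStatus) error {
	if prev == EndpointPending {
		return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is currently in the pending state", next)
	}
	if next != prev && prev == EndpointServing {
		return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is currently in the serving state", next)
	}
	return nil
}

func main() {
	// Mirrors the new 400 case exercised in version_endpoints_api_test.go.
	fmt.Println(validateStatusUpdate(EndpointPending, EndpointRunning))
}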
125 changes: 119 additions & 6 deletions api/api/version_endpoints_api_test.go
@@ -3394,7 +3394,7 @@ func TestUpdateEndpoint(t *testing.T) {
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
- Status: models.EndpointPending,
+ Status: models.EndpointRunning,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
@@ -3648,7 +3648,120 @@ func TestUpdateEndpoint(t *testing.T) {
},
expected: &Response{
code: http.StatusBadRequest,
data: Error{Message: "Error validating request: Updating endpoint status to running is not allowed when the endpoint is in serving state"},
data: Error{Message: "Error validating request: Updating endpoint status to running is not allowed when the endpoint is currently in the serving state"},
},
},
{
desc: "Should 400 if endpoint status is in the pending state",
vars: map[string]string{
"model_id": "1",
"version_id": "1",
"endpoint_id": uuid.String(),
},
requestBody: &models.VersionEndpoint{
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
Status: models.EndpointRunning,
ServiceName: "sample",
Namespace: "sample",
EnvironmentName: "dev",
Message: "",
ResourceRequest: &models.ResourceRequest{
MinReplica: 1,
MaxReplica: 4,
CPURequest: resource.MustParse("1"),
MemoryRequest: resource.MustParse("1Gi"),
},
EnvVars: models.EnvVars([]models.EnvVar{
{
Name: "WORKER",
Value: "1",
},
}),
},
modelService: func() *mocks.ModelsService {
svc := &mocks.ModelsService{}
svc.On("FindByID", context.Background(), models.ID(1)).Return(&models.Model{
ID: models.ID(1),
Name: "model-1",
ProjectID: models.ID(1),
Project: mlp.Project{},
ExperimentID: 1,
Type: "pyfunc",
MlflowURL: "",
Endpoints: nil,
}, nil)
return svc
},
versionService: func() *mocks.VersionsService {
svc := &mocks.VersionsService{}
svc.On("FindByID", context.Background(), models.ID(1), models.ID(1), mock.Anything).Return(&models.Version{
ID: models.ID(1),
ModelID: models.ID(1),
Model: &models.Model{
ID: models.ID(1),
Name: "model-1",
ProjectID: models.ID(1),
Project: mlp.Project{},
ExperimentID: 1,
Type: "pyfunc",
MlflowURL: "",
Endpoints: nil,
},
}, nil)
return svc
},
envService: func() *mocks.EnvironmentService {
svc := &mocks.EnvironmentService{}
svc.On("GetEnvironment", "dev").Return(&models.Environment{
ID: models.ID(1),
Name: "dev",
Cluster: "dev",
IsDefault: &trueBoolean,
Region: "id",
GcpProject: "dev-proj",
MaxCPU: "1",
MaxMemory: "1Gi",
}, nil)
return svc
},
endpointService: func() *mocks.EndpointsService {
svc := &mocks.EndpointsService{}
svc.On("FindByID", context.Background(), uuid).Return(&models.VersionEndpoint{
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
Status: models.EndpointPending,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
URL: "http://endpoint.svc",
MonitoringURL: "http://monitoring.com",
Environment: &models.Environment{
ID: models.ID(1),
Name: "dev",
Cluster: "dev",
IsDefault: &trueBoolean,
Region: "id",
GcpProject: "dev-proj",
MaxCPU: "1",
MaxMemory: "1Gi",
}, EnvironmentName: "dev",
Message: "",
ResourceRequest: nil,
EnvVars: models.EnvVars([]models.EnvVar{
{
Name: "WORKER",
Value: "1",
},
}),
}, nil)
return svc
},
expected: &Response{
code: http.StatusBadRequest,
data: Error{Message: "Error validating request: Updating endpoint status to running is not allowed when the endpoint is currently in the pending state"},
},
},
{
@@ -3949,7 +4062,7 @@ func TestUpdateEndpoint(t *testing.T) {
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
- Status: models.EndpointPending,
+ Status: models.EndpointRunning,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
@@ -4062,7 +4175,7 @@ func TestUpdateEndpoint(t *testing.T) {
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
- Status: models.EndpointPending,
+ Status: models.EndpointRunning,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
@@ -4239,7 +4352,7 @@ func TestUpdateEndpoint(t *testing.T) {
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
- Status: models.EndpointPending,
+ Status: models.EndpointRunning,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
@@ -4927,7 +5040,7 @@ func TestUpdateEndpoint(t *testing.T) {
},
expected: &Response{
code: http.StatusBadRequest,
data: Error{Message: "Changing deployment type of a pending model is not allowed, please terminate it first."},
data: Error{Message: "Error validating request: Updating endpoint status to running is not allowed when the endpoint is currently in the pending state"},
},
},
}
2 changes: 1 addition & 1 deletion api/client/model_environment.go
@@ -22,7 +22,7 @@ type Environment struct {
DefaultResourceRequest *ResourceRequest `json:"default_resource_request,omitempty"`
DefaultTransformerResourceRequest *ResourceRequest `json:"default_transformer_resource_request,omitempty"`
DefaultPredictionJobResourceRequest *PredictionJobResourceRequest `json:"default_prediction_job_resource_request,omitempty"`
- Gpus []Gpu `json:"gpus,omitempty"`
+ Gpus []GpuConfig `json:"gpus,omitempty"`
CreatedAt time.Time `json:"created_at,omitempty"`
UpdatedAt time.Time `json:"updated_at,omitempty"`
}
17 changes: 0 additions & 17 deletions api/client/model_gpu.go

This file was deleted.

19 changes: 19 additions & 0 deletions api/client/model_gpu_config.go
@@ -0,0 +1,19 @@
/*
* Merlin
*
* API Guide for accessing Merlin's model management, deployment, and serving functionalities
*
* API version: 0.14.0
* Generated by: Swagger Codegen (https://github.com/swagger-api/swagger-codegen.git)
*/
package client

type GpuConfig struct {
Name string `json:"name,omitempty"`
Values []string `json:"values,omitempty"`
ResourceType string `json:"resource_type,omitempty"`
NodeSelector map[string]string `json:"node_selector,omitempty"`
Tolerations []GpuToleration `json:"tolerations,omitempty"`
MinMonthlyCostPerGpu float64 `json:"min_monthly_cost_per_gpu,omitempty"`
MaxMonthlyCostPerGpu float64 `json:"max_monthly_cost_per_gpu,omitempty"`
}
17 changes: 17 additions & 0 deletions api/client/model_gpu_toleration.go
@@ -0,0 +1,17 @@
/*
* Merlin
*
* API Guide for accessing Merlin's model management, deployment, and serving functionalities
*
* API version: 0.14.0
* Generated by: Swagger Codegen (https://github.com/swagger-api/swagger-codegen.git)
*/
package client

type GpuToleration struct {
Key string `json:"key,omitempty"`
Operator string `json:"operator,omitempty"`
Value string `json:"value,omitempty"`
Effect string `json:"effect,omitempty"`
TolerationSeconds int64 `json:"toleration_seconds,omitempty"`
}
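To make the shape of the new payload concrete, here is a self-contained example that re-declares the two generated types above and prints one possible GPU entry; all field values are invented for illustration:

package main

import (
	"encoding/json"
	"fmt"
)

// Copies of the generated client types above, re-declared so the example
// compiles on its own.
type GpuToleration struct {
	Key               string `json:"key,omitempty"`
	Operator          string `json:"operator,omitempty"`
	Value             string `json:"value,omitempty"`
	Effect            string `json:"effect,omitempty"`
	TolerationSeconds int64  `json:"toleration_seconds,omitempty"`
}

type GpuConfig struct {
	Name                 string            `json:"name,omitempty"`
	Values               []string          `json:"values,omitempty"`
	ResourceType         string            `json:"resource_type,omitempty"`
	NodeSelector         map[string]string `json:"node_selector,omitempty"`
	Tolerations          []GpuToleration   `json:"tolerations,omitempty"`
	MinMonthlyCostPerGpu float64           `json:"min_monthly_cost_per_gpu,omitempty"`
	MaxMonthlyCostPerGpu float64           `json:"max_monthly_cost_per_gpu,omitempty"`
}

func main() {
	// Hypothetical values; real entries come from the deployment config.
	cfg := GpuConfig{
		Name:         "NVIDIA T4",
		Values:       []string{"1", "2"},
		ResourceType: "nvidia.com/gpu",
		NodeSelector: map[string]string{"cloud.google.com/gke-accelerator": "nvidia-tesla-t4"},
		Tolerations: []GpuToleration{
			{Key: "nvidia.com/gpu", Operator: "Exists", Effect: "NoSchedule"},
		},
	}
	out, _ := json.MarshalIndent(cfg, "", "  ")
	fmt.Println(string(out))
}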
13 changes: 6 additions & 7 deletions api/client/model_resource_request.go
@@ -9,11 +9,10 @@
package client

type ResourceRequest struct {
- MinReplica int32 `json:"min_replica,omitempty"`
- MaxReplica int32 `json:"max_replica,omitempty"`
- CpuRequest string `json:"cpu_request,omitempty"`
- MemoryRequest string `json:"memory_request,omitempty"`
- GpuResourceType string `json:"gpu_resource_type,omitempty"`
- GpuRequest string `json:"gpu_request,omitempty"`
- GpuNodeSelector map[string]string `json:"gpu_node_selector,omitempty"`
+ MinReplica int32 `json:"min_replica,omitempty"`
+ MaxReplica int32 `json:"max_replica,omitempty"`
+ CpuRequest string `json:"cpu_request,omitempty"`
+ MemoryRequest string `json:"memory_request,omitempty"`
+ GpuName string `json:"gpu_name,omitempty"`
+ GpuRequest string `json:"gpu_request,omitempty"`
}
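The practical effect of this change, sketched below using the ResourceRequest type above: a caller now names a configured GPU instead of spelling out the Kubernetes resource type and node selector itself. The values are invented, and GpuName must match a GpuConfig entry known to the server:

// Before this commit (fields now removed):
//   GpuResourceType: "nvidia.com/gpu",
//   GpuNodeSelector: map[string]string{"cloud.google.com/gke-accelerator": "nvidia-tesla-t4"},
// After:
req := ResourceRequest{
	MinReplica:    1,
	MaxReplica:    4,
	CpuRequest:    "1",
	MemoryRequest: "1Gi",
	GpuName:       "NVIDIA T4", // hypothetical; resolved server-side to resource type, selector, and tolerations
	GpuRequest:    "1",
}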
39 changes: 27 additions & 12 deletions api/cluster/resource/templater.go
@@ -256,16 +256,23 @@ func createPredictorSpec(modelService *models.Service, config *config.Deployment
}

nodeSelector := map[string]string{}
- if !modelService.ResourceRequest.GPURequest.IsZero() {
- // Declare and initialize resourceType and resourceQuantity variables
- resourceType := corev1.ResourceName(modelService.ResourceRequest.GPUResourceType)
- resourceQuantity := modelService.ResourceRequest.GPURequest
-
- // Set the resourceType as the key in the maps, with resourceQuantity as the value
- resources.Requests[resourceType] = resourceQuantity
- resources.Limits[resourceType] = resourceQuantity
-
- nodeSelector = modelService.ResourceRequest.GPUNodeSelector
+ tolerations := []corev1.Toleration{}
+ if modelService.ResourceRequest.GPUName != "" && !modelService.ResourceRequest.GPURequest.IsZero() {
+ // Look up the GPU resource type and quantity from the DeploymentConfig
+ for _, gpuConfig := range config.GPUs {
+ if gpuConfig.Name == modelService.ResourceRequest.GPUName {
+ // Declare and initialize resourceType and resourceQuantity variables
+ resourceType := corev1.ResourceName(gpuConfig.ResourceType)
+ resourceQuantity := modelService.ResourceRequest.GPURequest
+
+ // Set the resourceType as the key in the maps, with resourceQuantity as the value
+ resources.Requests[resourceType] = resourceQuantity
+ resources.Limits[resourceType] = resourceQuantity
+
+ nodeSelector = gpuConfig.NodeSelector
+ tolerations = gpuConfig.Tolerations
+ }
+ }
}

// liveness probe config. if env var to disable != true or not set, it will default to enabled
@@ -360,13 +367,17 @@
},
}
case models.ModelTypeCustom:
- predictorSpec = createCustomPredictorSpec(modelService, resources, nodeSelector)
+ predictorSpec = createCustomPredictorSpec(modelService, resources, nodeSelector, tolerations)
}

if len(nodeSelector) > 0 {
predictorSpec.NodeSelector = nodeSelector
}

+ if len(tolerations) > 0 {
+ predictorSpec.Tolerations = tolerations
+ }

var loggerSpec *kservev1beta1.LoggerSpec
if modelService.Logger != nil && modelService.Logger.Model != nil && modelService.Logger.Model.Enabled {
logger := modelService.Logger
@@ -802,7 +813,7 @@ func createDefaultPredictorEnvVars(modelService *models.Service) models.EnvVars
return defaultEnvVars
}

- func createCustomPredictorSpec(modelService *models.Service, resources corev1.ResourceRequirements, nodeSelector map[string]string) kservev1beta1.PredictorSpec {
+ func createCustomPredictorSpec(modelService *models.Service, resources corev1.ResourceRequirements, nodeSelector map[string]string, tolerations []corev1.Toleration) kservev1beta1.PredictorSpec {
envVars := modelService.EnvVars

// Add default env var (Overwrite by user not allowed)
@@ -846,6 +857,10 @@ func createCustomPredictorSpec(modelService *models.Service, resources corev1.Re
spec.NodeSelector = nodeSelector
}

+ if len(tolerations) > 0 {
+ spec.Tolerations = tolerations
+ }

return spec
}

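Finally, the heart of the templater change: GPU attributes are no longer read off the request but resolved from the deployment config by name. Below is a condensed, runnable sketch under simplified types; GPUConfig here is a stand-in for merlin's config entry, not the real package:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// GPUConfig is a simplified stand-in for the deployment-config entry
// introduced by this commit.
type GPUConfig struct {
	Name         string
	ResourceType string
	NodeSelector map[string]string
	Tolerations  []corev1.Toleration
}

// applyGPUConfig mirrors the lookup loop in createPredictorSpec: match the
// requested GPU by name, then copy its resource type, node selector, and
// tolerations onto the pod spec inputs.
func applyGPUConfig(gpus []GPUConfig, gpuName string, gpuRequest resource.Quantity,
	resources *corev1.ResourceRequirements) (map[string]string, []corev1.Toleration) {
	nodeSelector := map[string]string{}
	tolerations := []corev1.Toleration{}
	if gpuName != "" && !gpuRequest.IsZero() {
		for _, g := range gpus {
			if g.Name == gpuName {
				rt := corev1.ResourceName(g.ResourceType)
				resources.Requests[rt] = gpuRequest
				resources.Limits[rt] = gpuRequest
				nodeSelector = g.NodeSelector
				tolerations = g.Tolerations
			}
		}
	}
	return nodeSelector, tolerations
}

func main() {
	gpus := []GPUConfig{{
		Name:         "NVIDIA T4", // hypothetical config entry
		ResourceType: "nvidia.com/gpu",
		NodeSelector: map[string]string{"gpu": "t4"},
	}}
	res := corev1.ResourceRequirements{
		Requests: corev1.ResourceList{},
		Limits:   corev1.ResourceList{},
	}
	sel, tol := applyGPUConfig(gpus, "NVIDIA T4", resource.MustParse("1"), &res)
	fmt.Println(sel, tol, res.Requests)
}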
(The remaining 29 changed files are not shown.)