Merge branch 'main' into kserve-0.11
Arief Rahmansyah committed Sep 5, 2023
2 parents fa90df5 + 62d30af commit 3e862c7
Showing 38 changed files with 1,412 additions and 515 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/codesee-arch-diagram.yml
@@ -0,0 +1,23 @@
# This workflow was added by CodeSee. Learn more at https://codesee.io/
# This is v2.0 of this workflow file
on:
push:
branches:
- main
pull_request_target:
types: [opened, synchronize, reopened]

name: CodeSee

permissions: read-all

jobs:
codesee:
runs-on: ubuntu-latest
continue-on-error: true
name: Analyze the repo with CodeSee
steps:
- uses: Codesee-io/codesee-action@v2
with:
codesee-token: ${{ secrets.CODESEE_ARCH_DIAG_API_TOKEN }}
codesee-url: https://app.codesee.io
6 changes: 5 additions & 1 deletion api/api/version_endpoints_api.go
@@ -392,9 +392,13 @@ func validateUpdateRequest(prev *models.VersionEndpoint, new *models.VersionEndp
return fmt.Errorf("Updating environment is not allowed, previous: %s, new: %s", prev.EnvironmentName, new.EnvironmentName)
}

+ if prev.Status == models.EndpointPending {
+ return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is currently in the pending state", new.Status)
+ }

if new.Status != prev.Status {
if prev.Status == models.EndpointServing {
return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is in serving state", new.Status)
return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is currently in the serving state", new.Status)
}

if new.Status != models.EndpointRunning && new.Status != models.EndpointTerminated {
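As context for the test changes below, here is a minimal, self-contained sketch of the two status guards above. The EndpointStatus type and constants are simplified stand-ins, not the actual merlin models package:

package main

import "fmt"

// Simplified stand-ins for the status constants used in validateUpdateRequest.
type EndpointStatus string

const (
	EndpointPending EndpointStatus = "pending"
	EndpointRunning EndpointStatus = "running"
	EndpointServing EndpointStatus = "serving"
)

// validateStatusUpdate condenses the two guards: no update is allowed while
// the endpoint is pending, and a serving endpoint's status may not change.
func validateStatusUpdate(prev, next EndpointStatus) error {
	if prev == EndpointPending {
		return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is currently in the pending state", next)
	}
	if next != prev && prev == EndpointServing {
		return fmt.Errorf("Updating endpoint status to %s is not allowed when the endpoint is currently in the serving state", next)
	}
	return nil
}

func main() {
	// Mirrors the new 400 case exercised in version_endpoints_api_test.go.
	fmt.Println(validateStatusUpdate(EndpointPending, EndpointRunning))
}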
125 changes: 119 additions & 6 deletions api/api/version_endpoints_api_test.go
@@ -3394,7 +3394,7 @@ func TestUpdateEndpoint(t *testing.T) {
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
- Status: models.EndpointPending,
+ Status: models.EndpointRunning,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
@@ -3648,7 +3648,120 @@ func TestUpdateEndpoint(t *testing.T) {
},
expected: &Response{
code: http.StatusBadRequest,
data: Error{Message: "Error validating request: Updating endpoint status to running is not allowed when the endpoint is in serving state"},
data: Error{Message: "Error validating request: Updating endpoint status to running is not allowed when the endpoint is currently in the serving state"},
},
},
{
desc: "Should 400 if endpoint status is in the pending state",
vars: map[string]string{
"model_id": "1",
"version_id": "1",
"endpoint_id": uuid.String(),
},
requestBody: &models.VersionEndpoint{
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
Status: models.EndpointRunning,
ServiceName: "sample",
Namespace: "sample",
EnvironmentName: "dev",
Message: "",
ResourceRequest: &models.ResourceRequest{
MinReplica: 1,
MaxReplica: 4,
CPURequest: resource.MustParse("1"),
MemoryRequest: resource.MustParse("1Gi"),
},
EnvVars: models.EnvVars([]models.EnvVar{
{
Name: "WORKER",
Value: "1",
},
}),
},
modelService: func() *mocks.ModelsService {
svc := &mocks.ModelsService{}
svc.On("FindByID", context.Background(), models.ID(1)).Return(&models.Model{
ID: models.ID(1),
Name: "model-1",
ProjectID: models.ID(1),
Project: mlp.Project{},
ExperimentID: 1,
Type: "pyfunc",
MlflowURL: "",
Endpoints: nil,
}, nil)
return svc
},
versionService: func() *mocks.VersionsService {
svc := &mocks.VersionsService{}
svc.On("FindByID", context.Background(), models.ID(1), models.ID(1), mock.Anything).Return(&models.Version{
ID: models.ID(1),
ModelID: models.ID(1),
Model: &models.Model{
ID: models.ID(1),
Name: "model-1",
ProjectID: models.ID(1),
Project: mlp.Project{},
ExperimentID: 1,
Type: "pyfunc",
MlflowURL: "",
Endpoints: nil,
},
}, nil)
return svc
},
envService: func() *mocks.EnvironmentService {
svc := &mocks.EnvironmentService{}
svc.On("GetEnvironment", "dev").Return(&models.Environment{
ID: models.ID(1),
Name: "dev",
Cluster: "dev",
IsDefault: &trueBoolean,
Region: "id",
GcpProject: "dev-proj",
MaxCPU: "1",
MaxMemory: "1Gi",
}, nil)
return svc
},
endpointService: func() *mocks.EndpointsService {
svc := &mocks.EndpointsService{}
svc.On("FindByID", context.Background(), uuid).Return(&models.VersionEndpoint{
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
Status: models.EndpointPending,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
URL: "http://endpoint.svc",
MonitoringURL: "http://monitoring.com",
Environment: &models.Environment{
ID: models.ID(1),
Name: "dev",
Cluster: "dev",
IsDefault: &trueBoolean,
Region: "id",
GcpProject: "dev-proj",
MaxCPU: "1",
MaxMemory: "1Gi",
}, EnvironmentName: "dev",
Message: "",
ResourceRequest: nil,
EnvVars: models.EnvVars([]models.EnvVar{
{
Name: "WORKER",
Value: "1",
},
}),
}, nil)
return svc
},
expected: &Response{
code: http.StatusBadRequest,
data: Error{Message: "Error validating request: Updating endpoint status to running is not allowed when the endpoint is currently in the pending state"},
},
},
{
@@ -3949,7 +4062,7 @@ func TestUpdateEndpoint(t *testing.T) {
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
- Status: models.EndpointPending,
+ Status: models.EndpointRunning,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
@@ -4062,7 +4175,7 @@ func TestUpdateEndpoint(t *testing.T) {
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
- Status: models.EndpointPending,
+ Status: models.EndpointRunning,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
@@ -4239,7 +4352,7 @@ func TestUpdateEndpoint(t *testing.T) {
ID: uuid,
VersionID: models.ID(1),
VersionModelID: models.ID(1),
- Status: models.EndpointPending,
+ Status: models.EndpointRunning,
ServiceName: "sample",
InferenceServiceName: "sample",
Namespace: "sample",
@@ -4927,7 +5040,7 @@ func TestUpdateEndpoint(t *testing.T) {
},
expected: &Response{
code: http.StatusBadRequest,
data: Error{Message: "Changing deployment type of a pending model is not allowed, please terminate it first."},
data: Error{Message: "Error validating request: Updating endpoint status to running is not allowed when the endpoint is currently in the pending state"},
},
},
}
2 changes: 1 addition & 1 deletion api/client/model_environment.go
@@ -22,7 +22,7 @@ type Environment struct {
DefaultResourceRequest *ResourceRequest `json:"default_resource_request,omitempty"`
DefaultTransformerResourceRequest *ResourceRequest `json:"default_transformer_resource_request,omitempty"`
DefaultPredictionJobResourceRequest *PredictionJobResourceRequest `json:"default_prediction_job_resource_request,omitempty"`
- Gpus []Gpu `json:"gpus,omitempty"`
+ Gpus []GpuConfig `json:"gpus,omitempty"`
CreatedAt time.Time `json:"created_at,omitempty"`
UpdatedAt time.Time `json:"updated_at,omitempty"`
}
17 changes: 0 additions & 17 deletions api/client/model_gpu.go

This file was deleted.

19 changes: 19 additions & 0 deletions api/client/model_gpu_config.go
@@ -0,0 +1,19 @@
/*
* Merlin
*
* API Guide for accessing Merlin's model management, deployment, and serving functionalities
*
* API version: 0.14.0
* Generated by: Swagger Codegen (https://github.com/swagger-api/swagger-codegen.git)
*/
package client

type GpuConfig struct {
Name string `json:"name,omitempty"`
Values []string `json:"values,omitempty"`
ResourceType string `json:"resource_type,omitempty"`
NodeSelector map[string]string `json:"node_selector,omitempty"`
Tolerations []GpuToleration `json:"tolerations,omitempty"`
MinMonthlyCostPerGpu float64 `json:"min_monthly_cost_per_gpu,omitempty"`
MaxMonthlyCostPerGpu float64 `json:"max_monthly_cost_per_gpu,omitempty"`
}
17 changes: 17 additions & 0 deletions api/client/model_gpu_toleration.go
@@ -0,0 +1,17 @@
/*
* Merlin
*
* API Guide for accessing Merlin's model management, deployment, and serving functionalities
*
* API version: 0.14.0
* Generated by: Swagger Codegen (https://github.com/swagger-api/swagger-codegen.git)
*/
package client

type GpuToleration struct {
Key string `json:"key,omitempty"`
Operator string `json:"operator,omitempty"`
Value string `json:"value,omitempty"`
Effect string `json:"effect,omitempty"`
TolerationSeconds int64 `json:"toleration_seconds,omitempty"`
}
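To make the shape of the new payload concrete, here is a self-contained example that re-declares the two generated types above and prints one possible GPU entry; all field values are invented for illustration:

package main

import (
	"encoding/json"
	"fmt"
)

// Copies of the generated client types above, re-declared so the example
// compiles on its own.
type GpuToleration struct {
	Key               string `json:"key,omitempty"`
	Operator          string `json:"operator,omitempty"`
	Value             string `json:"value,omitempty"`
	Effect            string `json:"effect,omitempty"`
	TolerationSeconds int64  `json:"toleration_seconds,omitempty"`
}

type GpuConfig struct {
	Name                 string            `json:"name,omitempty"`
	Values               []string          `json:"values,omitempty"`
	ResourceType         string            `json:"resource_type,omitempty"`
	NodeSelector         map[string]string `json:"node_selector,omitempty"`
	Tolerations          []GpuToleration   `json:"tolerations,omitempty"`
	MinMonthlyCostPerGpu float64           `json:"min_monthly_cost_per_gpu,omitempty"`
	MaxMonthlyCostPerGpu float64           `json:"max_monthly_cost_per_gpu,omitempty"`
}

func main() {
	// Hypothetical values; real entries come from the deployment config.
	cfg := GpuConfig{
		Name:         "NVIDIA T4",
		Values:       []string{"1", "2"},
		ResourceType: "nvidia.com/gpu",
		NodeSelector: map[string]string{"cloud.google.com/gke-accelerator": "nvidia-tesla-t4"},
		Tolerations: []GpuToleration{
			{Key: "nvidia.com/gpu", Operator: "Exists", Effect: "NoSchedule"},
		},
	}
	out, _ := json.MarshalIndent(cfg, "", "  ")
	fmt.Println(string(out))
}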
13 changes: 6 additions & 7 deletions api/client/model_resource_request.go
@@ -9,11 +9,10 @@
package client

type ResourceRequest struct {
- MinReplica int32 `json:"min_replica,omitempty"`
- MaxReplica int32 `json:"max_replica,omitempty"`
- CpuRequest string `json:"cpu_request,omitempty"`
- MemoryRequest string `json:"memory_request,omitempty"`
- GpuResourceType string `json:"gpu_resource_type,omitempty"`
- GpuRequest string `json:"gpu_request,omitempty"`
- GpuNodeSelector map[string]string `json:"gpu_node_selector,omitempty"`
+ MinReplica int32 `json:"min_replica,omitempty"`
+ MaxReplica int32 `json:"max_replica,omitempty"`
+ CpuRequest string `json:"cpu_request,omitempty"`
+ MemoryRequest string `json:"memory_request,omitempty"`
+ GpuName string `json:"gpu_name,omitempty"`
+ GpuRequest string `json:"gpu_request,omitempty"`
}
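The practical effect of this change, sketched below using the ResourceRequest type above: a caller now names a configured GPU instead of spelling out the Kubernetes resource type and node selector itself. The values are invented, and GpuName must match a GpuConfig entry known to the server:

// Before this commit (fields now removed):
//   GpuResourceType: "nvidia.com/gpu",
//   GpuNodeSelector: map[string]string{"cloud.google.com/gke-accelerator": "nvidia-tesla-t4"},
// After:
req := ResourceRequest{
	MinReplica:    1,
	MaxReplica:    4,
	CpuRequest:    "1",
	MemoryRequest: "1Gi",
	GpuName:       "NVIDIA T4", // hypothetical; resolved server-side to resource type, selector, and tolerations
	GpuRequest:    "1",
}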
39 changes: 27 additions & 12 deletions api/cluster/resource/templater.go
@@ -256,16 +256,23 @@ func createPredictorSpec(modelService *models.Service, config *config.Deployment
}

nodeSelector := map[string]string{}
- if !modelService.ResourceRequest.GPURequest.IsZero() {
- // Declare and initialize resourceType and resourceQuantity variables
- resourceType := corev1.ResourceName(modelService.ResourceRequest.GPUResourceType)
- resourceQuantity := modelService.ResourceRequest.GPURequest
-
- // Set the resourceType as the key in the maps, with resourceQuantity as the value
- resources.Requests[resourceType] = resourceQuantity
- resources.Limits[resourceType] = resourceQuantity
-
- nodeSelector = modelService.ResourceRequest.GPUNodeSelector
+ tolerations := []corev1.Toleration{}
+ if modelService.ResourceRequest.GPUName != "" && !modelService.ResourceRequest.GPURequest.IsZero() {
+ // Look up the GPU resource type and quantity from the DeploymentConfig
+ for _, gpuConfig := range config.GPUs {
+ if gpuConfig.Name == modelService.ResourceRequest.GPUName {
+ // Declare and initialize resourceType and resourceQuantity variables
+ resourceType := corev1.ResourceName(gpuConfig.ResourceType)
+ resourceQuantity := modelService.ResourceRequest.GPURequest
+
+ // Set the resourceType as the key in the maps, with resourceQuantity as the value
+ resources.Requests[resourceType] = resourceQuantity
+ resources.Limits[resourceType] = resourceQuantity
+
+ nodeSelector = gpuConfig.NodeSelector
+ tolerations = gpuConfig.Tolerations
+ }
+ }
}

// liveness probe config. if env var to disable != true or not set, it will default to enabled
@@ -360,13 +367,17 @@
},
}
case models.ModelTypeCustom:
- predictorSpec = createCustomPredictorSpec(modelService, resources, nodeSelector)
+ predictorSpec = createCustomPredictorSpec(modelService, resources, nodeSelector, tolerations)
}

if len(nodeSelector) > 0 {
predictorSpec.NodeSelector = nodeSelector
}

+ if len(tolerations) > 0 {
+ predictorSpec.Tolerations = tolerations
+ }

var loggerSpec *kservev1beta1.LoggerSpec
if modelService.Logger != nil && modelService.Logger.Model != nil && modelService.Logger.Model.Enabled {
logger := modelService.Logger
@@ -802,7 +813,7 @@ func createDefaultPredictorEnvVars(modelService *models.Service) models.EnvVars
return defaultEnvVars
}

- func createCustomPredictorSpec(modelService *models.Service, resources corev1.ResourceRequirements, nodeSelector map[string]string) kservev1beta1.PredictorSpec {
+ func createCustomPredictorSpec(modelService *models.Service, resources corev1.ResourceRequirements, nodeSelector map[string]string, tolerations []corev1.Toleration) kservev1beta1.PredictorSpec {
envVars := modelService.EnvVars

// Add default env var (Overwrite by user not allowed)
@@ -846,6 +857,10 @@ func createCustomPredictorSpec(modelService *models.Service, resources corev1.Re
spec.NodeSelector = nodeSelector
}

+ if len(tolerations) > 0 {
+ spec.Tolerations = tolerations
+ }

return spec
}

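Finally, the heart of the templater change: GPU attributes are no longer read off the request but resolved from the deployment config by name. Below is a condensed, runnable sketch under simplified types; GPUConfig here is a stand-in for merlin's config entry, not the real package:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// GPUConfig is a simplified stand-in for the deployment-config entry
// introduced by this commit.
type GPUConfig struct {
	Name         string
	ResourceType string
	NodeSelector map[string]string
	Tolerations  []corev1.Toleration
}

// applyGPUConfig mirrors the lookup loop in createPredictorSpec: match the
// requested GPU by name, then copy its resource type, node selector, and
// tolerations onto the pod spec inputs.
func applyGPUConfig(gpus []GPUConfig, gpuName string, gpuRequest resource.Quantity,
	resources *corev1.ResourceRequirements) (map[string]string, []corev1.Toleration) {
	nodeSelector := map[string]string{}
	tolerations := []corev1.Toleration{}
	if gpuName != "" && !gpuRequest.IsZero() {
		for _, g := range gpus {
			if g.Name == gpuName {
				rt := corev1.ResourceName(g.ResourceType)
				resources.Requests[rt] = gpuRequest
				resources.Limits[rt] = gpuRequest
				nodeSelector = g.NodeSelector
				tolerations = g.Tolerations
			}
		}
	}
	return nodeSelector, tolerations
}

func main() {
	gpus := []GPUConfig{{
		Name:         "NVIDIA T4", // hypothetical config entry
		ResourceType: "nvidia.com/gpu",
		NodeSelector: map[string]string{"gpu": "t4"},
	}}
	res := corev1.ResourceRequirements{
		Requests: corev1.ResourceList{},
		Limits:   corev1.ResourceList{},
	}
	sel, tol := applyGPUConfig(gpus, "NVIDIA T4", resource.MustParse("1"), &res)
	fmt.Println(sel, tol, res.Requests)
}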
(The remaining 29 changed files are not shown.)