cortexlabs · miguelvr · Jul 12, 2021 · Jul 12, 2021 · Jul 15, 2021 · Jul 20, 2021
diff --git a/pkg/config/config.go b/pkg/config/config.go
@@ -24,6 +24,7 @@ import (
 	"github.com/DataDog/datadog-go/statsd"
 	"github.com/cortexlabs/cortex/pkg/consts"
 	batch "github.com/cortexlabs/cortex/pkg/crds/apis/batch/v1alpha1"
+	serverless "github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1"
 	"github.com/cortexlabs/cortex/pkg/lib/aws"
 	cr "github.com/cortexlabs/cortex/pkg/lib/configreader"
 	"github.com/cortexlabs/cortex/pkg/lib/errors"
@@ -55,6 +56,7 @@ var (
+// init adds the client-go, batch and serverless API types to the scheme variable.
+// Each AddToScheme call is wrapped in utilruntime.Must, which is expected to panic
+// if registration returns an error (TODO confirm against apimachinery docs).
 func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
 	utilruntime.Must(batch.AddToScheme(scheme))
+	utilruntime.Must(serverless.AddToScheme(scheme))
 }
 
 func InitConfigs(clusterConfig *clusterconfig.Config, operatorMetadata *clusterconfig.OperatorMetadata) {

diff --git a/pkg/crds/PROJECT b/pkg/crds/PROJECT
@@ -3,7 +3,7 @@ layout:
 - go.kubebuilder.io/v3
 multigroup: true
 projectName: operator
-repo: github.com/cortexlabs/cortex
+repo: github.com/cortexlabs/cortex/pkg/crds
 resources:
 - api:
     crdVersion: v1
@@ -14,4 +14,13 @@ resources:
   kind: BatchJob
   path: github.com/cortexlabs/cortex/pkg/crds/apis/batch/v1alpha1
   version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: cortex.dev
+  group: serverless
+  kind: RealtimeAPI
+  path: github.com/cortexlabs/cortex/pkg/crds/apis/serverless/v1alpha1
+  version: v1alpha1
 version: "3"
diff --git a/pkg/crds/apis/serverless/v1alpha1/groupversion_info.go b/pkg/crds/apis/serverless/v1alpha1/groupversion_info.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1alpha1 contains API Schema definitions for the serverless v1alpha1 API group
+//+kubebuilder:object:generate=true
+//+groupName=serverless.cortex.dev
+package v1alpha1
+
+import (
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+	// GroupVersion is the group version (serverless.cortex.dev/v1alpha1) used to register these objects.
+	GroupVersion = schema.GroupVersion{Group: "serverless.cortex.dev", Version: "v1alpha1"}
+
+	// SchemeBuilder is used to add Go types to the GroupVersionKind scheme.
+	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
+
+	// AddToScheme adds the types in this group-version to the given scheme.
+	AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go b/pkg/crds/apis/serverless/v1alpha1/realtimeapi_types.go
@@ -0,0 +1,246 @@
+/*
+Copyright 2021 Cortex Labs, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1alpha1
+
+import (
+	"github.com/cortexlabs/cortex/pkg/types/status"
+	kcore "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+)
+
+// RealtimeAPISpec defines the desired state of RealtimeAPI: the pod to run, how it is
+// autoscaled, which node groups it may run on, how updates are rolled out, and its networking.
+type RealtimeAPISpec struct {
+	// Pod configuration
+	// +kubebuilder:validation:Required
+	Pod PodSpec `json:"pod"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default={"min_replicas": 1}
+	// Autoscaling configuration
+	Autoscaling AutoscalingSpec `json:"autoscaling"`
+
+	// omitempty keeps an unset (nil) list out of the serialized object instead of emitting `null`,
+	// matching the other optional fields in this file.
+
+	// +kubebuilder:validation:Optional
+	// List of node groups on which this API can run (default: all node groups are eligible)
+	NodeGroups []string `json:"node_groups,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default={"max_surge": "25%", "max_unavailable": "25%"}
+	// Deployment strategy to use when replacing existing replicas with new ones
+	UpdateStrategy UpdateStrategySpec `json:"update_strategy"`
+
+	// +kubebuilder:validation:Required
+	// Networking configuration
+	Networking NetworkingSpec `json:"networking"`
+}
+
+// PodSpec describes the pod that serves the API: the port requests are sent to,
+// the per-replica concurrency and queue limits, the desired replica count, and
+// the containers to run.
+type PodSpec struct {
+	// +kubebuilder:validation:Required
+	// +kubebuilder:default=8080
+	// Port to which requests will be sent
+	Port int32 `json:"port"`
+
+	// +kubebuilder:validation:Required
+	// +kubebuilder:default=1
+	// Maximum number of requests that will be concurrently sent into the container
+	MaxConcurrency int32 `json:"max_concurrency"`
+
+	// +kubebuilder:validation:Required
+	// +kubebuilder:default=100
+	// Maximum number of requests per replica which will be queued
+	// (beyond max_concurrency) before requests are rejected with error code 503
+	MaxQueueLength int32 `json:"max_queue_length"`
+
+	// +kubebuilder:validation:Required
+	// +kubebuilder:default=1
+	// Number of desired replicas
+	Replicas int32 `json:"replicas"`
+
+	// +kubebuilder:validation:Required
+	// Configurations for the containers to run
+	Containers []ContainerSpec `json:"containers"`
+}
+
+// ContainerSpec describes a single container of the API's pod: its name and image,
+// optional entrypoint, arguments and environment, its compute requests, and its
+// readiness and liveness probes.
+type ContainerSpec struct {
+	// +kubebuilder:validation:Required
+	// Name of the container
+	Name string `json:"name"`
+
+	// +kubebuilder:validation:Required
+	// Docker image to use for the container
+	Image string `json:"image"`
+
+	// +kubebuilder:validation:Optional
+	// Entrypoint (not executed within a shell)
+	Command []string `json:"command,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Arguments to the entrypoint
+	Args []string `json:"args,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Environment variables to set in the container
+	Env []kcore.EnvVar `json:"env,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Compute resource requests
+	Compute *ComputeSpec `json:"compute,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Periodic probe of container readiness;
+	// traffic will not be sent into the pod unless all containers' readiness probes are succeeding
+	ReadinessProbe *kcore.Probe `json:"readiness_probe,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Periodic probe of container liveness; container will be restarted if the probe fails
+	LivenessProbe *kcore.Probe `json:"liveness_probe,omitempty"`
+}
+
+// ComputeSpec lists the compute resources requested for a container: CPU, GPU,
+// Inferentia chips, memory, and shared memory. All fields are optional.
+type ComputeSpec struct {
+	// +kubebuilder:validation:Optional
+	// CPU request for the container; one unit of CPU corresponds to one virtual CPU;
+	// fractional requests are allowed, and can be specified as a floating point number or via the "m" suffix
+	CPU *resource.Quantity `json:"cpu,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// GPU request for the container; one unit of GPU corresponds to one virtual GPU
+	GPU int64 `json:"gpu,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Inferentia request for the container; one unit of Inf corresponds to one virtual Inf chip
+	Inf int64 `json:"inf,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Memory request for the container;
+	// one unit of memory is one byte and can be expressed as an integer or by using one of these suffixes: K, M, G, T
+	// (or their power-of-two counterparts: Ki, Mi, Gi, Ti)
+	Mem *resource.Quantity `json:"mem,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Size of shared memory (/dev/shm) for sharing data between multiple processes
+	Shm *resource.Quantity `json:"shm,omitempty"`
+}
+
+// AutoscalingSpec configures replica autoscaling: the replica bounds, the target number
+// of in-flight requests per replica, the averaging window, the stabilization periods,
+// and the scaling factors and tolerances (which are carried as strings, e.g. "0.75").
+type AutoscalingSpec struct {
+	// +kubebuilder:default=1
+	// Minimum number of replicas
+	MinReplicas int32 `json:"min_replicas,omitempty"`
+
+	// +kubebuilder:default=100
+	// Maximum number of replicas
+	MaxReplicas int32 `json:"max_replicas,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// Desired number of in-flight requests per replica (including requests actively being processed as well as queued),
+	// which the autoscaler tries to maintain
+	TargetInFlight string `json:"target_in_flight,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="60s"
+	// Duration over which to average the API's in-flight requests per replica
+	Window kmeta.Duration `json:"window,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="5m"
+	// The API will not scale below the highest recommendation made during this period
+	DownscaleStabilizationPeriod kmeta.Duration `json:"downscale_stabilization_period,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="1m"
+	// The API will not scale above the lowest recommendation made during this period
+	UpscaleStabilizationPeriod kmeta.Duration `json:"upscale_stabilization_period,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="0.75"
+	// Maximum factor by which to scale down the API on a single scaling event
+	MaxDownscaleFactor string `json:"max_downscale_factor,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="1.5"
+	// Maximum factor by which to scale up the API on a single scaling event
+	MaxUpscaleFactor string `json:"max_upscale_factor,omitempty"`
+
+	// NOTE(review): both tolerance defaults below are "0.5"; confirm this is intended and
+	// not a typo for "0.05" (TODO verify against the autoscaler's expected defaults).
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="0.5"
+	// Any recommendation falling within this factor below the current number of replicas will not trigger a
+	// scale down event
+	DownscaleTolerance string `json:"downscale_tolerance,omitempty"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="0.5"
+	// Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event
+	UpscaleTolerance string `json:"upscale_tolerance,omitempty"`
+}
+
+// UpdateStrategySpec bounds how many replicas may be added above, or be unavailable
+// below, the desired replica count while existing replicas are replaced during an update.
+type UpdateStrategySpec struct {
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="25%"
+	// Maximum number of replicas that can be scheduled above the desired number of replicas during an update;
+	// can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
+	// (set to 0 to disable rolling updates)
+	MaxSurge intstr.IntOrString `json:"max_surge"`
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:default="25%"
+	// Maximum number of replicas that can be unavailable during an update; can be an absolute number,
+	// e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
+	MaxUnavailable intstr.IntOrString `json:"max_unavailable"`
+}
+
+// NetworkingSpec holds the networking configuration of the API; at present this is
+// only its optional endpoint.
+type NetworkingSpec struct {
+	// +kubebuilder:validation:Optional
+	// Endpoint for the API
+	Endpoint string `json:"endpoint,omitempty"`
+}
+
+// RealtimeAPIStatus defines the observed state of RealtimeAPI: its status code
+// (typed as a string in the CRD schema via the marker on Status), its replica
+// counts, and its endpoint.
+type RealtimeAPIStatus struct {
+	// +kubebuilder:validation:Type=string
+	Status        status.Code          `json:"status"`
+	ReplicaCounts status.ReplicaCounts `json:"replica_counts"`
+	Endpoint      string               `json:"endpoint,omitempty"`
+}
+
+// The markers below mark RealtimeAPI as a root object with a status subresource and
+// declare the Replicas, Ready, Status and Endpoint printer columns.
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+//+kubebuilder:printcolumn:JSONPath=".spec.pod.replicas",name="Replicas",type="integer"
+//+kubebuilder:printcolumn:JSONPath=".status.replica_counts.updated.ready",name="Ready",type="integer"
+//+kubebuilder:printcolumn:JSONPath=".status.status",name="Status",type="string"
+//+kubebuilder:printcolumn:JSONPath=".status.endpoint",name="Endpoint",type="string"
+
+// RealtimeAPI is the Schema for the realtimeapis API.
+// Spec carries the desired state and Status the observed state.
+type RealtimeAPI struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	Spec   RealtimeAPISpec   `json:"spec,omitempty"`
+	Status RealtimeAPIStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// RealtimeAPIList contains a list of RealtimeAPI resources in its Items field,
+// alongside the standard type and list metadata.
+type RealtimeAPIList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []RealtimeAPI `json:"items"`
+}
+
+// init registers the RealtimeAPI and RealtimeAPIList types with this package's SchemeBuilder.
+func init() {
+	SchemeBuilder.Register(new(RealtimeAPI), new(RealtimeAPIList))
+}