From 81ec2f433e12222e6d9a733453ece780947305e2 Mon Sep 17 00:00:00 2001 From: Ben Eggers <64657842+beggers@users.noreply.github.com> Date: Tue, 27 Feb 2024 13:09:52 -0800 Subject: [PATCH] [ENH] Helm chart (#1776) ## Description of changes *Summarize the changes made by this PR.* - New functionality - Basic helm chart for distributed chroma ## Test plan *How are these changes tested?* - [ ] Tests pass locally with `pytest` for python, `yarn test` for js ## Documentation Changes *Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs repository](https://github.com/chroma-core/docs)?* --- Tiltfile | 97 +++-- bin/reset.sh | 13 - k8s/cr/worker_memberlist_cr.yaml | 48 --- k8s/deployment/kubernetes.yaml | 335 ------------------ k8s/dev/setup.yaml | 109 ------ k8s/dev/worker.yaml | 40 --- k8s/distributed-chroma/.helmignore | 23 ++ k8s/distributed-chroma/Chart.yaml | 30 ++ .../crds}/memberlist_crd.yaml | 1 + .../templates}/coordinator.yaml | 39 +- .../templates/frontend-server.yaml} | 16 +- .../templates}/logservice.yaml | 4 +- .../templates}/migration.yaml | 2 +- .../templates/namespace.yaml | 8 + .../templates/pod-watcher-role.yaml | 13 + .../templates}/postgres.yaml | 4 +- .../templates}/pulsar.yaml | 4 +- .../templates/worker.yaml} | 68 ++-- .../templates/worker_memberlist_cr.yaml | 79 +++++ k8s/distributed-chroma/values.yaml | 7 + k8s/test/README.md | 1 + k8s/test/minio.yaml | 32 +- ...server_service.yml => worker_service.yaml} | 6 +- 23 files changed, 338 insertions(+), 641 deletions(-) delete mode 100755 bin/reset.sh delete mode 100644 k8s/cr/worker_memberlist_cr.yaml delete mode 100644 k8s/deployment/kubernetes.yaml delete mode 100644 k8s/dev/setup.yaml delete mode 100644 k8s/dev/worker.yaml create mode 100644 k8s/distributed-chroma/.helmignore create mode 100644 k8s/distributed-chroma/Chart.yaml rename k8s/{crd => distributed-chroma/crds}/memberlist_crd.yaml (92%) rename k8s/{dev => 
distributed-chroma/templates}/coordinator.yaml (51%) rename k8s/{dev/server.yaml => distributed-chroma/templates/frontend-server.yaml} (86%) rename k8s/{dev => distributed-chroma/templates}/logservice.yaml (89%) rename k8s/{dev => distributed-chroma/templates}/migration.yaml (92%) create mode 100644 k8s/distributed-chroma/templates/namespace.yaml create mode 100644 k8s/distributed-chroma/templates/pod-watcher-role.yaml rename k8s/{dev => distributed-chroma/templates}/postgres.yaml (90%) rename k8s/{dev => distributed-chroma/templates}/pulsar.yaml (93%) rename k8s/{deployment/segment-server.yaml => distributed-chroma/templates/worker.yaml} (57%) create mode 100644 k8s/distributed-chroma/templates/worker_memberlist_cr.yaml create mode 100644 k8s/distributed-chroma/values.yaml create mode 100644 k8s/test/README.md rename k8s/test/{segment_server_service.yml => worker_service.yaml} (64%) diff --git a/Tiltfile b/Tiltfile index 5eae55776b8..b0a1c3ac17b 100644 --- a/Tiltfile +++ b/Tiltfile @@ -1,44 +1,93 @@ update_settings(max_parallel_updates=6) -docker_build('migration', - context='.', - dockerfile='./go/Dockerfile.migration' +docker_build( + 'migration', + context='.', + dockerfile='./go/Dockerfile.migration' ) -docker_build('coordinator', - context='.', - dockerfile='./go/Dockerfile' +docker_build( + 'coordinator', + context='.', + dockerfile='./go/Dockerfile' ) -docker_build('server', - context='.', - dockerfile='./Dockerfile', +docker_build( + 'server', + context='.', + dockerfile='./Dockerfile', ) -docker_build('worker', - context='.', - dockerfile='./rust/worker/Dockerfile' +docker_build( + 'worker', + context='.', + dockerfile='./rust/worker/Dockerfile' ) +k8s_yaml( + helm( + 'k8s/distributed-chroma', + namespace='chroma', + values=[ + 'k8s/distributed-chroma/values.yaml' + ] + ) +) -k8s_yaml(['k8s/dev/setup.yaml']) +# Lots of things assume the cluster is in a basic state. Get it into a basic +# state before deploying anything else. 
k8s_resource( - objects=['chroma:Namespace', 'memberlist-reader:ClusterRole', 'memberlist-reader:ClusterRoleBinding', 'pod-list-role:Role', 'pod-list-role-binding:RoleBinding', 'memberlists.chroma.cluster:CustomResourceDefinition','worker-memberlist:MemberList', 'test-memberlist:MemberList'], + objects=[ + 'chroma:Namespace', + 'pod-watcher:Role', + 'memberlists.chroma.cluster:CustomResourceDefinition', + 'worker-memberlist:MemberList', + + 'coordinator-serviceaccount:serviceaccount', + 'coordinator-serviceaccount-rolebinding:RoleBinding', + 'coordinator-worker-memberlist-binding:clusterrolebinding', + + 'worker-serviceaccount:serviceaccount', + 'worker-serviceaccount-rolebinding:RoleBinding', + 'worker-memberlist-readerwriter:ClusterRole', + 'worker-worker-memberlist-binding:clusterrolebinding', + 'worker-memberlist-readerwriter-binding:clusterrolebinding', + + 'test-memberlist:MemberList', + 'test-memberlist-reader:ClusterRole', + 'test-memberlist-reader-binding:ClusterRoleBinding', + ], new_name='k8s_setup', labels=["infrastructure"] ) -k8s_yaml(['k8s/dev/pulsar.yaml']) -k8s_resource('pulsar', resource_deps=['k8s_setup'], labels=["infrastructure"]) -k8s_yaml(['k8s/dev/postgres.yaml']) + +# Production Chroma k8s_resource('postgres', resource_deps=['k8s_setup'], labels=["infrastructure"]) -k8s_yaml(['k8s/dev/migration.yaml']) +k8s_resource('pulsar', resource_deps=['k8s_setup'], labels=["infrastructure"], port_forwards=['6650:6650', '8080:8080']) k8s_resource('migration', resource_deps=['postgres'], labels=["chroma"]) -k8s_yaml(['k8s/dev/logservice.yaml']) k8s_resource('logservice', resource_deps=['migration'], labels=["chroma"]) -k8s_resource('pulsar', resource_deps=['k8s_setup'], labels=["infrastructure"], port_forwards=['6650:6650', '8080:8080']) -k8s_yaml(['k8s/dev/server.yaml']) -k8s_resource('server', resource_deps=['pulsar'],labels=["chroma"], port_forwards=8000 ) -k8s_yaml(['k8s/dev/coordinator.yaml']) +k8s_resource('frontend-server', 
resource_deps=['pulsar'],labels=["chroma"], port_forwards=8000 ) k8s_resource('coordinator', resource_deps=['pulsar'], labels=["chroma"], port_forwards=50051) -k8s_yaml(['k8s/dev/worker.yaml']) k8s_resource('worker', resource_deps=['coordinator'],labels=["chroma"]) + +# Extra stuff to make debugging and testing easier +k8s_yaml([ + 'k8s/test/coordinator_service.yaml', + 'k8s/test/minio.yaml', + 'k8s/test/pulsar_service.yaml', + 'k8s/test/worker_service.yaml', + 'k8s/test/test_memberlist_cr.yaml', +]) +k8s_resource( + objects=[ + # I don't know why but Tilt denies the existence of 'coordinator:service' et al + # when you try to add them here. + 'worker:service', + ], + new_name='debug_setup', + resource_deps=['worker'], + labels=["debug"], +) + +# Local S3 +k8s_resource('minio-deployment', resource_deps=['k8s_setup'], labels=["debug"], port_forwards=9000) \ No newline at end of file diff --git a/bin/reset.sh b/bin/reset.sh deleted file mode 100755 index e1819f0c7a0..00000000000 --- a/bin/reset.sh +++ /dev/null @@ -1,13 +0,0 @@ - #!/usr/bin/env bash - -eval $(minikube -p chroma-test docker-env) - -docker build -t chroma-coordinator:latest -f go/Dockerfile . - -kubectl delete deployment coordinator -n chroma - -# Apply the kubernetes manifests -kubectl apply -f k8s/deployment -kubectl apply -f k8s/crd -kubectl apply -f k8s/cr -kubectl apply -f k8s/test diff --git a/k8s/cr/worker_memberlist_cr.yaml b/k8s/cr/worker_memberlist_cr.yaml deleted file mode 100644 index bc4df07f535..00000000000 --- a/k8s/cr/worker_memberlist_cr.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# These kubernetes manifests are UNDER ACTIVE DEVELOPMENT and are not yet ready for production use. -# They will be used for the upcoming distributed version of chroma. They are not even ready -# for testing yet. Please do not use them unless you are working on the distributed version of chroma. 
- -# Create a memberlist called worker-memberlist -apiVersion: chroma.cluster/v1 -kind: MemberList -metadata: - name: worker-memberlist - namespace: chroma -spec: - members: - ---- - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: worker-memberlist-reader -rules: -- apiGroups: - - chroma.cluster - resources: - - memberlists - verbs: - - get - - list - - watch - # TODO: FIX THIS LEAKY PERMISSION - - create - - update - - patch - - delete - ---- - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: worker-memberlist-reader-binding -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: worker-memberlist-reader -subjects: -- kind: ServiceAccount - name: default - namespace: chroma diff --git a/k8s/deployment/kubernetes.yaml b/k8s/deployment/kubernetes.yaml deleted file mode 100644 index 5b5ec4a7a84..00000000000 --- a/k8s/deployment/kubernetes.yaml +++ /dev/null @@ -1,335 +0,0 @@ -# These kubernetes manifests are UNDER ACTIVE DEVELOPMENT and are not yet ready for production use. -# They will be used for the upcoming distributed version of chroma. They are not even ready -# for testing yet. Please do not use them unless you are working on the distributed version of chroma. 
- -apiVersion: v1 -kind: Namespace -metadata: - name: chroma - ---- - -apiVersion: v1 -kind: Service -metadata: - name: pulsar - namespace: chroma -spec: - ports: - - name: pulsar-port - port: 6650 - targetPort: 6650 - - name: admin-port - port: 8080 - targetPort: 8080 - selector: - app: pulsar - type: ClusterIP - ---- - -# TODO: Should be stateful set locally or managed via terraform into streamnative for cloud deployment -apiVersion: apps/v1 -kind: Deployment -metadata: - name: pulsar - namespace: chroma -spec: - replicas: 1 - selector: - matchLabels: - app: pulsar - template: - metadata: - labels: - app: pulsar - spec: - containers: - - name: pulsar - image: apachepulsar/pulsar - command: [ "/pulsar/bin/pulsar", "standalone" ] - env: - # This is needed by github actions. We force this to be lower everywehre for now. - # Since real deployments will configure/use pulsar this way. - - name: PULSAR_MEM - value: "-Xms128m -Xmx512m" - ports: - - containerPort: 6650 - - containerPort: 8080 - volumeMounts: - - name: pulsardata - mountPath: /pulsar/data - # readinessProbe: - # httpGet: - # path: /admin/v2/brokers/health - # port: 8080 - # initialDelaySeconds: 10 - # periodSeconds: 5 - # livenessProbe: - # httpGet: - # path: /admin/v2/brokers/health - # port: 8080 - # initialDelaySeconds: 20 - # periodSeconds: 10 - volumes: - - name: pulsardata - emptyDir: {} - ---- - -apiVersion: v1 -kind: Service -metadata: - name: postgres - namespace: chroma -spec: - ports: - - name: postgres-port - port: 5432 - targetPort: 5432 - selector: - app: postgres - type: ClusterIP - ---- - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: postgres - namespace: chroma -spec: - replicas: 1 - selector: - matchLabels: - app: postgres - template: - metadata: - labels: - app: postgres - spec: - containers: - - name: postgres - image: postgres:14.1-alpine - env: - - name: POSTGRES_DB - value: chroma - - name: POSTGRES_USER - value: chroma - - name: POSTGRES_PASSWORD - value: chroma - ports: 
- - containerPort: 5432 - ---- - -apiVersion: batch/v1 -kind: Job -metadata: - name: migration - namespace: chroma -spec: - template: - metadata: - labels: - app: migration - spec: - restartPolicy: OnFailure - containers: - - args: - - 'migrate' - - 'apply' - - '--url' - - 'postgres://chroma:chroma@postgres:5432/chroma?sslmode=disable' - image: migration - imagePullPolicy: IfNotPresent - name: migration - ---- - -apiVersion: v1 -kind: Service -metadata: - name: server - namespace: chroma -spec: - ports: - - name: server - port: 8000 - targetPort: 8000 - selector: - app: server - type: LoadBalancer - ---- - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: server - namespace: chroma -spec: - replicas: 1 - selector: - matchLabels: - app: server - template: - metadata: - labels: - app: server - spec: - containers: - - name: server - image: server - imagePullPolicy: IfNotPresent - ports: - - containerPort: 8000 - volumeMounts: - - name: chroma - mountPath: /test - env: - - name: IS_PERSISTENT - value: "TRUE" - - name: CHROMA_PRODUCER_IMPL - value: "chromadb.ingest.impl.pulsar.PulsarProducer" - - name: CHROMA_CONSUMER_IMPL - value: "chromadb.ingest.impl.pulsar.PulsarConsumer" - - name: CHROMA_SEGMENT_MANAGER_IMPL - value: "chromadb.segment.impl.manager.distributed.DistributedSegmentManager" - - name: PULSAR_BROKER_URL - value: "pulsar.chroma" - - name: PULSAR_BROKER_PORT - value: "6650" - - name: PULSAR_ADMIN_PORT - value: "8080" - - name: ALLOW_RESET - value: "TRUE" - - name: CHROMA_SYSDB_IMPL - value: "chromadb.db.impl.grpc.client.GrpcSysDB" - - name: CHROMA_SERVER_GRPC_PORT - value: "50051" - - name: CHROMA_COORDINATOR_HOST - value: "coordinator.chroma" - readinessProbe: - httpGet: - path: /api/v1/heartbeat - port: 8000 - initialDelaySeconds: 10 - periodSeconds: 5 - # livenessProbe: - # httpGet: - # path: /healthz - # port: 8000 - # initialDelaySeconds: 20 - # periodSeconds: 10 - # Ephemeral for now - volumes: - - name: chroma - emptyDir: {} - ---- - -# 
apiVersion: v1 -# kind: PersistentVolumeClaim -# metadata: -# name: index-data -# namespace: chroma -# spec: -# accessModes: -# - ReadWriteOnce -# resources: -# requests: -# storage: 1Gi - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: coordinator - namespace: chroma -spec: - replicas: 1 - selector: - matchLabels: - app: coordinator - template: - metadata: - labels: - app: coordinator - spec: - containers: - - command: - - "coordinator" - - "coordinator" - - "--pulsar-admin-url=http://pulsar.chroma:8080" - - "--pulsar-url=pulsar://pulsar.chroma:6650" - - "--notifier-provider=pulsar" - image: chroma-coordinator - imagePullPolicy: IfNotPresent - name: coordinator - ports: - - containerPort: 50051 - name: grpc - resources: - limits: - cpu: 100m - memory: 128Mi - ---- - -apiVersion: v1 -kind: Service -metadata: - name: coordinator - namespace: chroma -spec: - ports: - - name: grpc - port: 50051 - targetPort: grpc - selector: - app: coordinator - type: ClusterIP - ---- - -apiVersion: v1 -kind: Service -metadata: - name: logservice - namespace: chroma -spec: - ports: - - name: grpc - port: 50051 - targetPort: grpc - selector: - app: logservice - type: ClusterIP - ---- - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: logservice - namespace: chroma -spec: - replicas: 1 - selector: - matchLabels: - app: logservice - template: - metadata: - labels: - app: logservice - spec: - containers: - - command: - - "logservice" - - "logservice" - image: chroma-coordinator - imagePullPolicy: IfNotPresent - name: logservice - ports: - - containerPort: 50051 - name: grpc diff --git a/k8s/dev/setup.yaml b/k8s/dev/setup.yaml deleted file mode 100644 index 75478e0b6f5..00000000000 --- a/k8s/dev/setup.yaml +++ /dev/null @@ -1,109 +0,0 @@ -kind: Namespace -apiVersion: v1 -metadata: - name: chroma ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: memberlist-reader -rules: -- apiGroups: - - chroma.cluster - resources: - - memberlists - 
verbs: - - get - - list - - watch - - create - - update - - patch - - delete ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: memberlist-reader -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: memberlist-reader -subjects: -- kind: ServiceAccount - name: default - namespace: chroma ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - namespace: chroma - name: pod-list-role -rules: -- apiGroups: [""] - resources: ["pods"] - verbs: ["get", "list", "watch"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: pod-list-role-binding - namespace: chroma -subjects: -- kind: ServiceAccount - name: default - namespace: chroma -roleRef: - kind: Role - name: pod-list-role - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: memberlists.chroma.cluster -spec: - group: chroma.cluster - versions: - - name: v1 - served: true - storage: true - schema: - openAPIV3Schema: - type: object - properties: - spec: - type: object - properties: - members: - type: array - items: - type: object - properties: - url: # Rename to ip - type: string - pattern: '^((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}$' - scope: Namespaced - names: - plural: memberlists - singular: memberlist - kind: MemberList - shortNames: - - ml ---- -apiVersion: chroma.cluster/v1 -kind: MemberList -metadata: - name: worker-memberlist - namespace: chroma -spec: - members: ---- -apiVersion: chroma.cluster/v1 -kind: MemberList -metadata: - name: test-memberlist - namespace: chroma -spec: - members: ---- \ No newline at end of file diff --git a/k8s/dev/worker.yaml b/k8s/dev/worker.yaml deleted file mode 100644 index 82b4c9d905b..00000000000 --- a/k8s/dev/worker.yaml +++ /dev/null @@ -1,40 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: worker - namespace: chroma -spec: - replicas: 1 - selector: - matchLabels: - app: 
worker - template: - metadata: - labels: - app: worker - member-type: worker - spec: - containers: - - name: worker - image: worker - imagePullPolicy: IfNotPresent - command: ["cargo", "run"] - ports: - - containerPort: 50051 - volumeMounts: - - name: chroma - mountPath: /index_data - env: - - name: CHROMA_WORKER__PULSAR_URL - value: pulsar://pulsar.chroma:6650 - - name: CHROMA_WORKER__PULSAR_NAMESPACE - value: default - - name: CHROMA_WORKER__PULSAR_TENANT - value: default - - name: CHROMA_WORKER__MY_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - volumes: - - name: chroma - emptyDir: {} \ No newline at end of file diff --git a/k8s/distributed-chroma/.helmignore b/k8s/distributed-chroma/.helmignore new file mode 100644 index 00000000000..0e8a0eb36f4 --- /dev/null +++ b/k8s/distributed-chroma/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/k8s/distributed-chroma/Chart.yaml b/k8s/distributed-chroma/Chart.yaml new file mode 100644 index 00000000000..c6532e29e6f --- /dev/null +++ b/k8s/distributed-chroma/Chart.yaml @@ -0,0 +1,30 @@ +# Copyright 2024 Chroma Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v2 +name: distributed-chroma +description: A helm chart for distributed Chroma +type: application +version: 0.1.0 +appVersion: "0.4.23" +keywords: + - chroma + - vector + - database + - retrieval + - llm + - rag +home: "https://www.trychroma.com/" +sources: + - "https://github.com/chroma-core/chroma" diff --git a/k8s/crd/memberlist_crd.yaml b/k8s/distributed-chroma/crds/memberlist_crd.yaml similarity index 92% rename from k8s/crd/memberlist_crd.yaml rename to k8s/distributed-chroma/crds/memberlist_crd.yaml index 9d31706aad2..fb593c7e2a7 100644 --- a/k8s/crd/memberlist_crd.yaml +++ b/k8s/distributed-chroma/crds/memberlist_crd.yaml @@ -2,6 +2,7 @@ # They will be used for the upcoming distributed version of chroma. They are not even ready # for testing yet. Please do not use them unless you are working on the distributed version of chroma. +# Note from ben: Before you modify this please read https://hackmd.io/@carvel/rJKraqlDD apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: diff --git a/k8s/dev/coordinator.yaml b/k8s/distributed-chroma/templates/coordinator.yaml similarity index 51% rename from k8s/dev/coordinator.yaml rename to k8s/distributed-chroma/templates/coordinator.yaml index f7f8c122bd4..9b993df7f8b 100644 --- a/k8s/dev/coordinator.yaml +++ b/k8s/distributed-chroma/templates/coordinator.yaml @@ -2,9 +2,9 @@ apiVersion: apps/v1 kind: Deployment metadata: name: coordinator - namespace: chroma + namespace: {{ .Values.namespace }} spec: - replicas: 1 + replicas: {{ .Values.coordinator.replicaCount }} selector: matchLabels: app: coordinator @@ -13,6 +13,7 @@ spec: labels: app: coordinator spec: + serviceAccountName: coordinator-serviceaccount containers: - command: - "coordinator" @@ -24,19 +25,47 @@ spec: imagePullPolicy: IfNotPresent name: coordinator ports: - - containerPort: 50051 + - containerPort: 50051 name: 
grpc + --- + apiVersion: v1 kind: Service metadata: name: coordinator - namespace: chroma + namespace: {{ .Values.namespace }} spec: ports: - name: grpc - port: 50051 + port: {{ .Values.coordinator.port }} targetPort: grpc selector: app: coordinator type: ClusterIP + +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: coordinator-serviceaccount + namespace: {{ .Values.namespace }} + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: coordinator-serviceaccount-rolebinding + namespace: {{ .Values.namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: pod-watcher +subjects: +- kind: ServiceAccount + name: coordinator-serviceaccount + namespace: {{ .Values.namespace }} + +--- \ No newline at end of file diff --git a/k8s/dev/server.yaml b/k8s/distributed-chroma/templates/frontend-server.yaml similarity index 86% rename from k8s/dev/server.yaml rename to k8s/distributed-chroma/templates/frontend-server.yaml index c7ab13df6d8..39678d78d11 100644 --- a/k8s/dev/server.yaml +++ b/k8s/distributed-chroma/templates/frontend-server.yaml @@ -1,20 +1,20 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: server - namespace: chroma + name: frontend-server + namespace: {{ .Values.namespace }} spec: replicas: 2 selector: matchLabels: - app: server + app: frontend-server template: metadata: labels: - app: server + app: frontend-server spec: containers: - - name: server + - name: frontend-server image: server imagePullPolicy: IfNotPresent ports: @@ -53,13 +53,13 @@ spec: apiVersion: v1 kind: Service metadata: - name: server - namespace: chroma + name: frontend-server + namespace: {{ .Values.namespace }} spec: ports: - name: server-port port: 8000 targetPort: 8000 selector: - app: server + app: frontend-server type: ClusterIP diff --git a/k8s/dev/logservice.yaml b/k8s/distributed-chroma/templates/logservice.yaml similarity index 89% rename from k8s/dev/logservice.yaml rename to 
k8s/distributed-chroma/templates/logservice.yaml index a4b491116ee..113b0813e37 100644 --- a/k8s/dev/logservice.yaml +++ b/k8s/distributed-chroma/templates/logservice.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: logservice - namespace: chroma + namespace: {{ .Values.namespace }} spec: replicas: 1 selector: @@ -28,7 +28,7 @@ apiVersion: v1 kind: Service metadata: name: logservice - namespace: chroma + namespace: {{ .Values.namespace }} spec: ports: - name: grpc diff --git a/k8s/dev/migration.yaml b/k8s/distributed-chroma/templates/migration.yaml similarity index 92% rename from k8s/dev/migration.yaml rename to k8s/distributed-chroma/templates/migration.yaml index df4ac881740..d7946979095 100644 --- a/k8s/dev/migration.yaml +++ b/k8s/distributed-chroma/templates/migration.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: name: migration - namespace: chroma + namespace: {{ .Values.namespace }} spec: template: metadata: diff --git a/k8s/distributed-chroma/templates/namespace.yaml b/k8s/distributed-chroma/templates/namespace.yaml new file mode 100644 index 00000000000..48685640e18 --- /dev/null +++ b/k8s/distributed-chroma/templates/namespace.yaml @@ -0,0 +1,8 @@ +--- + +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Values.namespace }} + +--- \ No newline at end of file diff --git a/k8s/distributed-chroma/templates/pod-watcher-role.yaml b/k8s/distributed-chroma/templates/pod-watcher-role.yaml new file mode 100644 index 00000000000..eb8ff467961 --- /dev/null +++ b/k8s/distributed-chroma/templates/pod-watcher-role.yaml @@ -0,0 +1,13 @@ +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: {{ .Values.namespace }} + name: pod-watcher +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + +--- \ No newline at end of file diff --git a/k8s/dev/postgres.yaml b/k8s/distributed-chroma/templates/postgres.yaml similarity index 90% rename from k8s/dev/postgres.yaml rename 
to k8s/distributed-chroma/templates/postgres.yaml index e2b8fad3159..419e149dfc2 100644 --- a/k8s/dev/postgres.yaml +++ b/k8s/distributed-chroma/templates/postgres.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: postgres - namespace: chroma + namespace: {{ .Values.namespace }} spec: replicas: 1 selector: @@ -30,7 +30,7 @@ apiVersion: v1 kind: Service metadata: name: postgres - namespace: chroma + namespace: {{ .Values.namespace }} spec: ports: - name: postgres-port diff --git a/k8s/dev/pulsar.yaml b/k8s/distributed-chroma/templates/pulsar.yaml similarity index 93% rename from k8s/dev/pulsar.yaml rename to k8s/distributed-chroma/templates/pulsar.yaml index bcddf60c113..68cc5ce24f8 100644 --- a/k8s/dev/pulsar.yaml +++ b/k8s/distributed-chroma/templates/pulsar.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: pulsar - namespace: chroma + namespace: {{ .Values.namespace }} spec: replicas: 1 selector: @@ -38,7 +38,7 @@ apiVersion: v1 kind: Service metadata: name: pulsar - namespace: chroma + namespace: {{ .Values.namespace }} spec: ports: - name: pulsar-port diff --git a/k8s/deployment/segment-server.yaml b/k8s/distributed-chroma/templates/worker.yaml similarity index 57% rename from k8s/deployment/segment-server.yaml rename to k8s/distributed-chroma/templates/worker.yaml index 33af91d1314..a7f080459ba 100644 --- a/k8s/deployment/segment-server.yaml +++ b/k8s/distributed-chroma/templates/worker.yaml @@ -1,37 +1,41 @@ +--- + apiVersion: v1 kind: Service metadata: - name: segment-server - namespace: chroma + name: worker + namespace: {{ .Values.namespace }} spec: ports: - - name: segment-server-port + - name: worker-server-port port: 50051 targetPort: 50051 selector: - app: segment-server + app: worker-server type: ClusterIP --- + apiVersion: apps/v1 kind: Deployment metadata: - name: segment-server - namespace: chroma + name: worker + namespace: {{ .Values.namespace }} spec: - replicas: 1 + replicas: 2 selector: 
matchLabels: - app: segment-server + app: worker template: metadata: labels: - app: segment-server + app: worker member-type: worker spec: + serviceAccountName: worker-serviceaccount containers: - - name: segment-server + - name: worker image: worker imagePullPolicy: IfNotPresent command: ["cargo", "run"] @@ -43,45 +47,43 @@ spec: env: - name: CHROMA_WORKER__PULSAR_URL value: pulsar://pulsar.chroma:6650 - - name: CHROMA_WORKER__PULSAR_NAMESPACE - value: default - - name: CHROMA_WORKER__PULSAR_TENANT - value: default - name: CHROMA_WORKER__MY_IP valueFrom: fieldRef: fieldPath: status.podIP - # livenessProbe: - # grpc: - # port: 50051 - # initialDelaySeconds: 10 + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: "kubernetes.io/hostname" + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + member-type: worker volumes: - name: chroma emptyDir: {} --- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role +apiVersion: v1 +kind: ServiceAccount metadata: - namespace: chroma - name: pod-watcher -rules: -- apiGroups: [""] - resources: ["pods"] - verbs: ["get", "list", "watch"] + name: worker-serviceaccount + namespace: {{ .Values.namespace }} --- + apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: - name: pod-watcher-binding - namespace: chroma -subjects: -- kind: ServiceAccount - name: default - namespace: chroma + name: worker-serviceaccount-rolebinding + namespace: {{ .Values.namespace }} roleRef: + apiGroup: rbac.authorization.k8s.io kind: Role name: pod-watcher - apiGroup: rbac.authorization.k8s.io +subjects: +- kind: ServiceAccount + name: worker-serviceaccount + namespace: {{ .Values.namespace }} + +--- \ No newline at end of file diff --git a/k8s/distributed-chroma/templates/worker_memberlist_cr.yaml b/k8s/distributed-chroma/templates/worker_memberlist_cr.yaml new file mode 100644 index 00000000000..1b022afa2ce --- /dev/null +++ b/k8s/distributed-chroma/templates/worker_memberlist_cr.yaml @@ -0,0 +1,79 @@ +# These 
kubernetes manifests are UNDER ACTIVE DEVELOPMENT and are not yet ready for production use. +# They will be used for the upcoming distributed version of chroma. They are not even ready +# for testing yet. Please do not use them unless you are working on the distributed version of chroma. + +apiVersion: chroma.cluster/v1 +kind: MemberList +metadata: + name: worker-memberlist + namespace: {{ .Values.namespace}} +spec: + members: + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: worker-memberlist-readerwriter +rules: +- apiGroups: + - chroma.cluster + resources: + - memberlists + verbs: + - get + - list + - watch + # TODO: FIX THIS LEAKY PERMISSION + - create + - update + - patch + - delete + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: coordinator-worker-memberlist-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: worker-memberlist-readerwriter +subjects: +- kind: ServiceAccount + name: coordinator-serviceaccount + namespace: {{ .Values.namespace }} + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + # Awkward name, but this lets the worker-serviceaccount read + # the worker-memberlist. 
+ name: worker-worker-memberlist-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: worker-memberlist-readerwriter +subjects: +- kind: ServiceAccount + name: worker-serviceaccount + namespace: {{ .Values.namespace }} + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: worker-memberlist-readerwriter-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: worker-memberlist-readerwriter +subjects: +- kind: ServiceAccount + name: default + namespace: {{ .Values.namespace }} diff --git a/k8s/distributed-chroma/values.yaml b/k8s/distributed-chroma/values.yaml new file mode 100644 index 00000000000..e7fd1d2bb57 --- /dev/null +++ b/k8s/distributed-chroma/values.yaml @@ -0,0 +1,7 @@ +# Default values for distributed-chroma. + +namespace: "chroma" + +coordinator: + replicaCount: 1 + port: 50051 diff --git a/k8s/test/README.md b/k8s/test/README.md new file mode 100644 index 00000000000..83ffcc949a5 --- /dev/null +++ b/k8s/test/README.md @@ -0,0 +1 @@ +This directory contains kubernetes manifests to be applied on top of our production manifests to make testing and debugging easier. For example, service endpoints to expose internal services. 
\ No newline at end of file diff --git a/k8s/test/minio.yaml b/k8s/test/minio.yaml index 148c5170fd8..d535e896234 100644 --- a/k8s/test/minio.yaml +++ b/k8s/test/minio.yaml @@ -18,25 +18,25 @@ spec: - name: minio emptyDir: {} containers: - - name: minio - image: minio/minio:latest - args: - - server - - /storage - env: - - name: MINIO_ACCESS_KEY - value: "minio" - - name: MINIO_SECRET_KEY - value: "minio123" - ports: - - containerPort: 9000 - hostPort: 9000 - volumeMounts: - name: minio - mountPath: /storage + image: minio/minio:latest + args: + - server + - /storage + env: + - name: MINIO_ACCESS_KEY + value: "minio" + - name: MINIO_SECRET_KEY + value: "minio123" + ports: + - containerPort: 9000 + hostPort: 9000 + name: http + volumeMounts: + - name: minio + mountPath: /storage --- - apiVersion: v1 kind: Service metadata: diff --git a/k8s/test/segment_server_service.yml b/k8s/test/worker_service.yaml similarity index 64% rename from k8s/test/segment_server_service.yml rename to k8s/test/worker_service.yaml index 7463333deef..9fac38d0e1f 100644 --- a/k8s/test/segment_server_service.yml +++ b/k8s/test/worker_service.yaml @@ -1,13 +1,13 @@ apiVersion: v1 kind: Service metadata: - name: segment-server-lb + name: worker-lb namespace: chroma spec: ports: - - name: segment-server-port + - name: worker-port port: 50052 targetPort: 50051 selector: - app: segment-server + app: worker type: LoadBalancer