From 25b6664d5102f699f74c720aeabe18905acd95ce Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Fri, 29 Mar 2019 13:02:37 -0700 Subject: [PATCH 01/31] DX-15734: add dremio-admin Make running it conditional - either this is running or the Dremio cluster. Controlled via helm value --set DremioAdmin=true Change-Id: Ia6d30bcda29e5ea606f57aa6ac22b65aad8a3a91 --- charts/dremio/README.md | 20 ++++++++++ charts/dremio/templates/dremio-admin.yaml | 40 +++++++++++++++++++ .../dremio/templates/dremio-coordinator.yaml | 2 + charts/dremio/templates/dremio-executor.yaml | 2 + charts/dremio/templates/dremio-master.yaml | 2 + .../templates/dremio-service-client.yaml | 2 + charts/dremio/templates/zookeeper.yaml | 2 + 7 files changed, 70 insertions(+) create mode 100644 charts/dremio/templates/dremio-admin.yaml diff --git a/charts/dremio/README.md b/charts/dremio/README.md index 08463be8..b5e620a5 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -93,6 +93,26 @@ helm upgrade dremio --set executor.count=5 You can also scale down the same way. +### Running offline dremio-admin commands +Administration commands restore, cleanup and set-password in dremio-admin need to be run when +the Dremio cluster is not running. So, before running these commands, you need to shut down +the Dremio cluster. Use the helm delete command to delete the helm release. +(Kubernetes does not delete the persistent store volumes when you delete statefulset pods and +when you install the cluster again using helm, the existing persistent store will be used and +you will get your Dremio cluster running again.) + +After the Dremio cluster is shut down, start the dremio-admin pod using +```bash +helm install --wait dremio --set DremioAdmin=true +``` +Once the pod is running, you can connect to the pod using +```bash +kubectl exec -it dremio-admin -- bash +``` +Now, you have a bash shell from where you can run the dremio-admin commands.
+ +Once you are done, you can delete the helm release for the dremio-admin and start your Dremio cluster. + #### Upgrading Dremio You should attempt upgrade when no queries are running on the cluster. Update the Dremio image tag in your values.yaml file. E.g. ```bash diff --git a/charts/dremio/templates/dremio-admin.yaml b/charts/dremio/templates/dremio-admin.yaml new file mode 100644 index 00000000..3de91c43 --- /dev/null +++ b/charts/dremio/templates/dremio-admin.yaml @@ -0,0 +1,40 @@ +{{ if .Values.DremioAdmin }} +# dremio-admin pod is used to run offline commands like +# clean, restore or set-password against the Dremio cluster. +# The Dremio cluster should be shutdown before attempting to +# create the dremio-admin pod. +# You connect to the pod (kubectl exec -it dremio-admin -- bash), +# go to /opt/dremio/bin and run dremio-admin commands as documented. +apiVersion: v1 +kind: Pod +metadata: + name: dremio-admin +spec: + containers: + - name: dremio-admin + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + stdin: true + tty: true + resources: + requests: + memory: {{.Values.coordinator.memory}}M + cpu: {{.Values.coordinator.cpu}} + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + - name: dremio-config + mountPath: /opt/dremio/conf + command: ["sleep", "infinity"] + {{- if .Values.imagePullSecrets }} + imagePullSecrets: + - name: {{ .Values.imagePullSecrets }} + {{- end}} + volumes: + - name: dremio-master-volume + persistentVolumeClaim: + claimName: dremio-master-volume-dremio-master-0 + - name: dremio-config + configMap: + name: dremio-config +{{ end }} diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index bfede34c..7867902b 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -70,3 +71,4 @@ spec: 
imagePullSecrets: - name: {{ .Values.imagePullSecrets }} {{- end}} +{{ end }} diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index 8facdd0c..72788140 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ b/charts/dremio/templates/dremio-executor.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -91,3 +92,4 @@ spec: resources: requests: storage: {{.Values.executor.volumeSize}} +{{ end }} diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 1930ba06..122728ed 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -111,3 +112,4 @@ spec: resources: requests: storage: {{.Values.coordinator.volumeSize}} +{{ end }} diff --git a/charts/dremio/templates/dremio-service-client.yaml b/charts/dremio/templates/dremio-service-client.yaml index 8149368e..a1906738 100644 --- a/charts/dremio/templates/dremio-service-client.yaml +++ b/charts/dremio/templates/dremio-service-client.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: v1 kind: Service metadata: @@ -38,3 +39,4 @@ spec: clusterIP: None selector: role: dremio-cluster-pod +{{ end }} diff --git a/charts/dremio/templates/zookeeper.yaml b/charts/dremio/templates/zookeeper.yaml index a272234c..7e36cb8e 100644 --- a/charts/dremio/templates/zookeeper.yaml +++ b/charts/dremio/templates/zookeeper.yaml @@ -1,3 +1,4 @@ +{{ if not .Values.DremioAdmin }} apiVersion: v1 kind: Service metadata: @@ -133,3 +134,4 @@ spec: resources: requests: storage: 10Gi +{{ end }} From a54caa82f3f02d3f4e4bbf88ed5e461b5cfe943f Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 3 Apr 2019 17:24:23 -0700 Subject: [PATCH 02/31] DX-15692: configurable zookeeper resource properties Change-Id: 
Ica602f61421506708689bedb36d0d30c887c8407 --- charts/dremio/templates/zookeeper.yaml | 10 +++++----- charts/dremio/values.yaml | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/charts/dremio/templates/zookeeper.yaml b/charts/dremio/templates/zookeeper.yaml index 7e36cb8e..d1bc3f93 100644 --- a/charts/dremio/templates/zookeeper.yaml +++ b/charts/dremio/templates/zookeeper.yaml @@ -49,7 +49,7 @@ spec: matchLabels: app: zk serviceName: zk-hs - replicas: 1 + replicas: {{.Values.zookeeper.count}} updateStrategy: type: RollingUpdate podManagementPolicy: Parallel @@ -74,8 +74,8 @@ spec: image: "k8s.gcr.io/kubernetes-zookeeper:1.0-3.4.10" resources: requests: - memory: "1Gi" - cpu: "0.5" + memory: "{{.Values.zookeeper.memory}}M" + cpu: "{{.Values.zookeeper.cpu}}" ports: - containerPort: 2181 name: client @@ -87,7 +87,7 @@ spec: - sh - -c - "start-zookeeper \ - --servers=1 \ + --servers={{.Values.zookeeper.count}} \ --data_dir=/var/lib/zookeeper/data \ --data_log_dir=/var/lib/zookeeper/data/log \ --conf_dir=/opt/zookeeper/conf \ @@ -133,5 +133,5 @@ spec: accessModes: [ "ReadWriteOnce" ] resources: requests: - storage: 10Gi + storage: {{.Values.zookeeper.volumeSize}} {{ end }} diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index ad01d742..eb1bf320 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -23,6 +23,11 @@ executor: cpu: 4 count: 3 volumeSize: 100Gi +zookeeper: + memory: 1024 + cpu: 0.5 + count: 3 + volumeSize: 10Gi # If your Kubernetes cluster does not support LoadBalancer, # comment out the line below for the helm chart to succeed or add From a7bff697e1557256668a9c9a93392a49b16239d2 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 3 Apr 2019 17:06:31 -0700 Subject: [PATCH 03/31] DX-15572: Add support of uploads in S3 or ADLS Enabling support of uploads to S3 or ADLS via config entries in values.yaml. 
Change-Id: Ie3233cb7085268ecd6aef010be5f61ad67cefc3a --- charts/dremio/config/core-site.xml | 52 +++++++++++++++++++ charts/dremio/config/dremio.conf | 12 ++++- charts/dremio/templates/dremio-configmap.yaml | 2 +- charts/dremio/values.yaml | 17 ++++++ 4 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 charts/dremio/config/core-site.xml diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml new file mode 100644 index 00000000..1c2f3237 --- /dev/null +++ b/charts/dremio/config/core-site.xml @@ -0,0 +1,52 @@ + + + + + {{- if and .Values.uploads.type (eq .Values.uploads.type "aws") }} + + fs.s3a.access.key + AWS access key ID. + {{ required "AWS access key required" .Values.uploads.aws.accessKey}} + + + fs.s3a.secret.key + AWS secret key. + {{ required "AWS secret required" .Values.uploads.aws.secret}} + + {{- end }} + + {{- if and .Values.uploads.type (eq .Values.uploads.type "azure") }} + + + fs.adl.impl + Must be set to org.apache.hadoop.fs.adl.AdlFileSystem + org.apache.hadoop.fs.adl.AdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory + {{required "Azure application ID required" .Values.uploads.azure.applicationId}} + + + dfs.adls.oauth2.credential + Generated password value for the registered application + {{required "Azure secret value required" .Values.uploads.azure.secret}} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{required "Azure OAuth2 token endpoint required" .Values.uploads.azure.oauth2EndPoint}} + + + dfs.adls.oauth2.access.token.provider.type + Must be set to ClientCredential + ClientCredential + + + fs.adl.impl.disable.cache + Only include this property AFTER validating the ADLS connection. 
+ false + + {{- end }} + diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 0aa7b656..4e754236 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -20,12 +20,22 @@ paths: { # the distributed path Dremio data including job results, downloads, uploads, etc #dist: "pdfs://"${paths.local}"/pdfs" + + # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces + {{- if .Values.uploads.type }} + {{- if eq .Values.uploads.type "aws" }} + uploads: "s3a://{{required "AWS bucketname required" .Values.uploads.aws.bucketName}}{{required "Path required" .Values.uploads.aws.path}}" + {{- end }} + {{- if eq .Values.uploads.type "azure" }} + uploads: "adl://{{required "Azure Datalake store name required" .Values.uploads.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.uploads.azure.path}}" + {{- end }} + {{- end }} } services: { # The services running are controlled via command line options passed in # while starting the services via kubernetes. Updating the three values - # below will not impact what services are running. + # below will not impact what services are running. # coordinator.enabled: true, # coordinator.master.enabled: true, # executor.enabled: true diff --git a/charts/dremio/templates/dremio-configmap.yaml b/charts/dremio/templates/dremio-configmap.yaml index 33fb22bd..442ca1dd 100644 --- a/charts/dremio/templates/dremio-configmap.yaml +++ b/charts/dremio/templates/dremio-configmap.yaml @@ -3,4 +3,4 @@ kind: ConfigMap metadata: name: dremio-config data: - {{- (.Files.Glob "config/*").AsConfig | nindent 2 }} + {{- tpl (.Files.Glob "config/*").AsConfig . 
| nindent 2 }} diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index eb1bf320..82c8f7d9 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -52,3 +52,20 @@ serviceType: LoadBalancer # the credentials in a kubernetes secret and provide the secret name here. # For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets=secretname + +# Control where uploaded files are stored. +# See https://docs.dremio.com/deployment/distributed-storage.html for more information +uploads: + # Valid values are local, aws or azure. aws and azure choice requires additional configuration data. + type: "local" + aws: + bucketName: "Your_AWS_bucket_name" + path: "/" + accessKey: "Your_AWS_Access_Key" + secret: "Your_AWS_Secret" + azure: + datalakeStoreName: "Your_Azure_DataLake_Storage_name" + path: "/" + applicationId: "Your_Azure_Application_Id" + secret: "Your_Azure_Secret" + oauth2EndPoint: "Azure_OAuth2_Endpoint" From e7ceae3f167a7851ca433e8105d9891efca17abb Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 9 Apr 2019 15:59:48 -0700 Subject: [PATCH 04/31] DX-15693: enable nodeSelector for pods Change-Id: I3b6746fb8eddeff8c56ae4876453d47bfc30ef9b --- charts/dremio/templates/dremio-coordinator.yaml | 6 ++++++ charts/dremio/templates/dremio-executor.yaml | 6 ++++++ charts/dremio/templates/dremio-master.yaml | 6 ++++++ charts/dremio/templates/zookeeper.yaml | 6 ++++++ charts/dremio/values.yaml | 5 +++++ 5 files changed, 29 insertions(+) diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 7867902b..2e4c1eaa 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -25,6 +25,12 @@ spec: dremio-configmap/checksum: {{ (.Files.Glob "config/*").AsConfig | sha256sum }} spec: terminationGracePeriodSeconds: 5 + {{- if .Values.nodeSelector }} + 
nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: dremio-coordinator image: {{.Values.image}} diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index 72788140..78ddc7af 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ b/charts/dremio/templates/dremio-executor.yaml @@ -25,6 +25,12 @@ spec: dremio-configmap/checksum: {{ (.Files.Glob "config/*").AsConfig | sha256sum }} spec: terminationGracePeriodSeconds: 5 + {{- if .Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: dremio-executor image: {{.Values.image}} diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 122728ed..4abb730e 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -28,6 +28,12 @@ spec: - dremio-master topologyKey: "kubernetes.io/hostname" terminationGracePeriodSeconds: 5 + {{- if .Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: dremio-master-coordinator image: {{.Values.image}} diff --git a/charts/dremio/templates/zookeeper.yaml b/charts/dremio/templates/zookeeper.yaml index d1bc3f93..4ad5a9c6 100644 --- a/charts/dremio/templates/zookeeper.yaml +++ b/charts/dremio/templates/zookeeper.yaml @@ -68,6 +68,12 @@ spec: values: - zk topologyKey: "kubernetes.io/hostname" + {{- if .Values.nodeSelector }} + nodeSelector: + {{- range $key, $value := .Values.nodeSelector }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} containers: - name: kubernetes-zookeeper imagePullPolicy: Always diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 82c8f7d9..a4400a5e 100644 --- a/charts/dremio/values.yaml +++ 
b/charts/dremio/values.yaml @@ -53,6 +53,11 @@ serviceType: LoadBalancer # For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets=secretname +# Target pods to nodes based on labels set on the nodes. +# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +#nodeSelector: +# key: value + # Control where uploaded files are stored. # See https://docs.dremio.com/deployment/distributed-storage.html for more information uploads: From 85252f0873f1c0d1a73d858e58e962c91f2e7c30 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 9 Apr 2019 16:11:12 -0700 Subject: [PATCH 05/31] DX-15949: enable readiness probe for dremio-master Change-Id: I2e3444eccd95cdce7fab18045b43270cbbb07f79 --- charts/dremio/templates/dremio-master.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 4abb730e..423ce78f 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -70,6 +70,11 @@ spec: name: client - containerPort: 45678 name: server + readinessProbe: + tcpSocket: + port: 9047 + initialDelaySeconds: 5 + periodSeconds: 5 initContainers: - name: start-only-one-master image: busybox From 1fb92f7f1869f2bd0ebfe5e2ef522c6bafdc591d Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Thu, 9 May 2019 17:16:38 -0700 Subject: [PATCH 06/31] DX-16307: Deploy to Azure using ARM templates Change-Id: I3534f554ee2168ee88e7ae19380e33696a27f2a3 --- README.md | 9 +- azure/arm-templates/README.md | 61 +++ azure/arm-templates/azuredeploy.json | 187 +++++++ azure/arm-templates/nested/dremioCluster.json | 460 ++++++++++++++++++ azure/arm-templates/nested/dremioState.json | 408 ++++++++++++++++ azure/arm-templates/scripts/setupDremio.sh | 101 ++++ charts/dremio/config/logback-admin.xml | 67 +++ 7 files changed, 1289 insertions(+), 4 
deletions(-) create mode 100644 azure/arm-templates/README.md create mode 100644 azure/arm-templates/azuredeploy.json create mode 100644 azure/arm-templates/nested/dremioCluster.json create mode 100644 azure/arm-templates/nested/dremioState.json create mode 100644 azure/arm-templates/scripts/setupDremio.sh create mode 100644 charts/dremio/config/logback-admin.xml diff --git a/README.md b/README.md index b21a8209..32e7c51a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ -# Dremio Container Tools +# Dremio Cloud Tools -This repository contains: +This repository contains tools and utilities to deploy Dremio to cloud environments: -* Tools to build [Dremio Docker images](images/dremio-oss). -* Example [helm chart](charts/dremio) to deploy Dremio to Kubernetes. +* [Dockerfile](images/dremio-oss) to build Dremio Docker images. +* [Helm chart](charts/dremio) to deploy Dremio to Kubernetes. +* [Azure Resource Manager (ARM) template](azure/arm-templates) to deploy to Azure. These are currently *experimental* items and should be evaluated and extended based on individual needs. diff --git a/azure/arm-templates/README.md b/azure/arm-templates/README.md new file mode 100644 index 00000000..210a9f1c --- /dev/null +++ b/azure/arm-templates/README.md @@ -0,0 +1,61 @@ + +# Deploying Dremio to Azure + +This deploys a Dremio cluster on Azure VMs. The deployment creates a master coordinator node and number of executor nodes depending on the size of the cluster chosen. The table below provides the machine type and number of executor nodes for the different sizes of Dremio clusters. + +| Cluster size | Coordinator VM Type | Executor VM Type | No. 
of Executors | +|--------------|---------------------|------------------|------------------| +| X-Small | Standard_D4_v3 | Standard_E16s_v3 | 1 | +| Small | Standard_D4_v3 | Standard_E16s_v3 | 5 | +| Medium | Standard_D8_v3 | Standard_E16s_v3 | 10 | +| Large | Standard_D8_v3 | Standard_E16s_v3 | 25 | +| X-Large | Standard_D8_v3 | Standard_E16s_v3 | 50 | + +The deployment resources are: +``` +┌───────────────────────────┐ +│ WebUI on 9047 │ +│ JDBC/ODBC client on 31010 │ +└─────────────┬─────────────┘ + │ +┌────────────────────────────┼─────────────────────────────────────┐ +│ VirtualNetwork │ │ +│ ┌──────────────────────────▼───────────────────────────────────┐ │ +│ │ Subnet ┌──────────────────────────┐ ┌────────────────┐ │ │ +│ │ │ LoadBalancer │ │ Security Group │ │ │ +│ │ └──────────────────┬───────┘ │Allow access to │ │ │ +│ │ │ │22, 9047, 31010 │ │ │ +│ │ ┌───────────────────┘ └────────────────┘ │ │ +│ │ │ │ │ +│ │ │ │ │ +│ │ ▼ │ │ +│ │ ┌───────────────────┐ ┌───────────────────┐ │ │ +│ │ │Master Coordinator │ │ Executor ├┐ │ │ +│ │ │ (Azure VM) │───────────▶│(Azure VM Scaleset)│├─┐ │ │ +│ │ └───────────────────┘ └┬──────────────────┘│ │ │ │ +│ │ ┌───────────────────┐ └─┬─────────────────┘ │ │ │ +│ │ │ Dremio Metadata │ └───────────────────┘ │ │ +│ │ │ (Azure Disk) │ │ │ +│ │ └───────────────────┘ │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────────┘ +``` +You can try it out: [![Azure ARM Template](http://azuredeploy.net/deploybutton.png)](https://portal.azure.com/#create/microsoft.template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fdremio%2Fdremio-cloud-tools%2Fmaster%2Fazure%2Farm-templates%2Fazuredeploy.json) + +The inputs required during deployment are: + +|Input Parameter|Description | +|---|---| +| Subscription |Azure subscription where the cluster should be deployed. | +| Resource Group |The Azure Resource group where the cluster should be deployed. 
You can create a new one too. It is recommended to create a new one as all resources are created in that group and deleting the group will delete all resources created. | +| Location |The Azure location where the cluster resources will be deployed. | +| Cluster Name |A name for your cluster.| +| Cluster Size |Pick a size based on your needs.| +| SSH Username |The username that can be used to login to your nodes.| +| Authentication Type |Password or Key based authentication for ssh.| +| Password or SSH Public Key |The password or ssh public key | +| Use Existing Subnet | (Optional) id of an existing subnet. The subnet must be in the same region as the Dremio cluster resource group. It is of the form /subscriptions/xxxx/resourceGroups/xxxx/providers/Microsoft.Network/virtualNetworks/xxxx/subnets/xxxx| +| Use Private IP | Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio. | +| Dremio Binary | Publicly accessible URL to a Dremio installation rpm | + +Once the deployment is successful, you will find the URL to Dremio UI in the output section of the deployment. diff --git a/azure/arm-templates/azuredeploy.json b/azure/arm-templates/azuredeploy.json new file mode 100644 index 00000000..e9102fc8 --- /dev/null +++ b/azure/arm-templates/azuredeploy.json @@ -0,0 +1,187 @@ + +{ + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "clusterName": { + "type": "string", + "defaultValue": "mydremio", + "metadata": { + "description": "Dremio cluster name in Azure." + } + }, + "clusterSize": { + "type": "string", + "allowedValues": ["X-Small (1 executor)", "Small (5 executors)", "Medium (10 executors)", "Large (25 executors)", "X-Large (50 executors)"], + "metadata": { + "description": "The type and number of machines are chosen based on the size selected."
+ } + }, + "SSHUsername": { + "type": "string", + "defaultValue": "azuser", + "metadata": { + "description": "SSH username for the virtual machines. You need it if you want to login to the machines." + } + }, + "authenticationType": { + "type": "string", + "defaultValue": "password", + "allowedValues": [ + "password", + "SSHPublicKey" + ], + "metadata": { + "description": "Type of authentication to use on the virtual machines." + } + }, + "PasswordOrSSHPublicKey": { + "type": "securestring", + "metadata": { + "description": "Password or ssh public key for the virtual machines. If password, password must be minimum 12 characters with at least 1 upper case letter, 1 lower case letter and 1 number." + } + }, + "useExistingSubnet": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Optional - resource id of existing subnet to deploy to; the subnet needs to be in the same region as the cluster. If empty, a new virtual network and subnet will be created." + } + }, + "usePrivateIP": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio." 
+ } + }, + "dremioBinary": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "Optional - publicly accessible URL to a Dremio installation rpm" + } + } + }, + "variables": { + "baseURI": "https://raw.githubusercontent.com/dremio/dremio-cloud-tools/master/azure/arm-templates/nested/", + "apiVersion": "2018-05-01", + "shortName": "[take(resourceGroup().name, 40)]", + "rgName": "[resourceGroup().name]", + "location": "[resourceGroup().location]", + "stateRgName": "[resourceGroup().name]", + "dataDiskName": "[concat(parameters('clusterName'), '-master-data-disk')]", + "dataDiskId": "[concat(subscription().id, '/resourceGroups/', variables('stateRgName'), '/providers/Microsoft.Compute/disks/', variables('dataDiskName'))]", + "clusterSizes": { + "X-Small (1 executor)": { + "coordinatorVmSize": "Standard_D4_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 1, + "metadataDiskSize": 10 + }, + "Small (5 executors)": { + "coordinatorVmSize": "Standard_D4_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 5, + "metadataDiskSize": 50 + }, + "Medium (10 executors)": { + "coordinatorVmSize": "Standard_D8_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 10, + "metadataDiskSize": 100 + }, + "Large (25 executors)": { + "coordinatorVmSize": "Standard_D8_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 25, + "metadataDiskSize": 100 + }, + "X-Large (50 executors)": { + "coordinatorVmSize": "Standard_D8_v3", + "coordinatorCount": 0, + "executorVmSize": "Standard_E16s_v3", + "executorCount": 50, + "metadataDiskSize": 100 + } + } + }, + "resources": [ + { + "apiVersion": "2018-02-01", + "name": "pid-1f30d282-b6d2-5dc6-9630-85533cc11b98", + "type": "Microsoft.Resources/deployments", + "properties": { + "mode": "Incremental", + "template": { + "$schema": 
"https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "resources": [] + } + } + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "[variables('apiVersion')]", + "name": "[concat(variables('shortName'), '-state-deployment')]", + "resourceGroup": "[variables('stateRgName')]", + "dependsOn": [ + ], + "properties": { + "mode": "Incremental", + "templateLink": { + "uri": "[concat(variables('baseURI'), 'dremioState.json')]", + "contentVersion": "1.0.0.0" + }, + "parameters": { + "dremioClusterName": {"value": "[parameters('clusterName')]"}, + "dataDiskName": {"value": "[variables('dataDiskName')]"}, + "dataDiskSize": {"value": "[variables('clusterSizes')[parameters('clusterSize')].metadataDiskSize]"}, + "virtualNetworkNewOrExisting": {"value": "[if(equals(trim(parameters('useExistingSubnet')), ''), 'new', 'existing')]"}, + "existingSubnet": {"value": "[parameters('useExistingSubnet')]"} + } + } + }, + { + "type": "Microsoft.Resources/deployments", + "apiVersion": "[variables('apiVersion')]", + "name": "[concat(variables('shortName'), '-compute-deployment')]", + "resourceGroup": "[variables('rgName')]", + "dependsOn": [ + "[concat(variables('shortName'), '-state-deployment')]" + ], + "properties": { + "mode": "Incremental", + "templateLink": { + "uri": "[concat(variables('baseURI'), 'dremioCluster.json')]", + "contentVersion": "1.0.0.0" + }, + "parameters": { + "dremioClusterName": {"value": "[parameters('clusterName')]"}, + "executorCount": {"value": "[variables('clusterSizes')[parameters('clusterSize')].executorCount]"}, + "executorVmSize": {"value": "[variables('clusterSizes')[parameters('clusterSize')].executorVmSize]"}, + "coordinatorCount": {"value": "[variables('clusterSizes')[parameters('clusterSize')].coordinatorCount]"}, + "coordinatorVmSize": {"value": "[variables('clusterSizes')[parameters('clusterSize')].coordinatorVmSize]"}, + "dremioDownloadURL": {"value": 
"[parameters('dremioBinary')]"}, + "dataDiskId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.dataDiskId.value]"}, + "sshUsername": {"value": "[parameters('SSHUsername')]"}, + "sshPasswordOrKey": {"value": "[parameters('PasswordOrSSHPublicKey')]"}, + "subnetId": {"value": "[if(equals(trim(parameters('useExistingSubnet')), ''), reference(concat(variables('shortName'), '-state-deployment')).outputs.subnetId.value, parameters('useExistingSubnet'))]"}, + "loadBalancerId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.loadBalancerId.value]"}, + "nsgId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.nsgId.value]"}, + "usePrivateIP": {"value": "[parameters('usePrivateIP')]"} + } + } + } + ], + "outputs": { + "dremioUi": { + "type": "string", + "value": "[concat('http://', reference(concat(variables('shortName'), '-compute-deployment')).outputs.dremioHost.value, ':9047')]" + } + } +} diff --git a/azure/arm-templates/nested/dremioCluster.json b/azure/arm-templates/nested/dremioCluster.json new file mode 100644 index 00000000..527e9346 --- /dev/null +++ b/azure/arm-templates/nested/dremioCluster.json @@ -0,0 +1,460 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "dremioClusterName": { + "type": "string", + "defaultValue": "mydremio", + "metadata": { + "description": "Name for the Dremio Cluster" + } + }, + "sshUsername": { + "type": "string", + "metadata": { + "description": "SSH username for the virtual machines." + } + }, + "authenticationType": { + "type": "string", + "defaultValue": "password", + "allowedValues": [ + "password", + "sshPublicKey" + ], + "metadata": { + "description": "Type of authentication to use on the virtual machines." 
+ } + }, + "sshPasswordOrKey": { + "type": "securestring", + "metadata": { + "description": "Password or ssh key for the virtual machines." + } + }, + "dataDiskId": { + "type": "string" + }, + "publicIpNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new public ip should be provisioned." + } + }, + "publicIpName": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-master-publicip')]", + "metadata": { + "description": "Name of the public ip address" + } + }, + "publicIpDns": { + "type": "string", + "defaultValue": "[concat('dremio-master-', uniqueString(resourceGroup().id, parameters('dremioClusterName')))]", + "metadata": { + "description": "DNS of the public ip address for the VM" + } + }, + "publicIpResourceGroupName": { + "type": "string", + "defaultValue": "[resourceGroup().name]", + "metadata": { + "description": "Name of the resource group for the public ip address" + } + }, + "publicIpAllocationMethod": { + "type": "string", + "defaultValue": "Static", + "allowedValues": [ + "Dynamic", + "Static" + ], + "metadata": { + "description": "Allocation method for the public ip address" + } + }, + "publicIpSku": { + "type": "string", + "defaultValue": "Standard", + "allowedValues": [ + "Basic", + "Standard" + ], + "metadata": { + "description": "SKU of the public ip address" + } + }, + "coordinatorVmSize": { + "type": "string", + "defaultValue": "Standard_A2_v2", + "metadata": { + "description": "Size for the coordinator virtual machines." + } + }, + "coordinatorCount": { + "type": "int", + "defaultValue": 0, + "metadata": { + "description": "Number of coordinators in the cluster" + } + }, + "executorVmSize": { + "type": "string", + "defaultValue": "Standard_A2_v2", + "metadata": { + "description": "Size for the executor virtual machines."
+ } + }, + "executorCount": { + "type": "int", + "defaultValue": 3, + "metadata": { + "description": "Number of executors in the cluster" + } + }, + "dremioDownloadURL": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "(Optional) URL to download Dremio rpm. By default, it will install the latest CE version." + } + }, + "loadBalancerId": { + "type": "string", + "metadata": { + "description": "Loadbalancer fronting the coordinators" + } + }, + "nsgId": { + "type": "string", + "metadata": { + "description": "The security group required - ports 9047, 31010 and 22(ssh) should be allowed" + } + }, + "subnetId": { + "type": "string", + "metadata": { + "description": "The subnet in which the Dremio cluster is to be deployed" + } + }, + "usePrivateIP": { + "type": "bool", + "defaultValue": false, + "metadata": { + "description": "Select to use the private ip address of the subnet for Dremio access." + } + } + }, + "variables": { + "computeApiVersion": "2018-06-01", + "location": "[resourceGroup().location]", + "nicName": "[concat(parameters('dremioClusterName'), '-nic')]", + "dremioImage": { + "publisher": "OpenLogic", + "offer": "CentOS", + "sku": "7.5", + "version": "7.5.20180815" + }, + "linuxConfiguration": { + "disablePasswordAuthentication": true, + "ssh": { + "publicKeys": [ + { + "path": "[concat('/home/', parameters('sshUsername'), '/.ssh/authorized_keys')]", + "keyData": "[parameters('sshPasswordOrKey')]" + } + ] + } + }, + "publicIpAddressId": { + "id": "[resourceId(parameters('publicIpResourceGroupName'), 'Microsoft.Network/publicIPAddresses', parameters('publicIpName'))]" + }, + "singlePlacementGroup": "true", + "enableAcceleratedNetworking": "false", + "priority": "Regular", + "ipAllocationMethod": "Dynamic", + "upgradeMode": "Manual", + "namingInfix": "[toLower(substring(concat(parameters('dremioClusterName'), uniqueString(resourceGroup().id)), 0, 9))]", + "bePoolName": "[concat(variables('namingInfix'), 'bepool')]", + "baseURI": 
"https://raw.githubusercontent.com/dremio/dremio-cloud-tools/master/azure/arm-templates/scripts/", + "scriptFileName": "setupDremio.sh", + "scriptURL": "[concat(variables('baseURI'), variables('scriptFileName'))]", + "install": false + }, + "resources": [ + { + "condition": "[and(not(parameters('usePrivateIP')), equals(parameters('publicIpNewOrExisting'), 'new'))]", + "type": "Microsoft.Network/publicIPAddresses", + "apiVersion": "[variables('computeApiVersion')]", + "name": "[parameters('publicIpName')]", + "location": "[variables('location')]", + "sku": { + "name": "Standard" + }, + "properties": { + "publicIPAllocationMethod": "[parameters('publicIpAllocationMethod')]", + "dnsSettings": { + "domainNameLabel": "[parameters('publicIpDns')]" + } + } + }, + { + "apiVersion": "[variables('computeApiVersion')]", + "type": "Microsoft.Network/networkInterfaces", + "name": "[variables('nicName')]", + "location": "[variables('location')]", + "dependsOn": [ + "[parameters('publicIpName')]" + ], + "properties": { + "ipConfigurations": [ + { + "name": "ipconfig-master", + "properties": { + "privateIPAllocationMethod": "[variables('ipAllocationMethod')]", + "subnet": { + "id": "[parameters('subnetId')]" + }, + "publicIPAddress": "[if(and(not(parameters('usePrivateIP')), equals(parameters('publicIpNewOrExisting'), 'new')), variables('publicIpAddressId') , json('null'))]" + } + } + ], + "networkSecurityGroup": { + "id": "[parameters('nsgId')]" + } + } + }, + { + "apiVersion": "[variables('computeApiVersion')]", + "type": "Microsoft.Compute/virtualMachines", + "name": "[concat(parameters('dremioClusterName'), '-master')]", + "location": "[variables('location')]", + "dependsOn": [ + "[variables('nicName')]" + ], + "properties": { + "hardwareProfile": { + "vmSize": "[parameters('coordinatorVmSize')]" + }, + "osProfile": { + "computerName": "[parameters('dremioClusterName')]", + "adminUsername": "[parameters('sshUsername')]", + "adminPassword": "[parameters('sshPasswordOrKey')]", + 
"linuxConfiguration": "[if(equals(parameters('authenticationType'), 'password'), json('null'), variables('linuxConfiguration'))]" + }, + "storageProfile": { + "imageReference": "[variables('dremioImage')]", + "osDisk": { + "caching": "ReadWrite", + "createOption": "FromImage" + }, + "dataDisks": [ + { + "lun": 0, + "managedDisk": { + "id": "[parameters('dataDiskId')]" + }, + "caching": "ReadWrite", + "createOption": "Attach" + } + ] + }, + "networkProfile": { + "networkInterfaces": [ + { + "id": "[resourceId('Microsoft.Network/networkInterfaces', variables('nicName'))]" + } + ] + } + }, + "resources": [ + { + "type": "extensions", + "name": "configScript", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "dependsOn": [ + "[concat(parameters('dremioClusterName'), '-master')]" + ], + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.0", + "autoUpgradeMinorVersion": true, + "settings": { + "fileUris": [ + "[variables('scriptURL')]" + ] + }, + "protectedSettings": { + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' master ')]" + } + } + } + ] + }, + { + "name": "[concat(parameters('dremioClusterName'), '-coordinators')]", + "type": "Microsoft.Compute/virtualMachineScaleSets", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "dependsOn": ["[concat(parameters('dremioClusterName'), '-master')]"], + "condition": "[variables('install')]", + "sku": { + "name": "[parameters('coordinatorVmSize')]", + "tier": "Standard", + "capacity": "[parameters('coordinatorCount')]" + }, + "properties": { + "overprovision": "true", + "upgradePolicy": { + "mode": "[variables('upgradeMode')]" + }, + "singlePlacementGroup": "[variables('singlePlacementGroup')]", + "virtualMachineProfile": { + "storageProfile": { + "imageReference": "[variables('dremioImage')]", + 
"osDisk": { + "createOption": "FromImage", + "caching": "ReadWrite" + } + }, + "priority": "[variables('priority')]", + "osProfile": { + "computerNamePrefix": "[variables('namingInfix')]", + "adminUsername": "[parameters('sshUsername')]", + "adminPassword": "[parameters('sshPasswordOrKey')]" + }, + "networkProfile": { + "networkInterfaceConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'Nic')]", + "properties": { + "primary": "true", + "enableAcceleratedNetworking": "[variables('enableAcceleratedNetworking')]", + "ipConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'ipconfig-coordinators')]", + "properties": { + "subnet": { + "id": "[parameters('subnetId')]" + }, + "loadBalancerBackendAddressPools": [ + { + "id": "[concat(parameters('loadBalancerId'), '/backendAddressPools/', variables('bePoolName'))]" + } + ] + } + } + ] + } + } + ] + }, + "extensionProfile": { + "extensions": [ + { + "name": "updatescriptextension", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.0", + "autoUpgradeMinorVersion": true, + "settings": { + "fileUris": [ + "[variables('scriptURL')]" + ], + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' coordinator ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + } + } + } + ] + } + } + } + + }, + { + "name": "[concat(parameters('dremioClusterName'), '-executors')]", + "type": "Microsoft.Compute/virtualMachineScaleSets", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "dependsOn": ["[concat(parameters('dremioClusterName'), '-master')]"], + "sku": { + "name": "[parameters('executorVmSize')]", + "tier": "Standard", + "capacity": "[parameters('executorCount')]" + }, + "properties": { + "overprovision": "true", + 
"upgradePolicy": { + "mode": "[variables('upgradeMode')]" + }, + "singlePlacementGroup": "[variables('singlePlacementGroup')]", + "virtualMachineProfile": { + "storageProfile": { + "imageReference": "[variables('dremioImage')]", + "osDisk": { + "createOption": "FromImage", + "caching": "ReadWrite" + } + }, + "priority": "[variables('priority')]", + "osProfile": { + "computerNamePrefix": "[variables('namingInfix')]", + "adminUsername": "[parameters('sshUsername')]", + "adminPassword": "[parameters('sshPasswordOrKey')]" + }, + "networkProfile": { + "networkInterfaceConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'Nic')]", + "properties": { + "primary": "true", + "enableAcceleratedNetworking": "[variables('enableAcceleratedNetworking')]", + "ipConfigurations": [ + { + "name": "[concat(parameters('dremioClusterName'), 'ipconfig-executors')]", + "properties": { + "subnet": { + "id": "[parameters('subnetId')]" + } + } + } + ] + } + } + ] + }, + "extensionProfile": { + "extensions": [ + { + "name": "updatescriptextension", + "properties": { + "publisher": "Microsoft.Azure.Extensions", + "type": "CustomScript", + "typeHandlerVersion": "2.0", + "autoUpgradeMinorVersion": true, + "settings": { + "fileUris": [ + "[variables('scriptURL')]" + ], + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' executor ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + } + } + } + ] + } + } + } + + } + ], + "outputs": { + "dremioHost": { + "type": "string", + "value": "[if(parameters('usePrivateIP'), first(reference(variables('nicName')).ipConfigurations).properties.privateIPAddress, reference(parameters('publicIpName'), variables('computeApiVersion')).dnsSettings.fqdn)]" + } + } +} diff --git a/azure/arm-templates/nested/dremioState.json b/azure/arm-templates/nested/dremioState.json new file mode 
100644 index 00000000..1defac4b --- /dev/null +++ b/azure/arm-templates/nested/dremioState.json @@ -0,0 +1,408 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2018-05-01/subscriptionDeploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "dremioClusterName": { + "type": "string", + "defaultValue": "mydremio", + "metadata": { + "description": "Name for the Dremio Cluster" + } + }, + "dataDiskName": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-data-disk')]", + "metadata": { + "description": "Name for the Dremio Master Data Disk" + } + }, + "dataDiskSize": { + "type": "int", + "defaultValue": 20, + "metadata": { + "description": "Size of the Dremio Master Data Disk" + } + }, + "storageNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new storage account should be provisioned." + } + }, + "storageAccountName": { + "type": "string", + "defaultValue": "[concat('storage', uniqueString(resourceGroup().id))]", + "metadata": { + "description": "Name of the storage account" + } + }, + "storageAccountType": { + "type": "string", + "defaultValue": "Standard_LRS", + "metadata": { + "description": "Storage account type" + } + }, + "virtualNetworkNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new virtual network should be provisioned." 
+ } + }, + "addressPrefixes": { + "type": "array", + "defaultValue": [ + "10.0.0.0/16" + ], + "metadata": { + "description": "Address prefix of the virtual network" + } + }, + "subnetName": { + "type": "string", + "defaultValue": "default", + "metadata": { + "description": "Name of the subnet" + } + }, + "subnetPrefix": { + "type": "string", + "defaultValue": "10.0.0.0/24", + "metadata": { + "description": "Subnet prefix of the virtual network" + } + }, + "publicIpNewOrExisting": { + "type": "string", + "defaultValue": "new", + "metadata": { + "description": "Determines whether or not a new public ip should be provisioned." + } + }, + "publicIpName": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-webui-publicip')]", + "metadata": { + "description": "Name of the public ip address" + } + }, + "publicIpDns": { + "type": "string", + "defaultValue": "[concat(parameters('dremioClusterName'), '-dremio-', uniqueString(resourceGroup().id, deployment().name))]", + "metadata": { + "description": "DNS of the public ip address for the VM" + } + }, + "publicIpResourceGroupName": { + "type": "string", + "defaultValue": "[resourceGroup().name]", + "metadata": { + "description": "Name of the resource group for the public ip address" + } + }, + "publicIpAllocationMethod": { + "type": "string", + "defaultValue": "Static", + "allowedValues": [ + "Dynamic", + "Static" + ], + "metadata": { + "description": "Allocation method for the public ip address" + } + }, + "publicIpSku": { + "type": "string", + "defaultValue": "Basic", + "allowedValues": [ + "Basic", + "Standard" + ], + "metadata": { + "description": "SKU of the public ip address" + } + }, + "externalLoadBalancer": { + "type": "bool", + "defaultValue": true, + "metadata": { + "description": "Create an external load balancer" + } + }, + "existingSubnet": { + "type": "string", + "defaultValue": "", + "metadata": { + "description": "(Optional) - existing subnet" + } + } + 
}, + "variables": { + "computeApiVersion": "2018-06-01", + "storageApiVersion": "2018-07-01", + "location": "[resourceGroup().location]", + "virtualNetworkName": "[concat(parameters('dremioClusterName'), '-vnet')]", + "publicIpAddressId": "[resourceId(parameters('publicIpResourceGroupName'), 'Microsoft.Network/publicIPAddresses', parameters('publicIpName'))]", + "networkSecurityGroupName": "dremio-nsg", + "singlePlacementGroup": "true", + "enableAcceleratedNetworking": "false", + "priority": "Regular", + "ipAllocationMethod": "[parameters('publicIpAllocationMethod')]", + "upgradeMode": "Manual", + "namingInfix": "[toLower(substring(concat(parameters('dremioClusterName'), uniqueString(resourceGroup().id)), 0, 9))]", + "loadBalancerName": "[concat(variables('namingInfix'), '-lb')]", + "lbID": "[resourceId('Microsoft.Network/loadBalancers',variables('loadBalancerName'))]", + "natPoolName": "[concat(variables('namingInfix'), 'natpool')]", + "bePoolName": "[concat(variables('namingInfix'), 'bepool')]", + "natStartPort": 50000, + "natEndPort": 50119, + "natBackendPort": 9047, + "frontEndIPConfigId": "[concat(variables('lbID'),'/frontendIPConfigurations/loadBalancerFrontEnd')]", + "backendAddressPoolId": "[concat(variables('lbID'),'/backendAddressPools/', variables('bePoolName'))]", + "externallb": "[or(parameters('externalLoadBalancer'), equals(trim(parameters('existingSubnet')), ''))]", + "lbfrontEndIPConfig": "[if(variables('externallb'), variables('externallbFronEndIpConfig'), variables('internallbFrontEndIpConfig'))]", + "externallbFronEndIpConfig": { + "publicIPAddress": { + "id": "[variables('publicIpAddressId')]" + } + }, + "internallbFrontEndIpConfig": { + "subnet": { + "privateIPAllocationMethod": "Dynamic", + "id": "[parameters('existingSubnet')]" + } + }, + "install": false + }, + "resources": [ + { + "type": "Microsoft.Compute/disks", + "sku": { + "name": "StandardSSD_LRS", + "tier": "Standard" + }, + "name": "[parameters('dataDiskName')]", + "apiVersion": 
"[variables('computeApiVersion')]", + "location": "[variables('location')]", + "scale": null, + "properties": { + "creationData": { + "createOption": "Empty" + }, + "diskSizeGB": "[parameters('dataDiskSize')]" + } + }, + { + "condition": "[equals(parameters('virtualNetworkNewOrExisting'), 'new')]", + "type": "Microsoft.Network/virtualNetworks", + "apiVersion": "[variables('computeApiVersion')]", + "name": "[variables('virtualNetworkName')]", + "location": "[variables('location')]", + "properties": { + "addressSpace": { + "addressPrefixes": "[parameters('addressPrefixes')]" + }, + "subnets": [ + { + "name": "[parameters('subnetName')]", + "properties": { + "addressPrefix": "[parameters('subnetPrefix')]" + } + } + ] + } + }, + { + "name": "[variables('networkSecurityGroupName')]", + "type": "Microsoft.Network/networkSecurityGroups", + "apiVersion": "[variables('computeApiVersion')]", + "location": "[variables('location')]", + "properties": { + "securityRules": [ + { + "name": "default-allow-ssh", + "properties": { + "priority": 1000, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "22", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + }, + { + "name": "default-allow-dremio-ui", + "properties": { + "priority": 100, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "9047", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + }, + { + "name": "default-allow-dremio-client", + "properties": { + "priority": 110, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "31010", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } + } + ] + } + }, + { + "condition": "[and(variables('install'), variables('externallb'))]", + "type": "Microsoft.Network/publicIPAddresses", + "apiVersion": "[variables('computeApiVersion')]", + 
"name": "[parameters('publicIpName')]", + "location": "[variables('location')]", + "sku": { + "name": "Standard" + }, + "properties": { + "publicIPAllocationMethod": "[parameters('publicIpAllocationMethod')]", + "dnsSettings": { + "domainNameLabel": "[parameters('publicIpDns')]" + } + } + }, + { + "condition": "[variables('install')]", + "type": "Microsoft.Network/loadBalancers", + "name": "[variables('loadBalancerName')]", + "location": "[variables('location')]", + "apiVersion": "[variables('computeApiVersion')]", + "sku": { + "name": "Standard" + }, + "dependsOn": [ + "[concat('Microsoft.Network/virtualNetworks/', variables('virtualNetworkName'))]", + "[concat('Microsoft.Network/publicIPAddresses/', parameters('publicIpName'))]" + ], + "properties": { + "frontendIPConfigurations": [ + { + "name": "LoadBalancerFrontEnd", + "properties": "[variables('lbfrontEndIPConfig')]" + } + ], + "backendAddressPools": [ + { + "name": "[variables('bePoolName')]" + } + ], + "loadBalancingRules": [ + { + "name": "dremio-ui", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('frontEndIPConfigId')]" + }, + "frontendPort": 9047, + "backendPort": 9047, + "enableFloatingIP": false, + "idleTimeoutInMinutes": 4, + "protocol": "Tcp", + "enableTcpReset": false, + "loadDistribution": "SourceIP", + "disableOutboundSnat": false, + "backendAddressPool": { + "id": "[variables('backendAddressPoolId')]" + }, + "probe": { + "id": "[concat(variables('lbID'), '/probes/dremio-ui')]" + } + } + }, + { + "name": "dremio-client", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('frontEndIPConfigId')]" + }, + "frontendPort": 31010, + "backendPort": 31010, + "enableFloatingIP": false, + "idleTimeoutInMinutes": 4, + "protocol": "Tcp", + "enableTcpReset": false, + "loadDistribution": "SourceIP", + "disableOutboundSnat": false, + "backendAddressPool": { + "id": "[variables('backendAddressPoolId')]" + }, + "probe": { + "id": "[concat(variables('lbID'), 
'/probes/dremio-ui')]" + } + } + } + ], + "probes": [ + { + "name": "dremio-ui", + "properties": { + "protocol": "Tcp", + "port": 9047, + "intervalInSeconds": 15, + "numberOfProbes": 2 + } + } + ], + "inboundNatRules": [], + "outboundRules": [], + "inboundNatPools": [ + { + "name": "[variables('natPoolName')]", + "properties": { + "frontendIPConfiguration": { + "id": "[variables('frontEndIPConfigID')]" + }, + "protocol": "tcp", + "idleTimeoutInMinutes": 4, + "enableFloatingIP": false, + "enableTcpReset": false, + "frontendPortRangeStart": "[variables('natStartPort')]", + "frontendPortRangeEnd": "[variables('natEndPort')]", + "backendPort": "[variables('natBackendPort')]" + } + } + ] + } + } + ], + "outputs": { + "dataDiskId": { + "type": "string", + "value": "[resourceId('Microsoft.Compute/disks/', parameters('dataDiskName'))]" + }, + "subnetId": { + "type": "string", + "value": "[if(equals(parameters('virtualNetworkNewOrExisting'), 'new'), resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks/subnets/', variables('virtualNetworkName'), parameters('subnetName')), '')]" + }, + "loadBalancerId": { + "type": "string", + "value": "[if(variables('install'), resourceId('Microsoft.Network/loadBalancers/', variables('loadBalancerName')), '')]" + }, + "nsgId": { + "type": "string", + "value": "[resourceId('Microsoft.Network/networkSecurityGroups/', variables('networkSecurityGroupName'))]" + }, + "dremioUIAddress": { + "type": "string", + "value": "[if(variables('install'), if(variables('externallb'), reference(concat('Microsoft.Network/publicIPAddresses/', parameters('publicIpName')), variables('computeApiVersion')).dnsSettings.fqdn, first(reference(variables('loadBalancerName')).frontendIPConfigurations).properties.privateIPAddress), '')]" + } + } +} diff --git a/azure/arm-templates/scripts/setupDremio.sh b/azure/arm-templates/scripts/setupDremio.sh new file mode 100644 index 00000000..d4cb5f18 --- /dev/null +++ b/azure/arm-templates/scripts/setupDremio.sh @@ 
-0,0 +1,101 @@ +#!/bin/bash -e + +[ -z $DOWNLOAD_URL ] && DOWNLOAD_URL=http://download.dremio.com/community-server/dremio-community-LATEST.noarch.rpm +if [ ! -f /opt/dremio/bin/dremio ]; then + command -v yum >/dev/null 2>&1 || { echo >&2 "This script works only on Centos or Red Hat. Aborting."; exit 1; } + yum install -y java-1.8.0-openjdk + wget $DOWNLOAD_URL -O dremio-download.rpm + yum -y localinstall dremio-download.rpm +fi + +service=$1 +if [ -z "$service" ]; then + echo "Require the service to start - master, coordinator or executor" + exit 1 +fi + +# In Azure, /dev/sdb is ephemeral storage mapped to /mnt/resource. +# Additional disks are mounted after that... +DISK_NAME=/dev/sdc +DISK_PART=${DISK_NAME}1 +DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf +DREMIO_DATA_DIR=/var/lib/dremio +# Azure Linux VMs have ephemeral/temporary disk +# always mounted on /mnt/resource/dremio +SPILL_DIR=/mnt/resource/dremio + +function partition_disk { + parted $DISK_NAME mklabel msdos + parted -s $DISK_NAME mkpart primary ext4 0% 100% + mkfs -t ext4 $DISK_PART +} + +if [ "$service" == "master" ]; then + lsblk -no FSTYPE $DISK_NAME | grep ext4 || partition_disk + mount $DISK_PART $DREMIO_DATA_DIR + chown dremio:dremio $DREMIO_DATA_DIR + echo "$DISK_PART $DREMIO_DATA_DIR ext4 defaults 0 0" >> /etc/fstab +else + zookeeper=$2 + if [ -z "$zookeeper" ]; then + echo "Non-master node requires zookeeper host" + exit 2 + fi +fi + +function setup_spill { + chmod +w /etc/sysconfig/dremio + cat >> /etc/sysconfig/dremio < /dev/null; do echo waiting for dremio master; sleep 2; done; + sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ + s/executor.enabled: true/executor.enabled: false/" \ + $DREMIO_CONFIG_FILE + echo "zookeeper: \"$zookeeper:2181\"" >> $DREMIO_CONFIG_FILE +} + +function setup_executor { + setup_spill + sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ + s/coordinator.enabled: true/coordinator.enabled: false/; \ + 
/local:/a \ \ spilling: [\"$SPILL_DIR/spill\"]" \ + $DREMIO_CONFIG_FILE + echo "zookeeper: \"$zookeeper:2181\"" >> $DREMIO_CONFIG_FILE +} + +setup_$service +service dremio start +chkconfig dremio on diff --git a/charts/dremio/config/logback-admin.xml b/charts/dremio/config/logback-admin.xml new file mode 100644 index 00000000..b393d02b --- /dev/null +++ b/charts/dremio/config/logback-admin.xml @@ -0,0 +1,67 @@ + + + + + + %msg%n%ex{0}%n + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + + %date{ISO8601} [%thread] %-5level %logger{30} - %msg%n + + + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + ${dremio.admin.log.path} + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + + + + + + + + + + + + From 72bd56d6af1e83669cfd7116771ec78b9ccf3ee2 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Mon, 13 May 2019 14:17:04 -0700 Subject: [PATCH 07/31] DX-16307: change the message for password chars Change-Id: I83a272457e977ea54dc35812a6d28b68dda666ee --- azure/arm-templates/azuredeploy.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure/arm-templates/azuredeploy.json b/azure/arm-templates/azuredeploy.json index e9102fc8..b008183a 100644 --- a/azure/arm-templates/azuredeploy.json +++ b/azure/arm-templates/azuredeploy.json @@ -38,7 +38,7 @@ "PasswordOrSSHPublicKey": { "type": "securestring", "metadata": { - "description": "Password or ssh public key for the virtual machines. If password, password must be minimum 12 characters with at least 1 upper case letter, 1 lower case letter and 1 number." + "description": "Password or ssh public key for the virtual machines. If password, password must be minimum 8 characters with at least 1 upper case letter, 1 lower case letter and 1 number." } }, "useExistingSubnet": { From ac9811c249e0629582e2bc99a43778058797301d Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Mon, 13 May 2019 14:25:23 -0700 Subject: [PATCH 08/31] rebase to 3.2 release 1. Pull in the latest config files 2. 
Update memory, cpu in values.yaml so that they work with the recommended machine types in the docs Change-Id: I47970acda6fc776171f1970ddacb7e0d78dbff58 --- charts/dremio/README.md | 2 + charts/dremio/config/logback-access.xml | 5 +- charts/dremio/config/logback.xml | 68 +++++++++++++++++++++++-- charts/dremio/values.yaml | 8 +-- 4 files changed, 74 insertions(+), 9 deletions(-) diff --git a/charts/dremio/README.md b/charts/dremio/README.md index b5e620a5..6d8ecd0e 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -18,6 +18,8 @@ An appropriate distributed file store (S3, ADLS, HDFS, etc) should be used for p This assumes you already have kubernetes cluster setup, kubectl configured to talk to your kubernetes cluster and helm setup in your cluster. Review and update values.yaml to reflect values for your environment before installing the helm chart. This is specially important for for the memory and cpu values - your kubernetes cluster should have sufficient resources to provision the pods with those values. If your kubernetes installation does not support serviceType LoadBalancer, it is recommended to comment the serviceType value in values.yaml file before deploying. #### Installing the helm chart +Review charts/dremio/values.yaml and adjust the values as per your requirements. Note that the values for cpu and memory for the coordinator and the executors are set to work with AKS on Azure with worker nodes setup with machine types Standard_E16s_v3. 
+ Run this from the charts directory ```bash cd charts diff --git a/charts/dremio/config/logback-access.xml b/charts/dremio/config/logback-access.xml index c0f2ed28..a00ae338 100644 --- a/charts/dremio/config/logback-access.xml +++ b/charts/dremio/config/logback-access.xml @@ -24,8 +24,11 @@ ${dremio.log.path}/access.log - ${dremio.log.path}/archive/access.%d{yyyy-MM-dd}.log.gz + ${dremio.log.path}/archive/access.%d{yyyy-MM-dd}.%i.log.gz 30 + + 100MB + diff --git a/charts/dremio/config/logback.xml b/charts/dremio/config/logback.xml index 8999c3bc..0ab3528b 100644 --- a/charts/dremio/config/logback.xml +++ b/charts/dremio/config/logback.xml @@ -30,7 +30,22 @@ ${dremio.log.path}/server.log - ${dremio.log.path}/archive/server.%d{yyyy-MM-dd}.log.gz + ${dremio.log.path}/archive/server.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + ${dremio.log.path}/metadata_refresh.log + + ${dremio.log.path}/archive/metadata_refresh.%d{yyyy-MM-dd}.log.gz 30 @@ -42,8 +57,11 @@ ${dremio.log.path}/json/server.json - ${dremio.log.path}/json/archive/server.%d{yyyy-MM-dd}.json.gz + ${dremio.log.path}/json/archive/server.%d{yyyy-MM-dd}.%i.json.gz 30 + + 100MB + @@ -56,15 +74,18 @@ message - + ${dremio.log.path}/queries.json - ${dremio.log.path}/archive/queries.%d{yyyy-MM-dd}.json.gz + ${dremio.log.path}/archive/queries.%d{yyyy-MM-dd}.%i.json.gz 30 + + 100MB + @@ -87,6 +108,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + ${dremio.log.path}/hive.deprecated.function.warning.log + + ${dremio.log.path}/archive/hive.deprecated.function.warning.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index a4400a5e..d2505714 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -7,8 +7,8 @@ image: dremio/dremio-oss:latest # the coordinators and the executors. 
# The value of memory should be in MB. CPU is in no of cores. coordinator: - memory: 16384 - cpu: 8 + memory: 122880 + cpu: 15 # This count is for slave coordinators only. # The chart will always create one master coordinator - you are # not required to have more than one master coordinator. @@ -19,8 +19,8 @@ coordinator: port: 31010 volumeSize: 100Gi executor: - memory: 16384 - cpu: 4 + memory: 122880 + cpu: 15 count: 3 volumeSize: 100Gi zookeeper: From 8f40a515642c716008d85bcc04e2179b3905728f Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 21 May 2019 10:14:28 -0700 Subject: [PATCH 09/31] DX-16588: Remove unnecessary space Plus change README layout and two label changes. Change-Id: Ia5e05f13910db5c03361c6166411704b40bf4a8e --- azure/arm-templates/README.md | 39 ++++++++++++++-------------- azure/arm-templates/azuredeploy.json | 6 ++--- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/azure/arm-templates/README.md b/azure/arm-templates/README.md index 210a9f1c..59094555 100644 --- a/azure/arm-templates/README.md +++ b/azure/arm-templates/README.md @@ -1,6 +1,8 @@ # Deploying Dremio to Azure +You can try it out: [![Azure ARM Template](http://azuredeploy.net/deploybutton.png)](https://portal.azure.com/#create/microsoft.template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fdremio%2Fdremio-cloud-tools%2Fmaster%2Fazure%2Farm-templates%2Fazuredeploy.json) + This deploys a Dremio cluster on Azure VMs. The deployment creates a master coordinator node and number of executor nodes depending on the size of the cluster chosen. The table below provides the machine type and number of executor nodes for the different sizes of Dremio clusters. | Cluster size | Coordinator VM Type | Executor VM Type | No. of Executors | @@ -11,6 +13,24 @@ This deploys a Dremio cluster on Azure VMs. 
The deployment creates a master coor | Large | Standard_D8_v3 | Standard_E16s_v3 | 25 | | X-Large | Standard_D8_v3 | Standard_E16s_v3 | 50 | +The inputs required during deployment are: + +|Input Parameter|Description | +|---|---| +| Subscription |Azure subscription where the cluster should be deployed. | +| Resource Group |The Azure Resource group where the cluster should be deployed. You can create a new one too. It is recommended to create a new one as all resources are created in that group and deleting the group will delete all resources created. | +| Location |The Azure location where the cluster resources will be deployed. | +| Cluster Name |A name for your cluster.| +| Cluster Size |Pick a size based on your needs.| +| SSH Username |The username that can be used to login to your nodes.| +| Authentication Type |Password or Key based authentication for ssh.| +| Password or SSH Public Key |The password or ssh public key | +| Use Existing Subnet | (Optional) id of an existing subnet. The subnet must be in the same region as the Dremio cluster resource group. It is of the form /subscriptions/xxxx/resourceGroups/xxxx/providers/Microsoft.Network/virtualNetworks/xxxx/subnets/xxxx| +| Use Private IP | Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio. | +| Dremio Binary | Publicly accessible URL to a Dremio installation rpm | + +Once the deployment is successful, you will find the URL to Dremio UI in the output section of the deployment. 
+ The deployment resources are: ``` ┌───────────────────────────┐ @@ -40,22 +60,3 @@ The deployment resources are: │ └──────────────────────────────────────────────────────────────┘ │ └──────────────────────────────────────────────────────────────────┘ ``` -You can try it out: [![Azure ARM Template](http://azuredeploy.net/deploybutton.png)](https://portal.azure.com/#create/microsoft.template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fdremio%2Fdremio-cloud-tools%2Fmaster%2Fazure%2Farm-templates%2Fazuredeploy.json) - -The inputs required during deployment are: - -|Input Parameter|Description | -|---|---| -| Subscription |Azure subscription where the cluster should be deployed. | -| Resource Group |The Azure Resource group where the cluster should be deployed. You can create a new one too. It is recommended to create a new one as all resources are created in that group and deleting the group will delete all resources created. | -| Location |The Azure location where the cluster resources will be deployed. | -| Cluster Name |A name for your cluster.| -| Cluster Size |Pick a size based on your needs.| -| SSH Username |The username that can be used to login to your nodes.| -| Authentication Type |Password or Key based authentication for ssh.| -| Password or SSH Public Key |The password or ssh public key | -| Use Existing Subnet | (Optional) id of an existing subnet. The subnet must be in the same region as the Dremio cluster resource group. It is of the form /subscriptions/xxxx/resourceGroups/xxxx/providers/Microsoft.Network/virtualNetworks/xxxx/subnets/xxxx| -| Use Private IP | Select true if you are using existing subnet and you want to use an internal ip from the subnet to access Dremio. | -| Dremio Binary | Publicly accessible URL to a Dremio installation rpm | - -Once the deployment is successful, you will find the URL to Dremio UI in the output section of the deployment. 
diff --git a/azure/arm-templates/azuredeploy.json b/azure/arm-templates/azuredeploy.json index b008183a..5f353e3c 100644 --- a/azure/arm-templates/azuredeploy.json +++ b/azure/arm-templates/azuredeploy.json @@ -12,7 +12,7 @@ }, "clusterSize": { "type": "string", - "allowedValues": ["X-Small (1 executor)", "Small (5 executors)", "Medium (10 executors )", "Large (25 executors)", "X-Large (50 executors)"], + "allowedValues": ["X-Small (1 executor)", "Small (5 executors)", "Medium (10 executors)", "Large (25 executors)", "X-Large (50 executors)"], "metadata": { "description": "The type and number of machines are chosen based on the size selected." } @@ -21,7 +21,7 @@ "type": "string", "defaultValue": "azuser", "metadata": { - "description": "SSH username for the virtual machines. You need it if you want to login to the machines." + "description": "SSH username for the virtual machines. (Can be used to SSH into machines for changing configuration, reviewing logs, etc.)" } }, "authenticationType": { @@ -32,7 +32,7 @@ "SSHPublicKey" ], "metadata": { - "description": "Type of authentication to use on the virtual machines." + "description": "Type of authentication to use for SSH." } }, "PasswordOrSSHPublicKey": { From 0238c7aa0deabcd22f39643d604986005607041f Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 22 May 2019 11:53:05 -0700 Subject: [PATCH 10/31] DX-16610: Remove the strategy from templates DX-16619: Fix the syntax in values.yaml The StatefulSet attribute is updateStrategy and the default for that is RollingUpdate. So, deleting those lines is sufficient and does not lose any functionality. Kubernetes client libraries to v1.14 seem to be stricter than earlier versions and throw an error with those lines in there. Dropping the lines works with earlier versions of helm.
Change-Id: I727adc50a883a4801e737aec0be6b84788f6f357 --- charts/dremio/templates/dremio-coordinator.yaml | 5 ----- charts/dremio/templates/dremio-executor.yaml | 5 ----- charts/dremio/values.yaml | 2 +- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 2e4c1eaa..3bffa687 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -8,11 +8,6 @@ spec: replicas: {{.Values.coordinator.count}} podManagementPolicy: "Parallel" revisionHistoryLimit: 1 - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 selector: matchLabels: app: dremio-coordinator diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index 78ddc7af..e511e1d3 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ b/charts/dremio/templates/dremio-executor.yaml @@ -8,11 +8,6 @@ spec: replicas: {{.Values.executor.count}} podManagementPolicy: "Parallel" revisionHistoryLimit: 1 - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 selector: matchLabels: app: dremio-executor diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index d2505714..624a11bc 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -51,7 +51,7 @@ serviceType: LoadBalancer # For private and protected docker image repository, you should store # the credentials in a kubernetes secret and provide the secret name here. # For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod -#imagePullSecrets=secretname +#imagePullSecrets: secretname # Target pods to nodes based on labels set on the nodes. 
# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector From 28a244354686a7e2fc4471b9d44b7459e00d4256 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 21 May 2019 11:28:12 -0700 Subject: [PATCH 11/31] DX-16550: Support for Cloudformation in AWS Change-Id: Ic0aa8058ffbcd46144e451621312554b72dc4a67 --- aws/cloudformation/README.md | 34 +++ aws/cloudformation/dremio_cf.yaml | 363 ++++++++++++++++++++++++++++++ 2 files changed, 397 insertions(+) create mode 100644 aws/cloudformation/README.md create mode 100644 aws/cloudformation/dremio_cf.yaml diff --git a/aws/cloudformation/README.md b/aws/cloudformation/README.md new file mode 100644 index 00000000..dc837269 --- /dev/null +++ b/aws/cloudformation/README.md @@ -0,0 +1,34 @@ + +# Deploying Dremio to AWS + +_Note:_ To try on AWS, you should have: +* Permission to create Security Groups +* An AWS key pair created +* (Optional) A VPC and subnet created if you want to install to a non-default VPC + +Try it out [![AWS Cloudformation](https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png)](https://us-east-2.console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/new?templateURL=https://s3-us-west-2.amazonaws.com/aws-cloudformation.dremio.com/dremio_cf.yaml&stackName=myDremio) + +This deploys a Dremio cluster on EC2 instances. The deployment creates a master coordinator node and a number of executor nodes depending on the size of the cluster chosen. The table below provides the machine type and number of executor nodes for the different sizes of Dremio clusters. + +| Cluster size | Coordinator VM Type | Executor VM Type | No.
of Executors | +|--------------|---------------------|------------------|------------------| +| X-Small | m5.2xlarge | r5d.4xlarge | 1 | +| Small | m5.2xlarge | r5d.4xlarge | 5 | +| Medium | m5.4xlarge | r5d.4xlarge | 10 | +| Large | m5.4xlarge | r5d.4xlarge | 25 | +| X-Large | m5.4xlarge | r5d.4xlarge | 50 | + +Make sure you are in the AWS region you are planning to deploy your cluster in. + +The inputs required during deployment are: + +|Input Parameter|Description | +|---|---| +| Stack name |Name of the stack. | +| Cluster Size |Pick a size based on your needs.| +| Deploy to VPC |VPC to deploy the cluster into.| +| Deploy to Subnet |Subnet to deploy the cluster into. Must be in the selected VPC.| +| Dremio download URL | (Optional) Publicly accessible HTTP or HTTPS URL to a Dremio installation RPM. Leave empty to install the latest Dremio CE release. | +| AWS keypair | AWS key pair to use to SSH to the VMs. The SSH username for the VMs is centos (has sudo privilege). You can SSH into the machines to change configuration, review logs, etc. | + +Once the deployment is successful, you will find the URL to Dremio UI in the output section of the deployment. diff --git a/aws/cloudformation/dremio_cf.yaml b/aws/cloudformation/dremio_cf.yaml new file mode 100644 index 00000000..f7b1d21c --- /dev/null +++ b/aws/cloudformation/dremio_cf.yaml @@ -0,0 +1,363 @@ +--- +AWSTemplateFormatVersion: '2010-09-09' +Description: 'Setup a Dremio cluster.' +Parameters: + keyName: + Type: AWS::EC2::KeyPair::KeyName + AllowedPattern: ".+" + ConstraintDescription: Must select an existing EC2 KeyPair + Description: "AWS key pair to use to SSH to the VMs. SSH username for the VMs are centos (has sudo privilege). SSH into machines for changing configuration, reviewing logs, etc." + clusterSize: + Type: String + Description: "The type and number of machines are chosen based on the size selected."
+ AllowedValues: + - "X-Small--1-executor" + - "Small--5-executors" + - "Medium--10-executors" + - "Large--25-executors" + - "X-Large--50-executors" + Default: "Small--5-executors" + useVPC: + Type: AWS::EC2::VPC::Id + Description: "VPC to deploy the cluster into." + useSubnet: + Type: AWS::EC2::Subnet::Id + Description: "Subnet to deploy the cluster into. Must be in the selected VPC." + dremioDownloadURL: + Type: String + Description: "(Optional) HTTP or HTTPS URL to a Dremio RPM. Leave empty to install the latest Dremio CE release." + Default: "" +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - + Label: + default: Dremio Cluster + Parameters: + - coordinatorInstanceType + - coordinatorCount + - executorInstanceType + - executorCount + - clusterSize + - useVPC + - useSubnet + - dremioDownloadURL + - + Label: + default: AWS resource configuration + Parameters: + - keyName + ParameterLabels: + keyName: + default : "AWS keypair" + clusterSize: + default: "Cluster size" + useSubnet: + default: "Deploy to Subnet" + useVPC: + default: "Deploy to VPC" + dremioDownloadURL: + default: "Dremio download URL" +Mappings: + Custom: + Variables: + URL: https://download.dremio.com/community-server/dremio-community-LATEST.noarch.rpm + ClusterSizes: + X-Small--1-executor: + coordinatorInstanceType: m5.2xlarge + coordinatorDiskSize: 10 + executorInstanceType: r5d.4xlarge + executorCount: 1 + executorDiskSize: 10 + Small--5-executors: + coordinatorInstanceType: m5.2xlarge + coordinatorDiskSize: 50 + executorInstanceType: r5d.4xlarge + executorCount: 5 + executorDiskSize: 50 + Medium--10-executors: + coordinatorInstanceType: m5.4xlarge + coordinatorDiskSize: 100 + executorInstanceType: r5d.4xlarge + executorCount: 10 + executorDiskSize: 100 + Large--25-executors: + coordinatorInstanceType: m5.4xlarge + coordinatorDiskSize: 100 + executorInstanceType: r5d.4xlarge + executorCount: 25 + executorDiskSize: 100 + X-Large--50-executors: + coordinatorInstanceType: 
m5.4xlarge + coordinatorDiskSize: 100 + executorInstanceType: r5d.4xlarge + executorCount: 50 + executorDiskSize: 100 + RegionMap: + # Centos 7 Images + us-east-1: # N Virginia + AMI: ami-02eac2c0129f6376b + us-east-2: # Ohio + AMI: ami-0f2b4fc905b0bd1f1 + us-west-1: # California + AMI: ami-074e2d6769f445be5 + us-west-2: # Oregon + AMI: ami-01ed306a12b7d1c96 + ca-central-1: # Québec + AMI: ami-033e6106180a626d0 + eu-central-1: # Frankfurt + AMI: ami-04cf43aca3e6f3de3 + eu-west-1: # Ireland + AMI: ami-0ff760d16d9497662 + eu-west-2: # London + AMI: ami-0eab3a90fc693af19 + ap-southeast-1: # Singapore + AMI: ami-0b4dd9d65556cac22 + ap-southeast-2: # Sydney + AMI: ami-08bd00d7713a39e7d + ap-south-1 : # Mumbai + AMI: ami-02e60be79e78fef21 + ap-northeast-1: # Tokyo + AMI: ami-045f38c93733dd48d + ap-northeast-2: # Seoul + AMI: ami-06cf2a72dadf92410 + sa-east-1: # São Paulo + AMI: ami-0b8d86d4bf91850af + SubnetConfig: + VPC: + CIDR: 10.0.0.0/16 + Public: + CIDR: 10.0.0.0/24 +Conditions: + CreateVPC: !Equals [!Ref useSubnet, ""] +Resources: + VPC: + Condition: CreateVPC + Type: AWS::EC2::VPC + Properties: + EnableDnsSupport: 'true' + EnableDnsHostnames: 'true' + CidrBlock: !FindInMap [SubnetConfig, VPC, CIDR] + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "net"]] + + PublicSubnet: + Condition: CreateVPC + Type: AWS::EC2::Subnet + Properties: + VpcId: !Ref VPC + CidrBlock: !FindInMap [SubnetConfig, Public, CIDR] + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "public"]] + - Key: Network + Value: Public + + InternetGateway: + Condition: CreateVPC + Type: AWS::EC2::InternetGateway + Properties: + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "ig"]] + + VPCGatewayAttachment: + Condition: CreateVPC + Type: AWS::EC2::VPCGatewayAttachment + Properties: + VpcId: !Ref VPC + InternetGatewayId: !Ref InternetGateway + + PublicRouteTable: + Condition: CreateVPC + Type: AWS::EC2::RouteTable + Properties: + VpcId: !Ref VPC + Tags: + 
- Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "public"]] + + PublicSubnetRouteTableAssociation: + Condition: CreateVPC + Type: AWS::EC2::SubnetRouteTableAssociation + Properties: + SubnetId: !Ref PublicSubnet + RouteTableId: !Ref PublicRouteTable + + PublicRoute: + Condition: CreateVPC + Type: AWS::EC2::Route + DependsOn: VPCGatewayAttachment + Properties: + RouteTableId: !Ref PublicRouteTable + DestinationCidrBlock: 0.0.0.0/0 + GatewayId: !Ref InternetGateway + + DremioSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupName: "Dremio Access" + GroupDescription: "Dremio Access" + VpcId: !If [CreateVPC, !Ref VPC, !Ref useVPC] + SecurityGroupIngress: + - IpProtocol: tcp + FromPort: '9047' + ToPort: '9047' + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: '31010' + ToPort: '31010' + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: '22' + ToPort: '22' + CidrIp: 0.0.0.0/0 + - IpProtocol: tcp + FromPort: '80' + ToPort: '80' + CidrIp: 0.0.0.0/0 + - IpProtocol: -1 + SourceSecurityGroupName: "Dremio Access" + AvailabilityWaitHandle: + Type: AWS::CloudFormation::WaitConditionHandle + AvailabilityWaitCondition: + Type: AWS::CloudFormation::WaitCondition + DependsOn: DremioMaster + Properties: + Handle: !Ref "AvailabilityWaitHandle" + Timeout: "600" + + DremioMaster: + Type: AWS::EC2::Instance + Properties: + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", "DremioMaster"]] + ImageId: !FindInMap [RegionMap, !Ref "AWS::Region", AMI] + KeyName: !Ref keyName + InstanceType: !FindInMap [ClusterSizes, !Ref clusterSize, coordinatorInstanceType] + NetworkInterfaces: + - DeleteOnTermination: "true" + AssociatePublicIpAddress: "true" + DeviceIndex: 0 + SubnetId: !If [CreateVPC, !Ref PublicSubnet, !Ref useSubnet] + GroupSet: [!Ref DremioSecurityGroup] + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: !FindInMap [ClusterSizes, !Ref clusterSize, coordinatorDiskSize] + DeleteOnTermination: true + VolumeType: gp2 + UserData: 
+ Fn::Base64: !Sub + - | + #!/bin/bash -x + statusFile=/tmp/statusfile + + if [ ! -d /opt/dremio ]; then + url=${dremioDownloadURL} + [ -z $url ] && url=${DOWNLOAD_URL} + yum -y install java-1.8.0-openjdk-devel $url + if [ $? != 0 ]; then + echo "{ \"Status\" : \"FAILURE\", \"UniqueId\" : \"${AWS::StackName}\", \"Data\" : \"Failed\", \"Reason\" : \"Unable to download Dremio\" }" > $statusFile + curl -T $statusFile '${AvailabilityWaitHandle}' + exit 1 + fi + fi + + DREMIO_HOME=/opt/dremio + DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + + sed -i "s/executor.enabled: true/executor.enabled: false/" $DREMIO_CONFIG_FILE + + cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system + systemctl daemon-reload + systemctl start dremio + systemctl enable dremio + + until curl -Iks http://localhost:9047; do + echo waiting for website availability + sleep 2 + done + echo "{ \"Status\" : \"SUCCESS\", \"UniqueId\" : \"${AWS::StackName}\", \"Data\" : \"Ready\", \"Reason\" : \"Website Available\" }" > $statusFile + curl -T $statusFile '${AvailabilityWaitHandle}' + - DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + + DremioExecutorLC: + Type: AWS::AutoScaling::LaunchConfiguration + DependsOn: DremioMaster + Properties: + AssociatePublicIpAddress: true + #EbsOptimized: true + ImageId: + Fn::FindInMap: + - RegionMap + - !Ref AWS::Region + - AMI + InstanceMonitoring: true + InstanceType: !FindInMap [ClusterSizes, !Ref clusterSize, executorInstanceType] + KeyName: !Ref keyName + SecurityGroups: [!Ref DremioSecurityGroup] + BlockDeviceMappings: + - DeviceName: /dev/sda1 + Ebs: + VolumeSize: !FindInMap [ClusterSizes, !Ref clusterSize, executorDiskSize] + DeleteOnTermination: true + VolumeType: gp2 + UserData: + Fn::Base64: !Sub + - | + #!/bin/bash -x + + if [ ! 
-d /opt/dremio ]; then + url=${dremioDownloadURL} + [ -z $url ] && url=${DOWNLOAD_URL} + yum -y install java-1.8.0-openjdk-devel $url + fi + + mkdir /var/ephemeral + # Set up the ephemeral disk - this assumes executors are r5d class machines (local NVMe device nvme1n1) + NVME=nvme1n1 + file -s /dev/$NVME | grep "/dev/$NVME: data" && mkfs -t xfs /dev/$NVME && \ + UUID=$(blkid | grep $NVME | awk -F'"' '{ print $2 }') && \ + echo "UUID=$UUID /var/ephemeral xfs defaults,nofail 0 2" >> /etc/fstab && \ + mount -a + chmod 777 /var/ephemeral + + SPILL_DIR=/var/ephemeral/dremio_spill + DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + + sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ + s/coordinator.enabled: true/coordinator.enabled: false/; \ + /local:/a \ \ spilling: [\"$SPILL_DIR\"]" \ + $DREMIO_CONFIG_FILE + echo "zookeeper: \"${ZK}:2181\"" >> $DREMIO_CONFIG_FILE + cp /opt/dremio/share/dremio/dremio.service /etc/systemd/system # DREMIO_HOME is not set in this script; use the install path directly + systemctl daemon-reload + systemctl start dremio + systemctl enable dremio + + - ZK: !GetAtt DremioMaster.PrivateIp + DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + + DremioExecutorASG: + Type: AWS::AutoScaling::AutoScalingGroup + DependsOn: DremioExecutorLC + Properties: + Tags: + - Key: Name + Value: !Join ["-", [!Ref "AWS::StackName", DremioExecutor]] + PropagateAtLaunch: true + ResourceType: "auto-scaling-group" + ResourceId: !Ref "AWS::StackName" + LaunchConfigurationName: !Ref DremioExecutorLC + VPCZoneIdentifier: [!If [CreateVPC, !Ref PublicSubnet, !Ref useSubnet]] + DesiredCapacity: !FindInMap [ClusterSizes, !Ref clusterSize, executorCount] + MaxSize: !FindInMap [ClusterSizes, !Ref clusterSize, executorCount] + MinSize: !FindInMap [ClusterSizes, !Ref clusterSize, executorCount] + +Outputs: + DremioUI: + Description: Dremio UI.
+ Value: !Join [ "", ["http://", !GetAtt DremioMaster.PublicIp, ":9047"]] From 4b85995343b9f74d17f4b1e39dca71dbdcf06395 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 29 May 2019 13:11:55 -0700 Subject: [PATCH 12/31] DX-16739: open port 80 Certbot uses port 80 to handshake with LetsEncrypt to generate SSL certificates. So, port 80 is being opened up by default. Change-Id: I565bd37b6b2657c67751efd1eee2f4cc45be4ba8 --- azure/arm-templates/nested/dremioState.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/azure/arm-templates/nested/dremioState.json b/azure/arm-templates/nested/dremioState.json index 1defac4b..bed09915 100644 --- a/azure/arm-templates/nested/dremioState.json +++ b/azure/arm-templates/nested/dremioState.json @@ -259,6 +259,19 @@ "sourcePortRange": "*", "destinationAddressPrefix": "*" } + }, + { + "name": "default-allow-for-letsencrypt", + "properties": { + "priority": 1100, + "sourceAddressPrefix": "*", + "protocol": "Tcp", + "destinationPortRange": "80", + "access": "Allow", + "direction": "Inbound", + "sourcePortRange": "*", + "destinationAddressPrefix": "*" + } } ] } From 7e4c51641e017492d4e08d9bfa9e6f65c4800775 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Tue, 4 Jun 2019 10:39:27 -0700 Subject: [PATCH 13/31] DX-16813: Add attribute for reverse DNS Azure VMs do not automatically do reverse DNS lookup of the VM's public ip address. Reverse DNS needs to be specifically enabled. 
Change-Id: Ie92fe9ca6c6358a3704947f201dae4e209602c66 --- azure/arm-templates/nested/dremioCluster.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azure/arm-templates/nested/dremioCluster.json b/azure/arm-templates/nested/dremioCluster.json index 527e9346..ff4291a6 100644 --- a/azure/arm-templates/nested/dremioCluster.json +++ b/azure/arm-templates/nested/dremioCluster.json @@ -195,7 +195,8 @@ "properties": { "publicIPAllocationMethod": "[parameters('publicIpAllocationMethod')]", "dnsSettings": { - "domainNameLabel": "[parameters('publicIpDns')]" + "domainNameLabel": "[parameters('publicIpDns')]", + "reverseFqdn": "[concat(parameters('publicIpDns'), '.', variables('location'), '.cloudapp.azure.com')]" } } }, From b2fc9c00a452893d37c013774079c4c3c2affab4 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Thu, 6 Jun 2019 11:14:40 -0700 Subject: [PATCH 14/31] DX-16712: Use Azure Storage for uploads and accelerator data 1. The URL for path.dist values for S3, ADLS are different from pre-3.2.0 and 3.2.0+. Handle it. 2. Add support for Azure Storage v2 for path.dist Change-Id: I7730ed4caac22240e579e41641e720bdedf05ec0 --- charts/dremio/config/core-site.xml | 42 +++++++++++++++++++++++++----- charts/dremio/config/dremio.conf | 23 ++++++++++++---- charts/dremio/values.yaml | 16 ++++++++---- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml index 1c2f3237..8d91d757 100644 --- a/charts/dremio/config/core-site.xml +++ b/charts/dremio/config/core-site.xml @@ -2,20 +2,20 @@ - {{- if and .Values.uploads.type (eq .Values.uploads.type "aws") }} + {{- if and .Values.distStorage.type (eq .Values.distStorage.type "aws") }} fs.s3a.access.key AWS access key ID. - {{ required "AWS access key required" .Values.uploads.aws.accessKey}} + {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} fs.s3a.secret.key AWS secret key. 
- {{ required "AWS secret required" .Values.uploads.aws.secret}} + {{ required "AWS secret required" .Values.distStorage.aws.secret}} {{- end }} - {{- if and .Values.uploads.type (eq .Values.uploads.type "azure") }} + {{- if and .Values.distStorage.type (eq .Values.distStorage.type "azure") }} @@ -26,17 +26,17 @@ dfs.adls.oauth2.client.id Application ID of the registered application under Azure Active Directory - {{required "Azure application ID required" .Values.uploads.azure.applicationId}} + {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} dfs.adls.oauth2.credential Generated password value for the registered application - {{required "Azure secret value required" .Values.uploads.azure.secret}} + {{required "Azure secret value required" .Values.distStorage.azure.secret}} dfs.adls.oauth2.refresh.url Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. - {{required "Azure OAuth2 token endpoint required" .Values.uploads.azure.oauth2EndPoint}} + {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} dfs.adls.oauth2.access.token.provider.type @@ -49,4 +49,32 @@ false {{- end }} + + {{- if and .Values.dremioVersion (ge .Values.dremioVersion "3.2.0") .Values.distStorage.type (eq .Values.distStorage.type "azureStorage") }} + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} + + + dremio.azure.key + The shared access key for the storage account. + {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} + + + dremio.azure.mode + The storage account type. 
Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. Value: True/False + True + + {{- end }} diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 4e754236..2b626a5c 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -22,12 +22,25 @@ paths: { #dist: "pdfs://"${paths.local}"/pdfs" # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces - {{- if .Values.uploads.type }} - {{- if eq .Values.uploads.type "aws" }} - uploads: "s3a://{{required "AWS bucketname required" .Values.uploads.aws.bucketName}}{{required "Path required" .Values.uploads.aws.path}}" + {{- if .Values.distStorage.type }} + {{- if and .Values.dremioVersion (lt .Values.dremioVersion "3.2.0") }} + {{- if eq .Values.distStorage.type "aws" }} + uploads: "s3a://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azure" }} + uploads: "adl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" + {{- end }} + {{- else }} # dremio_version > 3.2.0 + {{- if eq .Values.distStorage.type "aws" }} + uploads: "dremioS3://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azure" }} + uploads: "dremioAdl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azureStorage" }} + uploads: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" 
.Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath}}" + accelerator: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for accelerator required" .Values.distStorage.azureStorage.acceleratorPath}}" {{- end }} - {{- if eq .Values.uploads.type "azure" }} - uploads: "adl://{{required "Azure Datalake store name required" .Values.uploads.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.uploads.azure.path}}" {{- end }} {{- end }} } diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 624a11bc..72e8bbf4 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -2,7 +2,6 @@ # version tag to the version that you are using. This will ensure that all # the pods are using the same version of the software. image: dremio/dremio-oss:latest - # Check out Dremio documentation for memory and cpu requirements for # the coordinators and the executors. # The value of memory should be in MB. CPU is in no of cores. @@ -60,17 +59,24 @@ serviceType: LoadBalancer # Control where uploaded files are stored. # See https://docs.dremio.com/deployment/distributed-storage.html for more information -uploads: - # Valid values are local, aws or azure. aws and azure choice requires additional configuration data. +dremioVersion: "3.2.0" # Dremio Version 3.2.0 or greater +distStorage: + # Valid values are local, aws, azure or azureStorage. The aws, azure and azureStorage choices require additional configuration data.
type: "local" - aws: + aws: # S3 - used only for uploads bucketName: "Your_AWS_bucket_name" path: "/" accessKey: "Your_AWS_Access_Key" secret: "Your_AWS_Secret" - azure: + azure: # ADLS v1 - used only for uploads datalakeStoreName: "Your_Azure_DataLake_Storage_name" path: "/" applicationId: "Your_Azure_Application_Id" secret: "Your_Azure_Secret" oauth2EndPoint: "Azure_OAuth2_Endpoint" + azureStorage: # AzureStorage v2 - supported in Dremio version 3.2.0+ - used for uploads and accelerator + accountName: "Azure_storage_v2_account_name" + accessKey: "Access_key_for_the_storage_account" + filesystem: "Filesystem_in_storage_account" + uploadsPath: "Path_for_uploads" + acceleratorPath: "Path_for_accelerator" From 6ac0ad9da8b92ca6d71bde2adc663655e4f0c6b7 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Wed, 5 Jun 2019 15:37:26 -0700 Subject: [PATCH 15/31] DX-16710: Configure accelerator and uploads to Storage V2 Create an Azure Storage V2 account, create a filesystem and directories in there and configure dremio to use that for accelerator and uploads.
Change-Id: Ic1ce87d3ed8f2bfc6ac5a8d822af75f742b61d1e --- azure/arm-templates/azuredeploy.json | 2 +- azure/arm-templates/nested/dremioCluster.json | 53 ++++++++- azure/arm-templates/nested/dremioState.json | 26 +---- azure/arm-templates/scripts/setupDremio.sh | 110 +++++++++++++++++- 4 files changed, 157 insertions(+), 34 deletions(-) diff --git a/azure/arm-templates/azuredeploy.json b/azure/arm-templates/azuredeploy.json index 5f353e3c..89593a09 100644 --- a/azure/arm-templates/azuredeploy.json +++ b/azure/arm-templates/azuredeploy.json @@ -170,7 +170,7 @@ "dataDiskId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.dataDiskId.value]"}, "sshUsername": {"value": "[parameters('SSHUsername')]"}, "sshPasswordOrKey": {"value": "[parameters('PasswordOrSSHPublicKey')]"}, - "subnetId": {"value": "[if(equals(trim(parameters('useExistingSubnet')), ''), reference(concat(variables('shortName'), '-state-deployment')).outputs.subnetId.value, parameters('useExistingSubnet'))]"}, + "subnetId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.subnetId.value]"}, "loadBalancerId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.loadBalancerId.value]"}, "nsgId": {"value": "[reference(concat(variables('shortName'), '-state-deployment')).outputs.nsgId.value]"}, "usePrivateIP": {"value": "[parameters('usePrivateIP')]"} diff --git a/azure/arm-templates/nested/dremioCluster.json b/azure/arm-templates/nested/dremioCluster.json index ff4291a6..18832f6d 100644 --- a/azure/arm-templates/nested/dremioCluster.json +++ b/azure/arm-templates/nested/dremioCluster.json @@ -144,10 +144,39 @@ "metadata": { "description": "Select to use the private ip address of the subnet for Dremio access." 
} + }, + "storageAccountName": { + "type": "string", + "defaultValue": "[concat('dremiometa',uniqueString(resourceGroup().id, deployment().name))]", + "metadata": { + "description": "Name of the storage account" + } + }, + "storageAccountType": { + "type": "string", + "defaultValue": "Standard_LRS", + "metadata": { + "description": "Storage account type" + } + }, + "storageKind": { + "type": "string", + "defaultValue": "StorageV2", + "metadata": { + "description": "Storage account kind" + } + }, + "storageAccessTier": { + "type": "string", + "defaultValue": "Hot", + "metadata": { + "description": "Storage access tier" + } } }, "variables": { "computeApiVersion": "2018-06-01", + "storageApiVersion": "2018-07-01", "location": "[resourceGroup().location]", "nicName": "[concat(parameters('dremioClusterName'), '-nic')]", "dremioImage": { @@ -183,6 +212,21 @@ "install": false }, "resources": [ + { + "name": "[parameters('storageAccountName')]", + "type": "Microsoft.Storage/storageAccounts", + "apiVersion": "[variables('storageApiVersion')]", + "location": "[variables('location')]", + "properties": { + "accessTier": "[parameters('storageAccessTier')]", + "supportsHttpsTrafficOnly": true, + "isHnsEnabled": true + }, + "sku": { + "name": "[parameters('storageAccountType')]" + }, + "kind": "[parameters('storageKind')]" + }, { "condition": "[and(not(parameters('usePrivateIP')), equals(parameters('publicIpNewOrExisting'), 'new'))]", "type": "Microsoft.Network/publicIPAddresses", @@ -232,7 +276,8 @@ "name": "[concat(parameters('dremioClusterName'), '-master')]", "location": "[variables('location')]", "dependsOn": [ - "[variables('nicName')]" + "[variables('nicName')]", + "[parameters('storageAccountName')]" ], "properties": { "hardwareProfile": { @@ -289,7 +334,7 @@ ] }, "protectedSettings": { - "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' master ')]" + "commandToExecute": "[concat('DOWNLOAD_URL=\"', 
parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' master ', parameters('storageAccountName'), ' ', listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value)]" } } } @@ -366,7 +411,7 @@ "fileUris": [ "[variables('scriptURL')]" ], - "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' coordinator ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' coordinator ', parameters('storageAccountName'), ' ', listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value, ' ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" } } } @@ -441,7 +486,7 @@ "fileUris": [ "[variables('scriptURL')]" ], - "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' executor ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" + "commandToExecute": "[concat('DOWNLOAD_URL=\"', parameters('dremioDownloadURL'), '\" bash ', variables('scriptFileName'), ' executor ', parameters('storageAccountName'), ' ', listKeys(resourceId('Microsoft.Storage/storageAccounts', parameters('storageAccountName')), providers('Microsoft.Storage', 'storageAccounts').apiVersions[0]).keys[0].value, ' ', reference(concat(parameters('dremioClusterName'), '-nic'), variables('computeApiVersion')).ipConfigurations[0].properties.privateIPAddress)]" 
} } } diff --git a/azure/arm-templates/nested/dremioState.json b/azure/arm-templates/nested/dremioState.json index bed09915..47359950 100644 --- a/azure/arm-templates/nested/dremioState.json +++ b/azure/arm-templates/nested/dremioState.json @@ -23,27 +23,6 @@ "description": "Size of the Dremio Master Data Disk" } }, - "storageNewOrExisting": { - "type": "string", - "defaultValue": "new", - "metadata": { - "description": "Determines whether or not a new storage account should be provisioned." - } - }, - "storageAccountName": { - "type": "string", - "defaultValue": "[concat('storage', uniqueString(resourceGroup().id))]", - "metadata": { - "description": "Name of the storage account" - } - }, - "storageAccountType": { - "type": "string", - "defaultValue": "Standard_LRS", - "metadata": { - "description": "Storage account type" - } - }, "virtualNetworkNewOrExisting": { "type": "string", "defaultValue": "new", @@ -141,7 +120,6 @@ }, "variables": { "computeApiVersion": "2018-06-01", - "storageApiVersion": "2018-07-01", "location": "[resourceGroup().location]", "virtualNetworkName": "[concat(parameters('dremioClusterName'), '-vnet')]", "publicIpAddressId": "[resourceId(parameters('publicIpResourceGroupName'), 'Microsoft.Network/publicIPAddresses', parameters('publicIpName'))]", @@ -174,6 +152,8 @@ "id": "[parameters('existingSubnet')]" } }, + "networkAclsBypass": "AzureServices", + "networkAclsDefaultAction": "Deny", "install": false }, "resources": [ @@ -403,7 +383,7 @@ }, "subnetId": { "type": "string", - "value": "[if(equals(parameters('virtualNetworkNewOrExisting'), 'new'), resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks/subnets/', variables('virtualNetworkName'), parameters('subnetName')), '')]" + "value": "[if(equals(parameters('virtualNetworkNewOrExisting'), 'new'), resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks/subnets/', variables('virtualNetworkName'), parameters('subnetName')), parameters('existingSubnet'))]" }, 
"loadBalancerId": { "type": "string", diff --git a/azure/arm-templates/scripts/setupDremio.sh b/azure/arm-templates/scripts/setupDremio.sh index d4cb5f18..dc4fd877 100644 --- a/azure/arm-templates/scripts/setupDremio.sh +++ b/azure/arm-templates/scripts/setupDremio.sh @@ -3,9 +3,7 @@ [ -z $DOWNLOAD_URL ] && DOWNLOAD_URL=http://download.dremio.com/community-server/dremio-community-LATEST.noarch.rpm if [ ! -f /opt/dremio/bin/dremio ]; then command -v yum >/dev/null 2>&1 || { echo >&2 "This script works only on Centos or Red Hat. Aborting."; exit 1; } - yum install -y java-1.8.0-openjdk - wget $DOWNLOAD_URL -O dremio-download.rpm - yum -y localinstall dremio-download.rpm + yum install -y java-1.8.0-openjdk-devel $DOWNLOAD_URL fi service=$1 @@ -13,12 +11,20 @@ if [ -z "$service" ]; then echo "Require the service to start - master, coordinator or executor" exit 1 fi +storage_account=$2 +access_key=$3 + +if [ -n "$storage_account" -a -n "$access_key" ]; then + use_azure_storage=1 +fi # In Azure, /dev/sdb is ephemeral storage mapped to /mnt/resource. # Additional disks are mounted after that... 
DISK_NAME=/dev/sdc DISK_PART=${DISK_NAME}1 -DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf +DREMIO_HOME=/opt/dremio +DREMIO_CONFIG_DIR=/etc/dremio +DREMIO_CONFIG_FILE=$DREMIO_CONFIG_DIR/dremio.conf DREMIO_DATA_DIR=/var/lib/dremio # Azure Linux VMs have ephemeral/temporary disk # always mounted on /mnt/resource/dremio @@ -36,7 +42,11 @@ if [ "$service" == "master" ]; then chown dremio:dremio $DREMIO_DATA_DIR echo "$DISK_PART $DREMIO_DATA_DIR ext4 defaults 0 0" >> /etc/fstab else - zookeeper=$2 + if [ -n "$use_azure_storage" ]; then # set only when both storage args were passed; must expand, so double quotes + zookeeper=$4 + else + zookeeper=$2 + fi if [ -z "$zookeeper" ]; then echo "Non-master node requires zookeeper host" exit 2 @@ -69,11 +79,18 @@ function upgrade_master { cd $DREMIO_DATA_DIR if [ -d db ]; then tar -zcvf dremio_db_$(date '+%Y-%m-%d_%H-%M').tar.gz db - /opt/dremio/bin/dremio-admin upgrade + sudo -u dremio /opt/dremio/bin/dremio-admin upgrade fi } function setup_master { + if [ -n "$use_azure_storage" ]; then # create the storage filesystem/directories only when a storage account was supplied + storage_create_action "dremiodata" filesystem && \ + storage_create_action "dremiodata/accelerator" directory && \ + storage_create_action "dremiodata/uploads" directory + fi + + configure_dremio_dist sed -i "s/executor.enabled: true/executor.enabled: false/" $DREMIO_CONFIG_FILE upgrade_master } @@ -81,6 +98,7 @@ function setup_master { function setup_coordinator { yum install -y nc until nc -z $zookeeper 9047 > /dev/null; do echo waiting for dremio master; sleep 2; done; + configure_dremio_dist sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ s/executor.enabled: true/executor.enabled: false/" \ $DREMIO_CONFIG_FILE @@ -88,6 +106,7 @@ function setup_coordinator { } function setup_executor { + configure_dremio_dist setup_spill sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ s/coordinator.enabled: true/coordinator.enabled: false/; \ @@ -96,6 +115,85 @@ function setup_executor { echo "zookeeper: \"$zookeeper:2181\"" >> $DREMIO_CONFIG_FILE } +function
storage_create_action { + resource=$1 + resource_type=$2 + blob_store_url="dfs.core.windows.net" + authorization="SharedKey" + request_method="PUT" + request_date=$(TZ=GMT date "+%a, %d %h %Y %H:%M:%S %Z") + storage_service_version="2018-11-09" + # HTTP Request headers + x_ms_date_h="x-ms-date:$request_date" + x_ms_version_h="x-ms-version:$storage_service_version" + content_length_h="Content-Length: 0" + # Build the signature string + canonicalized_headers="${x_ms_date_h}\n${x_ms_version_h}" + canonicalized_resource="/${storage_account}/${resource}\nresource:${resource_type}" + string_to_sign="${request_method}\n\n\n\n\n\n\n\n\n\n\n\n${canonicalized_headers}\n${canonicalized_resource}" + # Decode the Base64 encoded access key, convert to Hex. + decoded_hex_key="$(echo -n $access_key | base64 -d -w0 | xxd -p -c256)" + # Create the HMAC signature for the Authorization header + signature=$(printf "$string_to_sign" | openssl dgst -sha256 -mac HMAC -macopt "hexkey:$decoded_hex_key" -binary | base64 -w0) + authorization_header="Authorization: $authorization $storage_account:$signature" + curl \ + -X $request_method \ + -H "$content_length_h" \ + -H "$x_ms_date_h" \ + -H "$x_ms_version_h" \ + -H "$authorization_header" \ + "https://${storage_account}.${blob_store_url}/${resource}?resource=${resource_type}" + return $? +} + +function write_coresite_xml { +cat > $DREMIO_CONFIG_DIR/core-site.xml < + + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + $storage_account + + + dremio.azure.key + The shared access key for the storage account. + $access_key + + + dremio.azure.mode + The storage account type. Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. 
Value: True/False + True + + +EOF +} + +function update_dremio_config { +cat >> $DREMIO_CONFIG_FILE < Date: Mon, 10 Jun 2019 14:21:04 -0700 Subject: [PATCH 16/31] DX-16819: Add TLS support in Helm chart. Change-Id: I17e70a423356849c0d2368648a72241b97d5afeb --- charts/dremio/config/dremio.conf | 15 ++++ .../dremio/templates/dremio-coordinator.yaml | 72 +++++++++++++++++ charts/dremio/templates/dremio-master.yaml | 78 ++++++++++++++++++- charts/dremio/values.yaml | 15 ++++ 4 files changed, 179 insertions(+), 1 deletion(-) diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 2b626a5c..12252069 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -55,3 +55,18 @@ services: { # # Other service parameters can be customized via this file. } + +{{- if .Values.tls.ui.enabled }} +services.coordinator.web.ssl.enabled: true +services.coordinator.web.ssl.auto-certificate.enabled: false + +services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" +{{- end }} + +{{- if .Values.tls.client.enabled }} +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition. 
+services.coordinator.client-endpoint.ssl.enabled: true +services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false + +services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" +{{- end }} \ No newline at end of file diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 3bffa687..86d5f2ac 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -37,6 +37,10 @@ spec: volumeMounts: - name: dremio-config mountPath: /opt/dremio/conf + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + mountPath: /opt/dremio/tls + {{- end }} env: - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB value: "{{ template "HeapMemory" .Values.coordinator.memory }}" @@ -64,10 +68,78 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until nc -z dremio-client {{ .Values.coordinator.web.port | default 9047 }} > /dev/null; do echo waiting for dremio master; sleep 2; done;"] + {{- if .Values.tls.ui.enabled }} + - name: generate-ui-keystore + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - "/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/ui.pkcs12" + - "-passout" + - "pass:" + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: generate-client-keystore + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - "/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - 
"/opt/dremio/tls/client.pkcs12" + - "-passout" + - "pass:" + {{- end }} volumes: - name: dremio-config configMap: name: dremio-config + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if .Values.tls.ui.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ .Values.tls.ui.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ .Values.tls.client.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} {{- if .Values.imagePullSecrets }} imagePullSecrets: - name: {{ .Values.imagePullSecrets }} diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 423ce78f..f272eb32 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -47,6 +47,10 @@ spec: mountPath: /opt/dremio/data - name: dremio-config mountPath: /opt/dremio/conf + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + mountPath: /opt/dremio/tls + {{- end }} env: - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB value: "{{ template "HeapMemory" .Values.coordinator.memory }}" @@ -71,7 +75,11 @@ spec: - containerPort: 45678 name: server readinessProbe: - tcpSocket: + httpGet: + path: / + {{- if .Values.tls.ui.enabled }} + scheme: HTTPS + {{- end }} port: 9047 initialDelaySeconds: 5 periodSeconds: 5 @@ -104,10 +112,78 @@ spec: command: ["/opt/dremio/bin/dremio-admin"] args: - "upgrade" + {{- if .Values.tls.ui.enabled }} + - name: generate-ui-keystore + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - 
"/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/ui.pkcs12" + - "-passout" + - "pass:" + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: generate-client-keystore + image: {{.Values.image}} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: + - "pkcs12" + - "-export" + - "-inkey" + - "/dremio-tls-secret/tls.key" + - "-in" + - "/dremio-tls-secret/tls.crt" + - "-out" + - "/opt/dremio/tls/client.pkcs12" + - "-passout" + - "pass:" + {{- end }} volumes: - name: dremio-config configMap: name: dremio-config + {{- if or .Values.tls.ui.enabled .Values.tls.client.enabled }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if .Values.tls.ui.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ .Values.tls.ui.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if .Values.tls.client.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ .Values.tls.client.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} {{- if .Values.imagePullSecrets }} imagePullSecrets: - name: {{ .Values.imagePullSecrets }} diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 72e8bbf4..2601ebba 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -28,6 +28,21 @@ zookeeper: count: 3 volumeSize: 10Gi +# To create a TLS secret, use the following command: +# kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} +tls: + ui: + # To enable TLS for the web UI, set the enabled flag to true and provide + # the appropriate Kubernetes TLS secret. 
+ enabled: false + secret: dremio-tls-secret-ui + client: + # To enable TLS for the client endpoints, set the enabled flag to true and provide + # the appropriate Kubernetes TLS secret. Client endpoint encryption is available only on + # Dremio Enterprise Edition and should not be enabled otherwise. + enabled: false + secret: dremio-tls-secret-client + # If your Kubernetes cluster does not support LoadBalancer, # comment out the line below for the helm chart to succeed or add # the correct serviceType for your cluster. From 6519a4f368556b67cbf3d3eeeafa94063540ee61 Mon Sep 17 00:00:00 2001 From: Nirmalya Sen Date: Thu, 11 Jul 2019 10:31:50 -0700 Subject: [PATCH 17/31] DX-17385: Dockerfile to build dremio/cloud-tools The image includes useful cloud tools. See the Dockerfile for the list of tools/clients installed. Change-Id: I6edd554ffc37b353813b8c351a9d505332267930 --- utils/Dockerfile | 38 ++++++++++++++++++++++++++++++++++++++ utils/helm-init.sh | 14 ++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 utils/Dockerfile create mode 100755 utils/helm-init.sh diff --git a/utils/Dockerfile b/utils/Dockerfile new file mode 100644 index 00000000..da700d1d --- /dev/null +++ b/utils/Dockerfile @@ -0,0 +1,38 @@ +################################################################ +# Image with useful cloud tools installed: +# - aws cli +# - eksctl +# - azure cli +# - gcloud +# - kubectl +# - helm +# - git +# +# An image built with this is published in Dockerhub as +# dremio/cloud-tools +# +################################################################ +FROM centos + +ADD helm-init.sh /usr/local/bin + +RUN \ + yum install -y epel-release && \ + yum install -y which openssl git python-pip && \ + pip install --upgrade pip && \ + pip install awscli && \ + curl --silent --location "https://github.com/weaveworks/eksctl/releases/download/latest_release/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && \ + mv /tmp/eksctl /usr/local/bin && \ + curl -s -o 
aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.13.7/2019-06-11/bin/linux/amd64/aws-iam-authenticator && \ + chmod +x ./aws-iam-authenticator && \ + mv aws-iam-authenticator /usr/local/bin && \ + curl -s -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && \ + chmod +x kubectl && \ + mv kubectl /usr/local/bin && \ + curl -s -L https://git.io/get_helm.sh | bash && \ + rpm --import https://packages.microsoft.com/keys/microsoft.asc && \ + sh -c 'echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" > /etc/yum.repos.d/azure-cli.repo' && \ + yum install -y azure-cli && \ + curl https://sdk.cloud.google.com | bash + +WORKDIR /root diff --git a/utils/helm-init.sh b/utils/helm-init.sh new file mode 100755 index 00000000..4029ff5d --- /dev/null +++ b/utils/helm-init.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +if ! command -v helm 2>&1 > /dev/null; then + echo "Helm not found. Installing helm..." + curl -L https://git.io/get_helm.sh | bash + if ! command -v helm 2>&1 > /dev/null; then + echo "Failed installation of Helm. Please check the script and debug. " + exit 1 + fi + echo "Helm successfully installed on your machine." +fi +kubectl create serviceaccount -n kube-system tiller +kubectl create clusterrolebinding tiller-binding --clusterrole=cluster-admin --serviceaccount kube-system:tiller +helm init --service-account tiller --wait From 0ed2e686ff728a07bca0973cab6092a40985e2b0 Mon Sep 17 00:00:00 2001 From: Mikhail Stolpner Date: Thu, 8 Aug 2019 17:23:24 -0700 Subject: [PATCH 18/31] DX-18003: Fixed CloudFormation template for security group ingress. 
Change-Id: I90e2515ebc230fa8da5940498516f9ae313a019e --- aws/cloudformation/dremio_cf.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/aws/cloudformation/dremio_cf.yaml b/aws/cloudformation/dremio_cf.yaml index f7b1d21c..9b0e15c0 100644 --- a/aws/cloudformation/dremio_cf.yaml +++ b/aws/cloudformation/dremio_cf.yaml @@ -217,8 +217,6 @@ Resources: FromPort: '80' ToPort: '80' CidrIp: 0.0.0.0/0 - - IpProtocol: -1 - SourceSecurityGroupName: "Dremio Access" AvailabilityWaitHandle: Type: AWS::CloudFormation::WaitConditionHandle AvailabilityWaitCondition: @@ -228,6 +226,13 @@ Resources: Handle: !Ref "AvailabilityWaitHandle" Timeout: "600" + DremioSecurityGroupSelfIngress: + Type: AWS::EC2::SecurityGroupIngress + Properties: + GroupId: !Ref DremioSecurityGroup + IpProtocol: -1 + SourceSecurityGroupId: !Ref DremioSecurityGroup + DremioMaster: Type: AWS::EC2::Instance Properties: From 228380acfa54ec3177431ae2f77035fde1c953da Mon Sep 17 00:00:00 2001 From: J C Lawrence Date: Wed, 18 Sep 2019 15:46:48 -0700 Subject: [PATCH 19/31] DX-18724: Scale memory sizes with instance sizes Change-Id: If8b01fcc3b7d71563a07f8115ce0c4ae03bd3f9a --- aws/cloudformation/dremio_cf.yaml | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/aws/cloudformation/dremio_cf.yaml b/aws/cloudformation/dremio_cf.yaml index 9b0e15c0..0e7d3e1c 100644 --- a/aws/cloudformation/dremio_cf.yaml +++ b/aws/cloudformation/dremio_cf.yaml @@ -66,33 +66,43 @@ Mappings: X-Small--1-executor: coordinatorInstanceType: m5.2xlarge coordinatorDiskSize: 10 + coordinatorMaxMemory: 28672 executorInstanceType: r5d.4xlarge executorCount: 1 executorDiskSize: 10 + executorMaxMemory: 122880 Small--5-executors: coordinatorInstanceType: m5.2xlarge coordinatorDiskSize: 50 + coordinatorMaxMemory: 28672 executorInstanceType: r5d.4xlarge executorCount: 5 executorDiskSize: 50 + executorMaxMemory: 122880 Medium--10-executors: coordinatorInstanceType: m5.4xlarge 
coordinatorDiskSize: 100 + coordinatorMaxMemory: 61440 executorInstanceType: r5d.4xlarge executorCount: 10 executorDiskSize: 100 + executorMaxMemory: 122880 Large--25-executors: coordinatorInstanceType: m5.4xlarge coordinatorDiskSize: 100 + coordinatorMaxMemory: 61440 executorInstanceType: r5d.4xlarge executorCount: 25 executorDiskSize: 100 + executorMaxMemory: 122880 X-Large--50-executors: coordinatorInstanceType: m5.4xlarge coordinatorDiskSize: 100 + coordinatorMaxMemory: 61440 executorInstanceType: r5d.4xlarge executorCount: 50 executorDiskSize: 100 + executorMaxMemory: 122880 RegionMap: # Centos 7 Images us-east-1: # N Virginia @@ -273,8 +283,12 @@ Resources: DREMIO_HOME=/opt/dremio DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + DREMIO_ENV_FILE=/etc/dremio/dremio-env - sed -i "s/executor.enabled: true/executor.enabled: false/" $DREMIO_CONFIG_FILE + sed -i -e "s/executor.enabled: true/executor.enabled: false/" \ + $DREMIO_CONFIG_FILE + sed -i -e "s/#DREMIO_MAX_MEMORY_SIZE_MB=/DREMIO_MAX_MEMORY_SIZE_MB=${MEMORY_SIZE}/" \ + $DREMIO_ENV_FILE cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system systemctl daemon-reload @@ -288,6 +302,7 @@ Resources: echo "{ \"Status\" : \"SUCCESS\", \"UniqueId\" : \"${AWS::StackName}\", \"Data\" : \"Ready\", \"Reason\" : \"Website Available\" }" > $statusFile curl -T $statusFile '${AvailabilityWaitHandle}' - DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + MEMORY_SIZE: !FindInMap [ClusterSizes, !Ref clusterSize, coordinatorMaxMemory] DremioExecutorLC: Type: AWS::AutoScaling::LaunchConfiguration @@ -332,19 +347,23 @@ Resources: SPILL_DIR=/var/ephemeral/dremio_spill DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf + DREMIO_ENV_FILE=/etc/dremio/dremio-env - sed -i "s/coordinator.master.enabled: true/coordinator.master.enabled: false/; \ - s/coordinator.enabled: true/coordinator.enabled: false/; \ - /local:/a \ \ spilling: [\"$SPILL_DIR\"]" \ - $DREMIO_CONFIG_FILE + sed -i -e "s/coordinator.master.enabled: 
true/coordinator.master.enabled: false/" \ + -e "s/coordinator.enabled: true/coordinator.enabled: false/" \ + -e "/local:/a \ \ spilling: [\"$SPILL_DIR\"]" \ + $DREMIO_CONFIG_FILE + sed -i -e "s/#DREMIO_MAX_MEMORY_SIZE_MB=/DREMIO_MAX_MEMORY_SIZE_MB=${MEMORY_SIZE}/" \ + $DREMIO_ENV_FILE echo "zookeeper: \"${ZK}:2181\"" >> $DREMIO_CONFIG_FILE + cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system systemctl daemon-reload systemctl start dremio systemctl enable dremio - - ZK: !GetAtt DremioMaster.PrivateIp DOWNLOAD_URL: !FindInMap [ Custom, Variables, "URL"] + MEMORY_SIZE: !FindInMap [ClusterSizes, !Ref clusterSize, executorMaxMemory] DremioExecutorASG: Type: AWS::AutoScaling::AutoScalingGroup From fd8e42534d77847f2e5f858953bf17465760fd73 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Wed, 18 Sep 2019 23:17:17 -0700 Subject: [PATCH 20/31] DX-18733: Enable C3 on Dremio executors for CFTs. - Additionally fixes deploying multiple instances of the CFT in the same subnet by removing the GroupName and allowing AWS to dynamically generate one since GroupName(s) must be unique in a subnet. 
Change-Id: I1fb6101528b857a40e09f41aebd11a9843ecb257 --- aws/cloudformation/dremio_cf.yaml | 37 ++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/aws/cloudformation/dremio_cf.yaml b/aws/cloudformation/dremio_cf.yaml index 0e7d3e1c..f30be27b 100644 --- a/aws/cloudformation/dremio_cf.yaml +++ b/aws/cloudformation/dremio_cf.yaml @@ -207,7 +207,6 @@ Resources: DremioSecurityGroup: Type: AWS::EC2::SecurityGroup Properties: - GroupName: "Dremio Access" GroupDescription: "Dremio Access" VpcId: !If [CreateVPC, !Ref VPC, !Ref useVPC] SecurityGroupIngress: @@ -336,16 +335,28 @@ Resources: yum -y install java-1.8.0-openjdk-devel $url fi - mkdir /var/ephemeral - # Setup ephemeral disk - this is based on executors are r5d class machines - NVME=nvme1n1 - file -s /dev/$NVME | grep "/dev/$NVME: data" && mkfs -t xfs /dev/$NVME && \ - UUID=$(blkid | grep $NVME | awk -F'"' '{ print $2 }') && \ - echo "UUID=$UUID /var/ephemeral xfs defaults,nofail 0 2" >> /etc/fstab && \ + # Setup ephemeral disk for spill - this is based on executors are r5d class machines + SPILL_DIR=/var/ephemeral/spill + NVME_SPILL=nvme1n1 + + mkdir -p $SPILL_DIR + file -s /dev/$NVME_SPILL | grep "/dev/$NVME_SPILL: data" && mkfs -t xfs /dev/$NVME_SPILL && \ + UUID=$(blkid | grep $NVME_SPILL | awk -F'"' '{ print $2 }') && \ + echo "UUID=$UUID $SPILL_DIR xfs defaults,nofail 0 2" >> /etc/fstab && \ + mount -a + chown dremio:dremio $SPILL_DIR + + # Setup ephemeral disk for C3 - this is based on executors are r5d class machines + CLOUDCACHE_DIR=/var/ephemeral/cloudcache + NVME_CLOUDCACHE=nvme2n1 + + mkdir -p $CLOUDCACHE_DIR + file -s /dev/$NVME_CLOUDCACHE | grep "/dev/$NVME_CLOUDCACHE: data" && mkfs -t xfs /dev/$NVME_CLOUDCACHE && \ + UUID=$(blkid | grep $NVME_CLOUDCACHE | awk -F'"' '{ print $2 }') && \ + echo "UUID=$UUID $CLOUDCACHE_DIR xfs defaults,nofail 0 2" >> /etc/fstab && \ mount -a - chmod 777 /var/ephemeral + chown dremio:dremio $CLOUDCACHE_DIR - 
SPILL_DIR=/var/ephemeral/dremio_spill DREMIO_CONFIG_FILE=/etc/dremio/dremio.conf DREMIO_ENV_FILE=/etc/dremio/dremio-env @@ -355,7 +366,13 @@ Resources: $DREMIO_CONFIG_FILE sed -i -e "s/#DREMIO_MAX_MEMORY_SIZE_MB=/DREMIO_MAX_MEMORY_SIZE_MB=${MEMORY_SIZE}/" \ $DREMIO_ENV_FILE - echo "zookeeper: \"${ZK}:2181\"" >> $DREMIO_CONFIG_FILE + cat <> $DREMIO_CONFIG_FILE + zookeeper: "${ZK}:2181" + services.executor.cache.path.db: "$CLOUDCACHE_DIR" + services.executor.cache.path.fs: ["$CLOUDCACHE_DIR"] + services.executor.cache.pctquota.db: 10 + services.executor.cache.pctquota.fs: [100] + EOF cp $DREMIO_HOME/share/dremio/dremio.service /etc/systemd/system systemctl daemon-reload From d199978f1eaf648ec014e5e171e481698b6bb554 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Thu, 19 Sep 2019 01:03:37 -0700 Subject: [PATCH 21/31] Add Helm C3 executor and dist store caching - Dremio 4.0.0 or later required. - Adds the concept of an imageTag to expose features that are introduced only in newer versions of Dremio. - Removes the dremioVersion value that needs to be manually set to reference the same version that is used by the image. - Adds optional Cloud Cache support. Dist is split between PDFS and cloud storage. 
Change-Id: Idccb0acbd82a8f3ed50b249599c0b04e93641406 --- charts/dremio/Chart.yaml | 2 +- charts/dremio/README.md | 168 +++++++++++++----- charts/dremio/config/core-site.xml | 163 +++++++++-------- charts/dremio/config/dremio.conf | 68 ++++--- charts/dremio/templates/dremio-admin.yaml | 2 +- .../dremio/templates/dremio-coordinator.yaml | 6 +- charts/dremio/templates/dremio-executor.yaml | 7 +- charts/dremio/templates/dremio-master.yaml | 10 +- charts/dremio/values.yaml | 51 ++++-- 9 files changed, 298 insertions(+), 179 deletions(-) diff --git a/charts/dremio/Chart.yaml b/charts/dremio/Chart.yaml index 0457134e..7a40ce28 100644 --- a/charts/dremio/Chart.yaml +++ b/charts/dremio/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v1" name: "dremio" -version: "0.0.7" +version: "0.1.0" keywords: - dremio - data diff --git a/charts/dremio/README.md b/charts/dremio/README.md index 6d8ecd0e..8bab30c8 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -2,7 +2,10 @@ ## Overview -This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persistent volume for the master node to store the metadata for the cluster. The default configuration uses the default persistent storage supported by the kubernetes platform. For example, +This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses +a persistent volume for the master node to store the metadata for the +cluster. The default configuration uses the default persistent storage +supported by the kubernetes platform. For example, | Kubernetes platform | Persistent store | |---------------------|------------------| @@ -11,26 +14,48 @@ This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persist | Google GKE | Persistent Disk | | Local K8S on Docker | Hostpath | -If you want to use a different storage class available in your kubernetes environment, add the storageClass in values.yaml. 
- -An appropriate distributed file store (S3, ADLS, HDFS, etc) should be used for paths.dist as this deployment will lose locally persisted reflections and uploads. You can update config/dremio.conf. Dremio [documentation](https://docs.dremio.com/deployment/distributed-storage.html) provides more information on this. - -This assumes you already have kubernetes cluster setup, kubectl configured to talk to your kubernetes cluster and helm setup in your cluster. Review and update values.yaml to reflect values for your environment before installing the helm chart. This is specially important for for the memory and cpu values - your kubernetes cluster should have sufficient resources to provision the pods with those values. If your kubernetes installation does not support serviceType LoadBalancer, it is recommended to comment the serviceType value in values.yaml file before deploying. +If you want to use a different storage class available in your +kubernetes environment, add the storageClass in values.yaml. + +An appropriate distributed file store (S3, ADLS, HDFS, etc) should be +used for paths.dist as this deployment will lose locally persisted +reflections and uploads. You can update config/dremio.conf. Dremio +[documentation](https://docs.dremio.com/deployment/distributed-storage.html) +provides more information on this. + +This assumes you already have kubernetes cluster setup, kubectl +configured to talk to your kubernetes cluster and helm setup in your +cluster. Review and update values.yaml to reflect values for your +environment before installing the helm chart. This is especially +important for the memory and cpu values - your kubernetes cluster +should have sufficient resources to provision the pods with those +values. If your kubernetes installation does not support serviceType +LoadBalancer, it is recommended to comment out the serviceType value in +the values.yaml file before deploying.
#### Installing the helm chart -Review charts/dremio/values.yaml and adjust the values as per your requirements. Note that the values for cpu and memory for the coordinator and the executors are set to work with AKS on Azure with worker nodes setup with machine types Standard_E16s_v3. + +Review charts/dremio/values.yaml and adjust the values as per your +requirements. Note that the values for cpu and memory for the +coordinator and the executors are set to work with AKS on Azure with +worker nodes setup with machine types Standard_E16s_v3. Run this from the charts directory + ```bash -cd charts -helm install --wait dremio -``` -If it takes longer than a couple of minutes to complete, check the status of the pods to see where they are waiting. If they are pending scheduling due to limited memory or cpu, either adjust the values in values.yaml and restart the process or add more resources to your kubernetes cluster. +cd charts helm install --wait dremio ``` + +If it takes longer than a couple of minutes to complete, check the +status of the pods to see where they are waiting. If they are pending +scheduling due to limited memory or cpu, either adjust the values in +values.yaml and restart the process or add more resources to your +kubernetes cluster. #### Connect to the Dremio UI -If your kubernetes supports serviceType LoadBalancer, you can get to the Dremio UI on the load balancer external ip. -For example, if your service output is: +If your kubernetes supports serviceType LoadBalancer, you can get to +the Dremio UI on the load balancer external IP. 
For example, if your +service output is: ```bash kubectl get services dremio-client @@ -38,24 +63,32 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -you can get to the Dremio UI using the value under column EXTERNAL-IP: +You can get to the Dremio UI using the value under column EXTERNAL-IP: http://35.226.31.211:9047 -If your kubernetes does not have support of serviceType LoadBalancer, you can access the Dremio UI on the port exposed on the node. For example, if the service output is: +If your kubernetes does not have support of serviceType LoadBalancer, +you can access the Dremio UI on the port exposed on the node. For +example, if the service output is: ```bash kubectl get services dremio-client NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP 1h ``` -where there is no external ip and the Dremio master is running on node "localhost", you can get to Dremio UI using: -http://localhost:30670 +Where there is no external IP and the Dremio master is running on node +"localhost", you can get to Dremio UI using: +http://localhost:30670 #### Dremio Client Port -The port 31010 is used for ODBC and JDBC connections. You can look up service dremio-client in kubernetes to find the host to use for ODBC or JDBC connections. Depending on your kubernetes cluster supporting serviceType LoadBalancer, you will use the load balancer external-ip or the node on which a coordinator is running. + +The port 31010 is used for ODBC and JDBC connections. You can look up +service dremio-client in kubernetes to find the host to use for ODBC +or JDBC connections. Depending on your kubernetes cluster supporting +serviceType LoadBalancer, you will use the load balancer external-ip +or the node on which a coordinator is running. 
```bash kubectl get services dremio-client @@ -63,32 +96,37 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -For example, in the above output, the service is exposed on an external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC connections. +For example, in the above output, the service is exposed on an +external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC +connections. #### Viewing logs -Logs are written to the container's console. All the logs - server.log, server.out, server.gc and access.log - are written into the console simultaneously. You can view the logs using kubectl. -``` -kubectl logs -``` -You can also tail the logs using the -f parameter. -``` -kubectl logs -f -``` + +Logs are written to the container's console. All the logs - +server.log, server.out, server.gc and access.log - are written into +the console simultaneously. You can view the logs using kubectl. ``` +kubectl logs ``` You can also tail the logs using the +-f parameter. ``` kubectl logs -f ``` #### Scale by adding additional Coordinators or Executors (optional) -Get the name of the helm release. In the example below, the release name is plundering-alpaca. + +Get the name of the helm release. In the example below, the release +name is plundering-alpaca: + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 default ``` -Add additional coordinators +Add additional coordinators: + ```bash helm upgrade dremio --set coordinator.count=3 ``` -Add additional executors +Add additional executors: + ```bash helm upgrade dremio --set executor.count=5 ``` @@ -96,33 +134,44 @@ helm upgrade dremio --set executor.count=5 You can also scale down the same way. 
### Running offline dremio-admin commands -Administration commands restore, cleanup and set-password in dremio-admin needs to be run when -the Dremio cluster is not running. So, before running these commands, you need to shutdown -the Dremio cluster. Use the helm delete command to delete the helm release. -(Kubernetes does not delete the persistent store volumes when you delete statefulset pods and -when you install the cluster again using helm, the existing persistent store will be used and -you will get your Dremio cluster running again.) - -After Dremio cluster is shutdown, start the dremio-admin pod using + +Administration commands restore, cleanup and set-password in +dremio-admin needs to be run when the Dremio cluster is not +running. So, before running these commands, you need to shutdown the +Dremio cluster. Use the helm delete command to delete the helm +release. (Kubernetes does not delete the persistent store volumes +when you delete statefulset pods and when you install the cluster +again using helm, the existing persistent store will be used and you +will get your Dremio cluster running again.) + +After Dremio cluster is shutdown, start the dremio-admin pod using: + ```bash helm install --wait dremio --set DremioAdmin=true ``` -Once the pod is running, you can connect to the pod using +Once the pod is running, you can connect to the pod using: + ```bash kubectl exec -it dremio-admin -- bash ``` Now, you have a bash shell from where you can run the dremio-admin commands. -Once you are done, you can delete the helm release for the dremio-admin and start your Dremio cluster. +Once you are done, you can delete the helm release for the +dremio-admin and start your Dremio cluster. #### Upgrading Dremio -You should attempt upgrade when no queries are running on the cluster. Update the Dremio image tag in your values.yaml file. E.g. + +You should attempt upgrade when no queries are running on the +cluster. Update the Dremio image tag in your values.yaml file. 
E.g: + ```bash image: dremio/dremio-oss:3.0.0 ... ``` -Get the name of the helm release. In the example below, the release name is plundering-alpaca. +Get the name of the helm release. In the example below, the release +name is plundering-alpaca. + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE @@ -130,20 +179,45 @@ plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 defaul ``` Upgrade the deployment via helm upgrade command: + ``` helm upgrade . ``` -Existing pods will be terminated and new pods will be created with the new image. You can +Existing pods will be terminated and new pods will be created with the +new image. You can + monitor the status of the pods by running: ``` kubectl get pods ``` -Once all the pods are restarted and running, your Dremio cluster is upgraded. +Once all the pods are restarted and running, your Dremio cluster is +upgraded. #### Customizing Dremio configuration -Dremio configuration files used by the deployment are in the config directory. These files are propagated to all the pods in the cluster. Updating the configuration and upgrading the helm release - just like doing an upgrade - would refresh all the pods with the new configuration. [Dremio documentation](https://docs.dremio.com/deployment/README-config.html) covers the configuration capabilities in Dremio. - -If you need to add a core-site.xml, you can add the file to the config directory and it will be propagated to all the pods on install or upgrade of the deployment. +Dremio configuration files used by the deployment are in the config +directory. These files are propagated to all the pods in the +cluster. Updating the configuration and upgrading the helm release - +just like doing an upgrade - would refresh all the pods with the new +configuration. [Dremio +documentation](https://docs.dremio.com/deployment/README-config.html) +covers the configuration capabilities in Dremio. 
+ +If you need to add a core-site.xml, you can add the file to the config +directory and it will be propagated to all the pods on install or +upgrade of the deployment. + +#### Important Changes + +2019-09-19 (v0.1.0): BREAKING CHANGE. + + Dremio versions before 4.0.0 are no longer supported by this Helm + chart. Dremio image specifier was split into an imageName and + imageTag parts to follow best practices. "dist" value in + dremio.conf moved to cloud storage where possible (otherwise + defaults to pdfs) -- this will lose any previously extant + reflections materialisations, user uploads, scratch files, etc. + Also added Cloud Cache support (new in Dremio 4.0). Please see + values.yaml for details on this new configuration. diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml index 8d91d757..3283eb32 100644 --- a/charts/dremio/config/core-site.xml +++ b/charts/dremio/config/core-site.xml @@ -1,80 +1,93 @@ +{{- if and .Values.distStorage.type (ne .Values.distStorage.type "local") }} - {{- if and .Values.distStorage.type (eq .Values.distStorage.type "aws") }} - - fs.s3a.access.key - AWS access key ID. - {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} - - - fs.s3a.secret.key - AWS secret key. 
- {{ required "AWS secret required" .Values.distStorage.aws.secret}} - - {{- end }} - - {{- if and .Values.distStorage.type (eq .Values.distStorage.type "azure") }} - - - fs.adl.impl - Must be set to org.apache.hadoop.fs.adl.AdlFileSystem - org.apache.hadoop.fs.adl.AdlFileSystem - - - dfs.adls.oauth2.client.id - Application ID of the registered application under Azure Active Directory - {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} - - - dfs.adls.oauth2.credential - Generated password value for the registered application - {{required "Azure secret value required" .Values.distStorage.azure.secret}} - - - dfs.adls.oauth2.refresh.url - Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. - {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} - - - dfs.adls.oauth2.access.token.provider.type - Must be set to ClientCredential - ClientCredential - - - fs.adl.impl.disable.cache - Only include this property AFTER validating the ADLS connection. - false - - {{- end }} - - {{- if and .Values.dremioVersion (ge .Values.dremioVersion "3.2.0") .Values.distStorage.type (eq .Values.distStorage.type "azureStorage") }} - - fs.dremioAzureStorage.impl - FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem - com.dremio.plugins.azure.AzureStorageFileSystem - - - dremio.azure.account - The name of the storage account. - {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} - - - dremio.azure.key - The shared access key for the storage account. - {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} - - - dremio.azure.mode - The storage account type. Value: STORAGE_V2 - STORAGE_V2 - - - dremio.azure.secure - Boolean option to enable SSL connections. 
Value: True/False - True - - {{- end }} + {{- if eq .Values.distStorage.type "aws" }} + + fs.dremioS3.impl + The FileSystem implementation. Must be set to com.dremio.plugins.s3.store.S3FileSystem + com.dremio.plugins.s3.store.S3FileSystem + + + fs.s3a.aws.credentials.provider + The credential provider type. + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + + + fs.s3a.access.key + AWS access key ID. + {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} + + + fs.s3a.secret.key + AWS secret key. + {{ required "AWS secret required" .Values.distStorage.aws.secret}} + + {{- end }} + + {{- if eq .Values.distStorage.type "azure" }} + + + fs.adl.impl + Must be set to org.apache.hadoop.fs.adl.AdlFileSystem + org.apache.hadoop.fs.adl.AdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory + {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} + + + dfs.adls.oauth2.credential + Generated password value for the registered application + {{required "Azure secret value required" .Values.distStorage.azure.secret}} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} + + + dfs.adls.oauth2.access.token.provider.type + Must be set to ClientCredential + ClientCredential + + + fs.adl.impl.disable.cache + Only include this property AFTER validating the ADLS connection. + false + + {{- end }} + + {{- if eq .Values.distStorage.type "azureStorage" }} + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. 
+ {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} + + + dremio.azure.key + The shared access key for the storage account. + {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} + + + dremio.azure.mode + The storage account type. Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. Value: True/False + True + + {{- end }} +{{- end }} diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 12252069..042b7640 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -15,33 +15,21 @@ # paths: { - # the local path for dremio to store data. + # Local path for dremio to store data. local: ${DREMIO_HOME}"/data" - - # the distributed path Dremio data including job results, downloads, uploads, etc - #dist: "pdfs://"${paths.local}"/pdfs" - - # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces - {{- if .Values.distStorage.type }} - {{- if and .Values.dremioVersion (lt .Values.dremioVersion "3.2.0") }} - {{- if eq .Values.distStorage.type "aws" }} - uploads: "s3a://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azure" }} - uploads: "adl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" - {{- end }} - {{- else }} # dremio_version > 3.2.0 - {{- if eq .Values.distStorage.type "aws" }} - uploads: "dremioS3://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azure" }} - uploads: "dremioAdl://{{required "Azure Datalake 
store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azureStorage" }} - uploads: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath}}" - accelerator: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.acceleratorPath}}" - {{- end }} - {{- end }} + # Distributed path Dremio data including job results, downloads, + # uploads, etc + {{- if ne .Values.distStorage.type "local" }} + results: "pdfs://"${paths.local}"/pdfs" + {{- if eq .Values.distStorage.type "aws" }} + dist: "dremioS3:///{{ required "AWS bucketname required" .Values.distStorage.aws.bucketName }}{{ required "Path required" .Values.distStorage.aws.path }}" + {{- else if eq .Values.distStorage.type "azure" }} + dist: "dremioAdl://{{ required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName }}.azuredatalakestore.net{{ required "Path required" .Values.distStorage.azure.path }}" + {{- else if eq .Values.distStorage.type "azureStorage" }} + dist: "dremioAzureStorage://:///{{ required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem }}/{{ required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath }}" + {{- end }} + {{- else }} + dist: "pdfs://"${paths.local}"/pdfs" {{- end }} } @@ -54,8 +42,30 @@ services: { # executor.enabled: true # # Other service parameters can be customized via this file. + + # Cloud Cache is supported in Dremio 4.0.0+. 
+ {{- if and .Values.executor.cloudCache.enabled (or (ge .Values.imageTag "4.0.0") (eq .Values.imageTag "latest")) }} + executor: { + cache: { + path.db: "/var/lib/dremio", + path.fs: ["/var/lib/dremio"], + pctquota.db: {{ .Values.executor.cloudCache.quota.db_pct }}, + pctquota.fs: [{{ .Values.executor.cloudCache.quota.fs_pct }}] + } + } + {{- end }} } +{{- if and .Values.executor.cloudCache.enabled (ne .Values.distStorage.type "local") }} +debug: { + # Enable caching for distributed storage, it is turned off by default + dist.caching.enabled: true, + # Max percent of total available cache space to use when possible + # for distributed storage + dist.max.cache.space.percent: {{ .Values.executor.cloudCache.quota.cache_pct }} +} +{{- end }} + {{- if .Values.tls.ui.enabled }} services.coordinator.web.ssl.enabled: true services.coordinator.web.ssl.auto-certificate.enabled: false @@ -64,9 +74,9 @@ services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" {{- end }} {{- if .Values.tls.client.enabled }} -# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition. +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in +# Dremio Enterprise Edition. 
services.coordinator.client-endpoint.ssl.enabled: true services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false - services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/dremio/templates/dremio-admin.yaml b/charts/dremio/templates/dremio-admin.yaml index 3de91c43..feda9d9b 100644 --- a/charts/dremio/templates/dremio-admin.yaml +++ b/charts/dremio/templates/dremio-admin.yaml @@ -12,7 +12,7 @@ metadata: spec: containers: - name: dremio-admin - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent stdin: true tty: true diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 86d5f2ac..3089c1ad 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -70,7 +70,7 @@ spec: command: ["sh", "-c", "until nc -z dremio-client {{ .Values.coordinator.web.port | default 9047 }} > /dev/null; do echo waiting for dremio master; sleep 2; done;"] {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls @@ -92,7 +92,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index e511e1d3..150e1fe1 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ 
b/charts/dremio/templates/dremio-executor.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-executor - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -61,9 +61,10 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] - # since we're mounting a separate volume, reset permission to dremio uid/gid + # since we're mounting a separate volume, reset permission to + # dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index f272eb32..51619193 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -36,7 +36,7 @@ spec: {{- end }} containers: - name: dremio-master-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -92,7 +92,7 @@ spec: command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] # since we're mounting a separate volume, reset permission to dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -104,7 +104,7 @@ spec: - "dremio:dremio" - "/opt/dremio/data" - name: upgrade-task - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-master-volume @@ -114,7 +114,7 @@ spec: - "upgrade" {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: 
IfNotPresent volumeMounts: - name: dremio-tls @@ -136,7 +136,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 2601ebba..a8a415fc 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -1,7 +1,9 @@ # The image used to build the Dremio cluster. It is recommended to update the # version tag to the version that you are using. This will ensure that all # the pods are using the same version of the software. -image: dremio/dremio-oss:latest +image: dremio/dremio-oss +imageTag: latest + # Check out Dremio documentation for memory and cpu requirements for # the coordinators and the executors. # The value of memory should be in MB. CPU is in no of cores. @@ -22,6 +24,20 @@ executor: cpu: 15 count: 3 volumeSize: 100Gi + cloudCache: + # Requires Dremio version 4.0.0 or later + enabled: false + quota: + # Percentage of the diskspace for the running Kubernetes node + # that can be used for Cloud Cache files. + fs_pct: 70 + # Percentage of that space that can be used for the internal + # Cloud Cache database. + db_pct: 70 + # Percentage of that space that can be used for cacheing + # materialised reflections. This is an upper-bound, not a + # reservation. + cache_pct: 100 zookeeper: memory: 1024 cpu: 0.5 @@ -37,9 +53,10 @@ tls: enabled: false secret: dremio-tls-secret-ui client: - # To enable TLS for the client endpoints, set the enabled flag to true and provide - # the appropriate Kubernetes TLS secret. Client endpoint encryption is available only on - # Dremio Enterprise Edition and should not be enabled otherwise. + # To enable TLS for the client endpoints, set the enabled flag to + # true and provide the appropriate Kubernetes TLS secret. 
Client + # endpoint encryption is available only on Dremio Enterprise + # Edition and should not be enabled otherwise. enabled: false secret: dremio-tls-secret-client @@ -63,35 +80,39 @@ serviceType: LoadBalancer #storageClass: managed-premium # For private and protected docker image repository, you should store -# the credentials in a kubernetes secret and provide the secret name here. -# For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod +# the credentials in a kubernetes secret and provide the secret name +# here. For more information, see +# https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets: secretname -# Target pods to nodes based on labels set on the nodes. -# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +# Target pods to nodes based on labels set on the nodes. For more +# information, see +# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector #nodeSelector: # key: value -# Control where uploaded files are stored. -# See https://docs.dremio.com/deployment/distributed-storage.html for more information -dremioVersion: "3.2.0" # Dremio Version 3.2.0 or greater +# Control where uploaded files are stored. See +# https://docs.dremio.com/deployment/distributed-storage.html for more +# information distStorage: - # Valid values are local, aws, azure or azureStorage. aws and azure choice requires additional configuration data. + # Valid values are local, aws, azure or azureStorage. aws and azure + # choice requires additional configuration data. 
type: "local" - aws: #S3 - used for only uploads + aws: # S3 bucketName: "Your_AWS_bucket_name" path: "/" accessKey: "Your_AWS_Access_Key" secret: "Your_AWS_Secret" - azure: #ADLS v1 - used for only uploads + azure: # ADLS gen1 datalakeStoreName: "Your_Azure_DataLake_Storage_name" path: "/" applicationId: "Your_Azure_Application_Id" secret: "Your_Azure_Secret" oauth2EndPoint: "Azure_OAuth2_Endpoint" - azureStorage: #AzureStorage v2 - supported in Dremio version 3.2.0+ - used for uploads and accelerator + azureStorage: # AzureStorage gen2v2 accountName: "Azure_storage_v2_account_name" accessKey: "Access_key_for_the_storage_account" + filesystem: "Filesystem_in_storage_account" uploadsPath: "Path_for_uploads" acceleratorPath: "Path_for_accelerator" From 45a04846d6d54712bd7eab846351665e66736dfc Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Fri, 27 Sep 2019 17:45:21 +0000 Subject: [PATCH 22/31] Revert "Add Helm C3 executor and dist store caching" This reverts commit d199978f1eaf648ec014e5e171e481698b6bb554. 
Change-Id: If484452e8608f2dbdfa2713ca33bb13b4c92c4f0 --- charts/dremio/Chart.yaml | 2 +- charts/dremio/README.md | 168 +++++------------- charts/dremio/config/core-site.xml | 163 ++++++++--------- charts/dremio/config/dremio.conf | 68 +++---- charts/dremio/templates/dremio-admin.yaml | 2 +- .../dremio/templates/dremio-coordinator.yaml | 6 +- charts/dremio/templates/dremio-executor.yaml | 7 +- charts/dremio/templates/dremio-master.yaml | 10 +- charts/dremio/values.yaml | 51 ++---- 9 files changed, 179 insertions(+), 298 deletions(-) diff --git a/charts/dremio/Chart.yaml b/charts/dremio/Chart.yaml index 7a40ce28..0457134e 100644 --- a/charts/dremio/Chart.yaml +++ b/charts/dremio/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v1" name: "dremio" -version: "0.1.0" +version: "0.0.7" keywords: - dremio - data diff --git a/charts/dremio/README.md b/charts/dremio/README.md index 8bab30c8..6d8ecd0e 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -2,10 +2,7 @@ ## Overview -This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses -a persistent volume for the master node to store the metadata for the -cluster. The default configuration uses the default persistent storage -supported by the kubernetes platform. For example, +This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persistent volume for the master node to store the metadata for the cluster. The default configuration uses the default persistent storage supported by the kubernetes platform. For example, | Kubernetes platform | Persistent store | |---------------------|------------------| @@ -14,48 +11,26 @@ supported by the kubernetes platform. For example, | Google GKE | Persistent Disk | | Local K8S on Docker | Hostpath | -If you want to use a different storage class available in your -kubernetes environment, add the storageClass in values.yaml. 
- -An appropriate distributed file store (S3, ADLS, HDFS, etc) should be -used for paths.dist as this deployment will lose locally persisted -reflections and uploads. You can update config/dremio.conf. Dremio -[documentation](https://docs.dremio.com/deployment/distributed-storage.html) -provides more information on this. - -This assumes you already have kubernetes cluster setup, kubectl -configured to talk to your kubernetes cluster and helm setup in your -cluster. Review and update values.yaml to reflect values for your -environment before installing the helm chart. This is specially -important for for the memory and cpu values - your kubernetes cluster -should have sufficient resources to provision the pods with those -values. If your kubernetes installation does not support serviceType -LoadBalancer, it is recommended to comment the serviceType value in -values.yaml file before deploying. +If you want to use a different storage class available in your kubernetes environment, add the storageClass in values.yaml. -#### Installing the helm chart +An appropriate distributed file store (S3, ADLS, HDFS, etc) should be used for paths.dist as this deployment will lose locally persisted reflections and uploads. You can update config/dremio.conf. Dremio [documentation](https://docs.dremio.com/deployment/distributed-storage.html) provides more information on this. -Review charts/dremio/values.yaml and adjust the values as per your -requirements. Note that the values for cpu and memory for the -coordinator and the executors are set to work with AKS on Azure with -worker nodes setup with machine types Standard_E16s_v3. +This assumes you already have kubernetes cluster setup, kubectl configured to talk to your kubernetes cluster and helm setup in your cluster. Review and update values.yaml to reflect values for your environment before installing the helm chart. 
This is specially important for for the memory and cpu values - your kubernetes cluster should have sufficient resources to provision the pods with those values. If your kubernetes installation does not support serviceType LoadBalancer, it is recommended to comment the serviceType value in values.yaml file before deploying. -Run this from the charts directory +#### Installing the helm chart +Review charts/dremio/values.yaml and adjust the values as per your requirements. Note that the values for cpu and memory for the coordinator and the executors are set to work with AKS on Azure with worker nodes setup with machine types Standard_E16s_v3. +Run this from the charts directory ```bash -cd charts helm install --wait dremio ``` - -If it takes longer than a couple of minutes to complete, check the -status of the pods to see where they are waiting. If they are pending -scheduling due to limited memory or cpu, either adjust the values in -values.yaml and restart the process or add more resources to your -kubernetes cluster. +cd charts +helm install --wait dremio +``` +If it takes longer than a couple of minutes to complete, check the status of the pods to see where they are waiting. If they are pending scheduling due to limited memory or cpu, either adjust the values in values.yaml and restart the process or add more resources to your kubernetes cluster. #### Connect to the Dremio UI +If your kubernetes supports serviceType LoadBalancer, you can get to the Dremio UI on the load balancer external ip. -If your kubernetes supports serviceType LoadBalancer, you can get to -the Dremio UI on the load balancer external IP. 
For example, if your -service output is: +For example, if your service output is: ```bash kubectl get services dremio-client @@ -63,32 +38,24 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -You can get to the Dremio UI using the value under column EXTERNAL-IP: +you can get to the Dremio UI using the value under column EXTERNAL-IP: http://35.226.31.211:9047 -If your kubernetes does not have support of serviceType LoadBalancer, -you can access the Dremio UI on the port exposed on the node. For -example, if the service output is: +If your kubernetes does not have support of serviceType LoadBalancer, you can access the Dremio UI on the port exposed on the node. For example, if the service output is: ```bash kubectl get services dremio-client NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP 1h ``` - -Where there is no external IP and the Dremio master is running on node -"localhost", you can get to Dremio UI using: +where there is no external ip and the Dremio master is running on node "localhost", you can get to Dremio UI using: http://localhost:30670 -#### Dremio Client Port -The port 31010 is used for ODBC and JDBC connections. You can look up -service dremio-client in kubernetes to find the host to use for ODBC -or JDBC connections. Depending on your kubernetes cluster supporting -serviceType LoadBalancer, you will use the load balancer external-ip -or the node on which a coordinator is running. +#### Dremio Client Port +The port 31010 is used for ODBC and JDBC connections. You can look up service dremio-client in kubernetes to find the host to use for ODBC or JDBC connections. Depending on your kubernetes cluster supporting serviceType LoadBalancer, you will use the load balancer external-ip or the node on which a coordinator is running. 
```bash kubectl get services dremio-client @@ -96,37 +63,32 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -For example, in the above output, the service is exposed on an -external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC -connections. +For example, in the above output, the service is exposed on an external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC connections. #### Viewing logs - -Logs are written to the container's console. All the logs - -server.log, server.out, server.gc and access.log - are written into -the console simultaneously. You can view the logs using kubectl. ``` -kubectl logs ``` You can also tail the logs using the --f parameter. ``` kubectl logs -f ``` +Logs are written to the container's console. All the logs - server.log, server.out, server.gc and access.log - are written into the console simultaneously. You can view the logs using kubectl. +``` +kubectl logs +``` +You can also tail the logs using the -f parameter. +``` +kubectl logs -f +``` #### Scale by adding additional Coordinators or Executors (optional) - -Get the name of the helm release. In the example below, the release -name is plundering-alpaca: - +Get the name of the helm release. In the example below, the release name is plundering-alpaca. ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 default ``` -Add additional coordinators: - +Add additional coordinators ```bash helm upgrade dremio --set coordinator.count=3 ``` -Add additional executors: - +Add additional executors ```bash helm upgrade dremio --set executor.count=5 ``` @@ -134,44 +96,33 @@ helm upgrade dremio --set executor.count=5 You can also scale down the same way. 
### Running offline dremio-admin commands - -Administration commands restore, cleanup and set-password in -dremio-admin needs to be run when the Dremio cluster is not -running. So, before running these commands, you need to shutdown the -Dremio cluster. Use the helm delete command to delete the helm -release. (Kubernetes does not delete the persistent store volumes -when you delete statefulset pods and when you install the cluster -again using helm, the existing persistent store will be used and you -will get your Dremio cluster running again.) - -After Dremio cluster is shutdown, start the dremio-admin pod using: - +Administration commands restore, cleanup and set-password in dremio-admin needs to be run when +the Dremio cluster is not running. So, before running these commands, you need to shutdown +the Dremio cluster. Use the helm delete command to delete the helm release. +(Kubernetes does not delete the persistent store volumes when you delete statefulset pods and +when you install the cluster again using helm, the existing persistent store will be used and +you will get your Dremio cluster running again.) + +After Dremio cluster is shutdown, start the dremio-admin pod using ```bash helm install --wait dremio --set DremioAdmin=true ``` -Once the pod is running, you can connect to the pod using: - +Once the pod is running, you can connect to the pod using ```bash kubectl exec -it dremio-admin -- bash ``` Now, you have a bash shell from where you can run the dremio-admin commands. -Once you are done, you can delete the helm release for the -dremio-admin and start your Dremio cluster. +Once you are done, you can delete the helm release for the dremio-admin and start your Dremio cluster. #### Upgrading Dremio - -You should attempt upgrade when no queries are running on the -cluster. Update the Dremio image tag in your values.yaml file. E.g: - +You should attempt upgrade when no queries are running on the cluster. 
Update the Dremio image tag in your values.yaml file. E.g. ```bash image: dremio/dremio-oss:3.0.0 ... ``` -Get the name of the helm release. In the example below, the release -name is plundering-alpaca. - +Get the name of the helm release. In the example below, the release name is plundering-alpaca. ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE @@ -179,45 +130,20 @@ plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 defaul ``` Upgrade the deployment via helm upgrade command: - ``` helm upgrade . ``` -Existing pods will be terminated and new pods will be created with the -new image. You can - +Existing pods will be terminated and new pods will be created with the new image. You can monitor the status of the pods by running: ``` kubectl get pods ``` -Once all the pods are restarted and running, your Dremio cluster is -upgraded. +Once all the pods are restarted and running, your Dremio cluster is upgraded. #### Customizing Dremio configuration -Dremio configuration files used by the deployment are in the config -directory. These files are propagated to all the pods in the -cluster. Updating the configuration and upgrading the helm release - -just like doing an upgrade - would refresh all the pods with the new -configuration. [Dremio -documentation](https://docs.dremio.com/deployment/README-config.html) -covers the configuration capabilities in Dremio. - -If you need to add a core-site.xml, you can add the file to the config -directory and it will be propagated to all the pods on install or -upgrade of the deployment. - -#### Important Changes - -2019-09-19 (v0.1.0): BREAKING CHANGE. - - Dremio versions before 4.0.0 are no longer supported by this Helm - chart. Dremio image specifier was split into an imageName and - imageTag parts to follow best practices. 
"dist" value in - dremio.conf moved to cloud storage where possible (otherwise - defaults to pdfs) -- this will lose any previously extant - reflections materialisations, user uploads, scratch files, etc. - Also added Cloud Cache support (new in Dremio 4.0). Please see - values.yaml for details on this new configuration. +Dremio configuration files used by the deployment are in the config directory. These files are propagated to all the pods in the cluster. Updating the configuration and upgrading the helm release - just like doing an upgrade - would refresh all the pods with the new configuration. [Dremio documentation](https://docs.dremio.com/deployment/README-config.html) covers the configuration capabilities in Dremio. + +If you need to add a core-site.xml, you can add the file to the config directory and it will be propagated to all the pods on install or upgrade of the deployment. diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml index 3283eb32..8d91d757 100644 --- a/charts/dremio/config/core-site.xml +++ b/charts/dremio/config/core-site.xml @@ -1,93 +1,80 @@ -{{- if and .Values.distStorage.type (ne .Values.distStorage.type "local") }} - {{- if eq .Values.distStorage.type "aws" }} - - fs.dremioS3.impl - The FileSystem implementation. Must be set to com.dremio.plugins.s3.store.S3FileSystem - com.dremio.plugins.s3.store.S3FileSystem - - - fs.s3a.aws.credentials.provider - The credential provider type. - org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider - - - fs.s3a.access.key - AWS access key ID. - {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} - - - fs.s3a.secret.key - AWS secret key. 
- {{ required "AWS secret required" .Values.distStorage.aws.secret}} - - {{- end }} - - {{- if eq .Values.distStorage.type "azure" }} - - - fs.adl.impl - Must be set to org.apache.hadoop.fs.adl.AdlFileSystem - org.apache.hadoop.fs.adl.AdlFileSystem - - - dfs.adls.oauth2.client.id - Application ID of the registered application under Azure Active Directory - {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} - - - dfs.adls.oauth2.credential - Generated password value for the registered application - {{required "Azure secret value required" .Values.distStorage.azure.secret}} - - - dfs.adls.oauth2.refresh.url - Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. - {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} - - - dfs.adls.oauth2.access.token.provider.type - Must be set to ClientCredential - ClientCredential - - - fs.adl.impl.disable.cache - Only include this property AFTER validating the ADLS connection. - false - - {{- end }} - - {{- if eq .Values.distStorage.type "azureStorage" }} - - fs.dremioAzureStorage.impl - FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem - com.dremio.plugins.azure.AzureStorageFileSystem - - - dremio.azure.account - The name of the storage account. - {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} - - - dremio.azure.key - The shared access key for the storage account. - {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} - - - dremio.azure.mode - The storage account type. Value: STORAGE_V2 - STORAGE_V2 - - - dremio.azure.secure - Boolean option to enable SSL connections. Value: True/False - True - - {{- end }} -{{- end }} + {{- if and .Values.distStorage.type (eq .Values.distStorage.type "aws") }} + + fs.s3a.access.key + AWS access key ID. 
+ {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} + + + fs.s3a.secret.key + AWS secret key. + {{ required "AWS secret required" .Values.distStorage.aws.secret}} + + {{- end }} + + {{- if and .Values.distStorage.type (eq .Values.distStorage.type "azure") }} + + + fs.adl.impl + Must be set to org.apache.hadoop.fs.adl.AdlFileSystem + org.apache.hadoop.fs.adl.AdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory + {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} + + + dfs.adls.oauth2.credential + Generated password value for the registered application + {{required "Azure secret value required" .Values.distStorage.azure.secret}} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} + + + dfs.adls.oauth2.access.token.provider.type + Must be set to ClientCredential + ClientCredential + + + fs.adl.impl.disable.cache + Only include this property AFTER validating the ADLS connection. + false + + {{- end }} + + {{- if and .Values.dremioVersion (ge .Values.dremioVersion "3.2.0") .Values.distStorage.type (eq .Values.distStorage.type "azureStorage") }} + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} + + + dremio.azure.key + The shared access key for the storage account. + {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} + + + dremio.azure.mode + The storage account type. 
Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. Value: True/False + True + + {{- end }} diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 042b7640..12252069 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -15,21 +15,33 @@ # paths: { - # Local path for dremio to store data. + # the local path for dremio to store data. local: ${DREMIO_HOME}"/data" - # Distributed path Dremio data including job results, downloads, - # uploads, etc - {{- if ne .Values.distStorage.type "local" }} - results: "pdfs://"${paths.local}"/pdfs" - {{- if eq .Values.distStorage.type "aws" }} - dist: "dremioS3:///{{ required "AWS bucketname required" .Values.distStorage.aws.bucketName }}{{ required "Path required" .Values.distStorage.aws.path }}" - {{- else if eq .Values.distStorage.type "azure" }} - dist: "dremioAdl://{{ required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName }}.azuredatalakestore.net{{ required "Path required" .Values.distStorage.azure.path }}" - {{- else if eq .Values.distStorage.type "azureStorage" }} - dist: "dremioAzureStorage://:///{{ required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem }}/{{ required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath }}" - {{- end }} - {{- else }} - dist: "pdfs://"${paths.local}"/pdfs" + + # the distributed path Dremio data including job results, downloads, uploads, etc + #dist: "pdfs://"${paths.local}"/pdfs" + + # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces + {{- if .Values.distStorage.type }} + {{- if and .Values.dremioVersion (lt .Values.dremioVersion "3.2.0") }} + {{- if eq .Values.distStorage.type "aws" }} + uploads: "s3a://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" 
.Values.distStorage.aws.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azure" }} + uploads: "adl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" + {{- end }} + {{- else }} # dremio_version > 3.2.0 + {{- if eq .Values.distStorage.type "aws" }} + uploads: "dremioS3://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azure" }} + uploads: "dremioAdl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" + {{- end }} + {{- if eq .Values.distStorage.type "azureStorage" }} + uploads: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath}}" + accelerator: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.acceleratorPath}}" + {{- end }} + {{- end }} {{- end }} } @@ -42,30 +54,8 @@ services: { # executor.enabled: true # # Other service parameters can be customized via this file. - - # Cloud Cache is supported in Dremio 4.0.0+. 
- {{- if and .Values.executor.cloudCache.enabled (or (ge .Values.imageTag "4.0.0") (eq .Values.imageTag "latest")) }} - executor: { - cache: { - path.db: "/var/lib/dremio", - path.fs: ["/var/lib/dremio"], - pctquota.db: {{ .Values.executor.cloudCache.quota.db_pct }}, - pctquota.fs: [{{ .Values.executor.cloudCache.quota.fs_pct }}] - } - } - {{- end }} } -{{- if and .Values.executor.cloudCache.enabled (ne .Values.distStorage.type "local") }} -debug: { - # Enable caching for distributed storage, it is turned off by default - dist.caching.enabled: true, - # Max percent of total available cache space to use when possible - # for distributed storage - dist.max.cache.space.percent: {{ .Values.executor.cloudCache.quota.cache_pct }} -} -{{- end }} - {{- if .Values.tls.ui.enabled }} services.coordinator.web.ssl.enabled: true services.coordinator.web.ssl.auto-certificate.enabled: false @@ -74,9 +64,9 @@ services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" {{- end }} {{- if .Values.tls.client.enabled }} -# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in -# Dremio Enterprise Edition. +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition. 
services.coordinator.client-endpoint.ssl.enabled: true services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false + services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" -{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/dremio/templates/dremio-admin.yaml b/charts/dremio/templates/dremio-admin.yaml index feda9d9b..3de91c43 100644 --- a/charts/dremio/templates/dremio-admin.yaml +++ b/charts/dremio/templates/dremio-admin.yaml @@ -12,7 +12,7 @@ metadata: spec: containers: - name: dremio-admin - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent stdin: true tty: true diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 3089c1ad..86d5f2ac 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-coordinator - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent resources: requests: @@ -70,7 +70,7 @@ spec: command: ["sh", "-c", "until nc -z dremio-client {{ .Values.coordinator.web.port | default 9047 }} > /dev/null; do echo waiting for dremio master; sleep 2; done;"] {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls @@ -92,7 +92,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index 150e1fe1..e511e1d3 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ 
b/charts/dremio/templates/dremio-executor.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-executor - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent resources: requests: @@ -61,10 +61,9 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] - # since we're mounting a separate volume, reset permission to - # dremio uid/gid + # since we're mounting a separate volume, reset permission to dremio uid/gid - name: chown-data-directory - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index 51619193..f272eb32 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -36,7 +36,7 @@ spec: {{- end }} containers: - name: dremio-master-coordinator - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent resources: requests: @@ -92,7 +92,7 @@ spec: command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] # since we're mounting a separate volume, reset permission to dremio uid/gid - name: chown-data-directory - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -104,7 +104,7 @@ spec: - "dremio:dremio" - "/opt/dremio/data" - name: upgrade-task - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-master-volume @@ -114,7 +114,7 @@ spec: - "upgrade" {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: 
IfNotPresent volumeMounts: - name: dremio-tls @@ -136,7 +136,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}}:{{.Values.imageTag}} + image: {{.Values.image}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index a8a415fc..2601ebba 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -1,9 +1,7 @@ # The image used to build the Dremio cluster. It is recommended to update the # version tag to the version that you are using. This will ensure that all # the pods are using the same version of the software. -image: dremio/dremio-oss -imageTag: latest - +image: dremio/dremio-oss:latest # Check out Dremio documentation for memory and cpu requirements for # the coordinators and the executors. # The value of memory should be in MB. CPU is in no of cores. @@ -24,20 +22,6 @@ executor: cpu: 15 count: 3 volumeSize: 100Gi - cloudCache: - # Requires Dremio version 4.0.0 or later - enabled: false - quota: - # Percentage of the diskspace for the running Kubernetes node - # that can be used for Cloud Cache files. - fs_pct: 70 - # Percentage of that space that can be used for the internal - # Cloud Cache database. - db_pct: 70 - # Percentage of that space that can be used for cacheing - # materialised reflections. This is an upper-bound, not a - # reservation. - cache_pct: 100 zookeeper: memory: 1024 cpu: 0.5 @@ -53,10 +37,9 @@ tls: enabled: false secret: dremio-tls-secret-ui client: - # To enable TLS for the client endpoints, set the enabled flag to - # true and provide the appropriate Kubernetes TLS secret. Client - # endpoint encryption is available only on Dremio Enterprise - # Edition and should not be enabled otherwise. + # To enable TLS for the client endpoints, set the enabled flag to true and provide + # the appropriate Kubernetes TLS secret. 
Client endpoint encryption is available only on + # Dremio Enterprise Edition and should not be enabled otherwise. enabled: false secret: dremio-tls-secret-client @@ -80,39 +63,35 @@ serviceType: LoadBalancer #storageClass: managed-premium # For private and protected docker image repository, you should store -# the credentials in a kubernetes secret and provide the secret name -# here. For more information, see -# https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod +# the credentials in a kubernetes secret and provide the secret name here. +# For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets: secretname -# Target pods to nodes based on labels set on the nodes. For more -# information, see -# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +# Target pods to nodes based on labels set on the nodes. +# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector #nodeSelector: # key: value -# Control where uploaded files are stored. See -# https://docs.dremio.com/deployment/distributed-storage.html for more -# information +# Control where uploaded files are stored. +# See https://docs.dremio.com/deployment/distributed-storage.html for more information +dremioVersion: "3.2.0" # Dremio Version 3.2.0 or greater distStorage: - # Valid values are local, aws, azure or azureStorage. aws and azure - # choice requires additional configuration data. + # Valid values are local, aws, azure or azureStorage. aws and azure choice requires additional configuration data. 
type: "local" - aws: # S3 + aws: #S3 - used for only uploads bucketName: "Your_AWS_bucket_name" path: "/" accessKey: "Your_AWS_Access_Key" secret: "Your_AWS_Secret" - azure: # ADLS gen1 + azure: #ADLS v1 - used for only uploads datalakeStoreName: "Your_Azure_DataLake_Storage_name" path: "/" applicationId: "Your_Azure_Application_Id" secret: "Your_Azure_Secret" oauth2EndPoint: "Azure_OAuth2_Endpoint" - azureStorage: # AzureStorage gen2v2 + azureStorage: #AzureStorage v2 - supported in Dremio version 3.2.0+ - used for uploads and accelerator accountName: "Azure_storage_v2_account_name" accessKey: "Access_key_for_the_storage_account" - filesystem: "Filesystem_in_storage_account" uploadsPath: "Path_for_uploads" acceleratorPath: "Path_for_accelerator" From c8124e9cc68ba49b30c85147f7059ed2fe9843c2 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Fri, 27 Sep 2019 13:53:55 -0700 Subject: [PATCH 23/31] DX-18737: Add Helm C3 executor and dist store caching - Dremio 4.0.0 or later required. - Adds the concept of an imageTag to expose features that are introduced only in newer versions of Dremio. - Removes the dremioVersion value that needs to be manually set to reference the same version that is used by the image. - Adds optional Cloud Cache support. Dist is split between PDFS and cloud storage. 
Change-Id: I645c53bb772c0d52362052ef77925c08b30cc494 --- charts/dremio/Chart.yaml | 2 +- charts/dremio/README.md | 168 +++++++++++++----- charts/dremio/config/core-site.xml | 163 +++++++++-------- charts/dremio/config/dremio.conf | 68 ++++--- charts/dremio/templates/dremio-admin.yaml | 2 +- .../dremio/templates/dremio-coordinator.yaml | 6 +- charts/dremio/templates/dremio-executor.yaml | 7 +- charts/dremio/templates/dremio-master.yaml | 10 +- charts/dremio/values.yaml | 54 ++++-- 9 files changed, 299 insertions(+), 181 deletions(-) diff --git a/charts/dremio/Chart.yaml b/charts/dremio/Chart.yaml index 0457134e..7a40ce28 100644 --- a/charts/dremio/Chart.yaml +++ b/charts/dremio/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v1" name: "dremio" -version: "0.0.7" +version: "0.1.0" keywords: - dremio - data diff --git a/charts/dremio/README.md b/charts/dremio/README.md index 6d8ecd0e..8bab30c8 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -2,7 +2,10 @@ ## Overview -This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persistent volume for the master node to store the metadata for the cluster. The default configuration uses the default persistent storage supported by the kubernetes platform. For example, +This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses +a persistent volume for the master node to store the metadata for the +cluster. The default configuration uses the default persistent storage +supported by the kubernetes platform. For example, | Kubernetes platform | Persistent store | |---------------------|------------------| @@ -11,26 +14,48 @@ This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses a persist | Google GKE | Persistent Disk | | Local K8S on Docker | Hostpath | -If you want to use a different storage class available in your kubernetes environment, add the storageClass in values.yaml. 
- -An appropriate distributed file store (S3, ADLS, HDFS, etc) should be used for paths.dist as this deployment will lose locally persisted reflections and uploads. You can update config/dremio.conf. Dremio [documentation](https://docs.dremio.com/deployment/distributed-storage.html) provides more information on this. - -This assumes you already have kubernetes cluster setup, kubectl configured to talk to your kubernetes cluster and helm setup in your cluster. Review and update values.yaml to reflect values for your environment before installing the helm chart. This is specially important for for the memory and cpu values - your kubernetes cluster should have sufficient resources to provision the pods with those values. If your kubernetes installation does not support serviceType LoadBalancer, it is recommended to comment the serviceType value in values.yaml file before deploying. +If you want to use a different storage class available in your +kubernetes environment, add the storageClass in values.yaml. + +An appropriate distributed file store (S3, ADLS, HDFS, etc) should be +used for paths.dist as this deployment will lose locally persisted +reflections and uploads. You can update config/dremio.conf. Dremio +[documentation](https://docs.dremio.com/deployment/distributed-storage.html) +provides more information on this. + +This assumes you already have kubernetes cluster setup, kubectl +configured to talk to your kubernetes cluster and helm setup in your +cluster. Review and update values.yaml to reflect values for your +environment before installing the helm chart. This is specially +important for for the memory and cpu values - your kubernetes cluster +should have sufficient resources to provision the pods with those +values. If your kubernetes installation does not support serviceType +LoadBalancer, it is recommended to comment the serviceType value in +values.yaml file before deploying. 
#### Installing the helm chart -Review charts/dremio/values.yaml and adjust the values as per your requirements. Note that the values for cpu and memory for the coordinator and the executors are set to work with AKS on Azure with worker nodes setup with machine types Standard_E16s_v3. + +Review charts/dremio/values.yaml and adjust the values as per your +requirements. Note that the values for cpu and memory for the +coordinator and the executors are set to work with AKS on Azure with +worker nodes setup with machine types Standard_E16s_v3. Run this from the charts directory + ```bash -cd charts -helm install --wait dremio -``` -If it takes longer than a couple of minutes to complete, check the status of the pods to see where they are waiting. If they are pending scheduling due to limited memory or cpu, either adjust the values in values.yaml and restart the process or add more resources to your kubernetes cluster. +cd charts helm install --wait dremio ``` + +If it takes longer than a couple of minutes to complete, check the +status of the pods to see where they are waiting. If they are pending +scheduling due to limited memory or cpu, either adjust the values in +values.yaml and restart the process or add more resources to your +kubernetes cluster. #### Connect to the Dremio UI -If your kubernetes supports serviceType LoadBalancer, you can get to the Dremio UI on the load balancer external ip. -For example, if your service output is: +If your kubernetes supports serviceType LoadBalancer, you can get to +the Dremio UI on the load balancer external IP. 
For example, if your +service output is: ```bash kubectl get services dremio-client @@ -38,24 +63,32 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -you can get to the Dremio UI using the value under column EXTERNAL-IP: +You can get to the Dremio UI using the value under column EXTERNAL-IP: http://35.226.31.211:9047 -If your kubernetes does not have support of serviceType LoadBalancer, you can access the Dremio UI on the port exposed on the node. For example, if the service output is: +If your kubernetes does not have support of serviceType LoadBalancer, +you can access the Dremio UI on the port exposed on the node. For +example, if the service output is: ```bash kubectl get services dremio-client NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP 1h ``` -where there is no external ip and the Dremio master is running on node "localhost", you can get to Dremio UI using: -http://localhost:30670 +Where there is no external IP and the Dremio master is running on node +"localhost", you can get to Dremio UI using: +http://localhost:30670 #### Dremio Client Port -The port 31010 is used for ODBC and JDBC connections. You can look up service dremio-client in kubernetes to find the host to use for ODBC or JDBC connections. Depending on your kubernetes cluster supporting serviceType LoadBalancer, you will use the load balancer external-ip or the node on which a coordinator is running. + +The port 31010 is used for ODBC and JDBC connections. You can look up +service dremio-client in kubernetes to find the host to use for ODBC +or JDBC connections. Depending on your kubernetes cluster supporting +serviceType LoadBalancer, you will use the load balancer external-ip +or the node on which a coordinator is running. 
```bash kubectl get services dremio-client @@ -63,32 +96,37 @@ NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) dremio-client LoadBalancer 10.99.227.180 35.226.31.211 31010:32260/TCP,9047:30620/TCP 2d ``` -For example, in the above output, the service is exposed on an external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC connections. +For example, in the above output, the service is exposed on an +external-ip. So, you can use 35.226.31.211:31010 in your ODBC or JDBC +connections. #### Viewing logs -Logs are written to the container's console. All the logs - server.log, server.out, server.gc and access.log - are written into the console simultaneously. You can view the logs using kubectl. -``` -kubectl logs -``` -You can also tail the logs using the -f parameter. -``` -kubectl logs -f -``` + +Logs are written to the container's console. All the logs - +server.log, server.out, server.gc and access.log - are written into +the console simultaneously. You can view the logs using kubectl. ``` +kubectl logs ``` You can also tail the logs using the +-f parameter. ``` kubectl logs -f ``` #### Scale by adding additional Coordinators or Executors (optional) -Get the name of the helm release. In the example below, the release name is plundering-alpaca. + +Get the name of the helm release. In the example below, the release +name is plundering-alpaca: + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 default ``` -Add additional coordinators +Add additional coordinators: + ```bash helm upgrade dremio --set coordinator.count=3 ``` -Add additional executors +Add additional executors: + ```bash helm upgrade dremio --set executor.count=5 ``` @@ -96,33 +134,44 @@ helm upgrade dremio --set executor.count=5 You can also scale down the same way. 
### Running offline dremio-admin commands -Administration commands restore, cleanup and set-password in dremio-admin needs to be run when -the Dremio cluster is not running. So, before running these commands, you need to shutdown -the Dremio cluster. Use the helm delete command to delete the helm release. -(Kubernetes does not delete the persistent store volumes when you delete statefulset pods and -when you install the cluster again using helm, the existing persistent store will be used and -you will get your Dremio cluster running again.) - -After Dremio cluster is shutdown, start the dremio-admin pod using + +Administration commands restore, cleanup and set-password in +dremio-admin need to be run when the Dremio cluster is not +running. So, before running these commands, you need to shut down the +Dremio cluster. Use the helm delete command to delete the helm +release. (Kubernetes does not delete the persistent store volumes +when you delete statefulset pods and when you install the cluster +again using helm, the existing persistent store will be used and you +will get your Dremio cluster running again.) + +After the Dremio cluster is shut down, start the dremio-admin pod using: + ```bash helm install --wait dremio --set DremioAdmin=true ``` -Once the pod is running, you can connect to the pod using +Once the pod is running, you can connect to the pod using: + ```bash kubectl exec -it dremio-admin -- bash ``` Now, you have a bash shell from where you can run the dremio-admin commands. -Once you are done, you can delete the helm release for the dremio-admin and start your Dremio cluster. +Once you are done, you can delete the helm release for the +dremio-admin and start your Dremio cluster. #### Upgrading Dremio -You should attempt upgrade when no queries are running on the cluster. Update the Dremio image tag in your values.yaml file. E.g. + +You should attempt upgrade when no queries are running on the +cluster. Update the Dremio image tag in your values.yaml file.
E.g: + ```bash image: dremio/dremio-oss:3.0.0 ... ``` -Get the name of the helm release. In the example below, the release name is plundering-alpaca. +Get the name of the helm release. In the example below, the release +name is plundering-alpaca. + ```bash helm list NAME REVISION UPDATED STATUS CHART NAMESPACE @@ -130,20 +179,45 @@ plundering-alpaca 1 Wed Jul 18 09:36:14 2018 DEPLOYED dremio-0.0.5 defaul ``` Upgrade the deployment via helm upgrade command: + ``` helm upgrade . ``` -Existing pods will be terminated and new pods will be created with the new image. You can +Existing pods will be terminated and new pods will be created with the +new image. You can + monitor the status of the pods by running: ``` kubectl get pods ``` -Once all the pods are restarted and running, your Dremio cluster is upgraded. +Once all the pods are restarted and running, your Dremio cluster is +upgraded. #### Customizing Dremio configuration -Dremio configuration files used by the deployment are in the config directory. These files are propagated to all the pods in the cluster. Updating the configuration and upgrading the helm release - just like doing an upgrade - would refresh all the pods with the new configuration. [Dremio documentation](https://docs.dremio.com/deployment/README-config.html) covers the configuration capabilities in Dremio. - -If you need to add a core-site.xml, you can add the file to the config directory and it will be propagated to all the pods on install or upgrade of the deployment. +Dremio configuration files used by the deployment are in the config +directory. These files are propagated to all the pods in the +cluster. Updating the configuration and upgrading the helm release - +just like doing an upgrade - would refresh all the pods with the new +configuration. [Dremio +documentation](https://docs.dremio.com/deployment/README-config.html) +covers the configuration capabilities in Dremio. 
+ +If you need to add a core-site.xml, you can add the file to the config +directory and it will be propagated to all the pods on install or +upgrade of the deployment. + +#### Important Changes + +2019-09-19 (v0.1.0): BREAKING CHANGE. + + Dremio versions before 4.0.0 are no longer supported by this Helm + chart. Dremio image specifier was split into an imageName and + imageTag parts to follow best practices. "dist" value in + dremio.conf moved to cloud storage where possible (otherwise + defaults to pdfs) -- this will lose any previously extant + reflections materialisations, user uploads, scratch files, etc. + Also added Cloud Cache support (new in Dremio 4.0). Please see + values.yaml for details on this new configuration. diff --git a/charts/dremio/config/core-site.xml b/charts/dremio/config/core-site.xml index 8d91d757..3283eb32 100644 --- a/charts/dremio/config/core-site.xml +++ b/charts/dremio/config/core-site.xml @@ -1,80 +1,93 @@ +{{- if and .Values.distStorage.type (ne .Values.distStorage.type "local") }} - {{- if and .Values.distStorage.type (eq .Values.distStorage.type "aws") }} - - fs.s3a.access.key - AWS access key ID. - {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} - - - fs.s3a.secret.key - AWS secret key. 
- {{ required "AWS secret required" .Values.distStorage.aws.secret}} - - {{- end }} - - {{- if and .Values.distStorage.type (eq .Values.distStorage.type "azure") }} - - - fs.adl.impl - Must be set to org.apache.hadoop.fs.adl.AdlFileSystem - org.apache.hadoop.fs.adl.AdlFileSystem - - - dfs.adls.oauth2.client.id - Application ID of the registered application under Azure Active Directory - {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} - - - dfs.adls.oauth2.credential - Generated password value for the registered application - {{required "Azure secret value required" .Values.distStorage.azure.secret}} - - - dfs.adls.oauth2.refresh.url - Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. - {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} - - - dfs.adls.oauth2.access.token.provider.type - Must be set to ClientCredential - ClientCredential - - - fs.adl.impl.disable.cache - Only include this property AFTER validating the ADLS connection. - false - - {{- end }} - - {{- if and .Values.dremioVersion (ge .Values.dremioVersion "3.2.0") .Values.distStorage.type (eq .Values.distStorage.type "azureStorage") }} - - fs.dremioAzureStorage.impl - FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem - com.dremio.plugins.azure.AzureStorageFileSystem - - - dremio.azure.account - The name of the storage account. - {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} - - - dremio.azure.key - The shared access key for the storage account. - {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} - - - dremio.azure.mode - The storage account type. Value: STORAGE_V2 - STORAGE_V2 - - - dremio.azure.secure - Boolean option to enable SSL connections. 
Value: True/False - True - - {{- end }} + {{- if eq .Values.distStorage.type "aws" }} + + fs.dremioS3.impl + The FileSystem implementation. Must be set to com.dremio.plugins.s3.store.S3FileSystem + com.dremio.plugins.s3.store.S3FileSystem + + + fs.s3a.aws.credentials.provider + The credential provider type. + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + + + fs.s3a.access.key + AWS access key ID. + {{ required "AWS access key required" .Values.distStorage.aws.accessKey}} + + + fs.s3a.secret.key + AWS secret key. + {{ required "AWS secret required" .Values.distStorage.aws.secret}} + + {{- end }} + + {{- if eq .Values.distStorage.type "azure" }} + + + fs.adl.impl + Must be set to org.apache.hadoop.fs.adl.AdlFileSystem + org.apache.hadoop.fs.adl.AdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory + {{required "Azure application ID required" .Values.distStorage.azure.applicationId}} + + + dfs.adls.oauth2.credential + Generated password value for the registered application + {{required "Azure secret value required" .Values.distStorage.azure.secret}} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{required "Azure OAuth2 token endpoint required" .Values.distStorage.azure.oauth2EndPoint}} + + + dfs.adls.oauth2.access.token.provider.type + Must be set to ClientCredential + ClientCredential + + + fs.adl.impl.disable.cache + Only include this property AFTER validating the ADLS connection. + false + + {{- end }} + + {{- if eq .Values.distStorage.type "azureStorage" }} + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. 
+ {{required "Azure storage account name required" .Values.distStorage.azureStorage.accountName}} + + + dremio.azure.key + The shared access key for the storage account. + {{required "Shared access key for the storage account required" .Values.distStorage.azureStorage.accessKey}} + + + dremio.azure.mode + The storage account type. Value: STORAGE_V2 + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. Value: True/False + True + + {{- end }} +{{- end }} diff --git a/charts/dremio/config/dremio.conf b/charts/dremio/config/dremio.conf index 12252069..6dbe4f9a 100644 --- a/charts/dremio/config/dremio.conf +++ b/charts/dremio/config/dremio.conf @@ -15,33 +15,21 @@ # paths: { - # the local path for dremio to store data. + # Local path for dremio to store data. local: ${DREMIO_HOME}"/data" - - # the distributed path Dremio data including job results, downloads, uploads, etc - #dist: "pdfs://"${paths.local}"/pdfs" - - # If you are editing the uploads value in this file, please delete all the lines starting with double curly braces - {{- if .Values.distStorage.type }} - {{- if and .Values.dremioVersion (lt .Values.dremioVersion "3.2.0") }} - {{- if eq .Values.distStorage.type "aws" }} - uploads: "s3a://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azure" }} - uploads: "adl://{{required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" - {{- end }} - {{- else }} # dremio_version > 3.2.0 - {{- if eq .Values.distStorage.type "aws" }} - uploads: "dremioS3://{{required "AWS bucketname required" .Values.distStorage.aws.bucketName}}{{required "Path required" .Values.distStorage.aws.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azure" }} - uploads: "dremioAdl://{{required "Azure Datalake 
store name required" .Values.distStorage.azure.datalakeStoreName}}.azuredatalakestore.net{{required "Path required" .Values.distStorage.azure.path}}" - {{- end }} - {{- if eq .Values.distStorage.type "azureStorage" }} - uploads: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.uploadsPath}}" - accelerator: "dremioAzureStorage://:///{{required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem}}/{{required "Path for uploads required" .Values.distStorage.azureStorage.acceleratorPath}}" - {{- end }} - {{- end }} + # Distributed path Dremio data including job results, downloads, + # uploads, etc + {{- if ne .Values.distStorage.type "local" }} + results: "pdfs://"${paths.local}"/results" + {{- if eq .Values.distStorage.type "aws" }} + dist: "dremioS3:///{{ required "AWS bucketname required" .Values.distStorage.aws.bucketName }}{{ required "Path required" .Values.distStorage.aws.path }}" + {{- else if eq .Values.distStorage.type "azure" }} + dist: "dremioAdl://{{ required "Azure Datalake store name required" .Values.distStorage.azure.datalakeStoreName }}.azuredatalakestore.net{{ required "Path required" .Values.distStorage.azure.path }}" + {{- else if eq .Values.distStorage.type "azureStorage" }} + dist: "dremioAzureStorage://:///{{ required "Azure Storage filesystem required" .Values.distStorage.azureStorage.filesystem }}{{ required "Path required" .Values.distStorage.azureStorage.path }}" + {{- end }} + {{- else }} + dist: "pdfs://"${paths.local}"/pdfs" {{- end }} } @@ -54,8 +42,30 @@ services: { # executor.enabled: true # # Other service parameters can be customized via this file. + + # Cloud Cache is supported in Dremio 4.0.0+. 
+ {{- if and .Values.executor.cloudCache.enabled (or (ge .Values.imageTag "4.0.0") (eq .Values.imageTag "latest")) }} + executor: { + cache: { + path.db: "/var/lib/dremio", + path.fs: ["/var/lib/dremio"], + pctquota.db: {{ .Values.executor.cloudCache.quota.db_pct }}, + pctquota.fs: [{{ .Values.executor.cloudCache.quota.fs_pct }}] + } + } + {{- end }} } +{{- if and .Values.executor.cloudCache.enabled (ne .Values.distStorage.type "local") }} +debug: { + # Enable caching for distributed storage, it is turned off by default + dist.caching.enabled: true, + # Max percent of total available cache space to use when possible + # for distributed storage + dist.max.cache.space.percent: {{ .Values.executor.cloudCache.quota.cache_pct }} +} +{{- end }} + {{- if .Values.tls.ui.enabled }} services.coordinator.web.ssl.enabled: true services.coordinator.web.ssl.auto-certificate.enabled: false @@ -64,9 +74,9 @@ services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" {{- end }} {{- if .Values.tls.client.enabled }} -# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition. +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in +# Dremio Enterprise Edition. 
services.coordinator.client-endpoint.ssl.enabled: true services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false - services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" -{{- end }} \ No newline at end of file +{{- end }} diff --git a/charts/dremio/templates/dremio-admin.yaml b/charts/dremio/templates/dremio-admin.yaml index 3de91c43..feda9d9b 100644 --- a/charts/dremio/templates/dremio-admin.yaml +++ b/charts/dremio/templates/dremio-admin.yaml @@ -12,7 +12,7 @@ metadata: spec: containers: - name: dremio-admin - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent stdin: true tty: true diff --git a/charts/dremio/templates/dremio-coordinator.yaml b/charts/dremio/templates/dremio-coordinator.yaml index 86d5f2ac..3089c1ad 100644 --- a/charts/dremio/templates/dremio-coordinator.yaml +++ b/charts/dremio/templates/dremio-coordinator.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -70,7 +70,7 @@ spec: command: ["sh", "-c", "until nc -z dremio-client {{ .Values.coordinator.web.port | default 9047 }} > /dev/null; do echo waiting for dremio master; sleep 2; done;"] {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls @@ -92,7 +92,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/templates/dremio-executor.yaml b/charts/dremio/templates/dremio-executor.yaml index e511e1d3..150e1fe1 100644 --- a/charts/dremio/templates/dremio-executor.yaml +++ 
b/charts/dremio/templates/dremio-executor.yaml @@ -28,7 +28,7 @@ spec: {{- end }} containers: - name: dremio-executor - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -61,9 +61,10 @@ spec: - name: wait-for-zk image: busybox command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] - # since we're mounting a separate volume, reset permission to dremio uid/gid + # since we're mounting a separate volume, reset permission to + # dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 diff --git a/charts/dremio/templates/dremio-master.yaml b/charts/dremio/templates/dremio-master.yaml index f272eb32..51619193 100644 --- a/charts/dremio/templates/dremio-master.yaml +++ b/charts/dremio/templates/dremio-master.yaml @@ -36,7 +36,7 @@ spec: {{- end }} containers: - name: dremio-master-coordinator - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent resources: requests: @@ -92,7 +92,7 @@ spec: command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo waiting for zookeeper host; sleep 2; done;"] # since we're mounting a separate volume, reset permission to dremio uid/gid - name: chown-data-directory - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent securityContext: runAsUser: 0 @@ -104,7 +104,7 @@ spec: - "dremio:dremio" - "/opt/dremio/data" - name: upgrade-task - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-master-volume @@ -114,7 +114,7 @@ spec: - "upgrade" {{- if .Values.tls.ui.enabled }} - name: generate-ui-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: 
IfNotPresent volumeMounts: - name: dremio-tls @@ -136,7 +136,7 @@ spec: {{- end }} {{- if .Values.tls.client.enabled }} - name: generate-client-keystore - image: {{.Values.image}} + image: {{.Values.image}}:{{.Values.imageTag}} imagePullPolicy: IfNotPresent volumeMounts: - name: dremio-tls diff --git a/charts/dremio/values.yaml b/charts/dremio/values.yaml index 2601ebba..925b88a3 100644 --- a/charts/dremio/values.yaml +++ b/charts/dremio/values.yaml @@ -1,7 +1,9 @@ # The image used to build the Dremio cluster. It is recommended to update the # version tag to the version that you are using. This will ensure that all # the pods are using the same version of the software. -image: dremio/dremio-oss:latest +image: dremio/dremio-oss +imageTag: latest + # Check out Dremio documentation for memory and cpu requirements for # the coordinators and the executors. # The value of memory should be in MB. CPU is in no of cores. @@ -22,6 +24,20 @@ executor: cpu: 15 count: 3 volumeSize: 100Gi + cloudCache: + # Requires Dremio version 4.0.0 or later + enabled: true + quota: + # Percentage of the disk space for the running Kubernetes node + # that can be used for Cloud Cache files. + fs_pct: 70 + # Percentage of that space that can be used for the internal + # Cloud Cache database. + db_pct: 70 + # Percentage of that space that can be used for caching + # materialised reflections. This is an upper bound, not a + # reservation. + cache_pct: 100 zookeeper: memory: 1024 cpu: 0.5 @@ -37,9 +53,10 @@ tls: enabled: false secret: dremio-tls-secret-ui client: - # To enable TLS for the client endpoints, set the enabled flag to true and provide - # the appropriate Kubernetes TLS secret. Client endpoint encryption is available only on - # Dremio Enterprise Edition and should not be enabled otherwise. + # To enable TLS for the client endpoints, set the enabled flag to + # true and provide the appropriate Kubernetes TLS secret.
Client + # endpoint encryption is available only on Dremio Enterprise + # Edition and should not be enabled otherwise. enabled: false secret: dremio-tls-secret-client @@ -63,35 +80,38 @@ serviceType: LoadBalancer #storageClass: managed-premium # For private and protected docker image repository, you should store -# the credentials in a kubernetes secret and provide the secret name here. -# For more information, see https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod +# the credentials in a kubernetes secret and provide the secret name +# here. For more information, see +# https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod #imagePullSecrets: secretname -# Target pods to nodes based on labels set on the nodes. -# For more information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +# Target pods to nodes based on labels set on the nodes. For more +# information, see +# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector #nodeSelector: # key: value -# Control where uploaded files are stored. -# See https://docs.dremio.com/deployment/distributed-storage.html for more information -dremioVersion: "3.2.0" # Dremio Version 3.2.0 or greater +# Control where uploaded files are stored. See +# https://docs.dremio.com/deployment/distributed-storage.html for more +# information distStorage: - # Valid values are local, aws, azure or azureStorage. aws and azure choice requires additional configuration data. + # Valid values are local, aws, azure or azureStorage. aws and azure + # choice requires additional configuration data. 
type: "local" - aws: #S3 - used for only uploads + aws: # S3 bucketName: "Your_AWS_bucket_name" path: "/" accessKey: "Your_AWS_Access_Key" secret: "Your_AWS_Secret" - azure: #ADLS v1 - used for only uploads + azure: # ADLS gen1 datalakeStoreName: "Your_Azure_DataLake_Storage_name" path: "/" applicationId: "Your_Azure_Application_Id" secret: "Your_Azure_Secret" oauth2EndPoint: "Azure_OAuth2_Endpoint" - azureStorage: #AzureStorage v2 - supported in Dremio version 3.2.0+ - used for uploads and accelerator + azureStorage: # AzureStorage gen2v2 accountName: "Azure_storage_v2_account_name" accessKey: "Access_key_for_the_storage_account" + filesystem: "Filesystem_in_storage_account" - uploadsPath: "Path_for_uploads" - acceleratorPath: "Path_for_accelerator" + path: "/" From ea3f65d8920a06ce42df889f69146ca55733be76 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Thu, 30 Jul 2020 22:33:20 +0000 Subject: [PATCH 24/31] DX-23723: Update ARM template base image reference. - Updates the ARM template to use an updated base CentOS image. 
Change-Id: Id77a3e2bf3918ce1314aba435c66b0830a7a024a --- azure/arm-templates/nested/dremioCluster.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure/arm-templates/nested/dremioCluster.json b/azure/arm-templates/nested/dremioCluster.json index 18832f6d..ab8e54a6 100644 --- a/azure/arm-templates/nested/dremioCluster.json +++ b/azure/arm-templates/nested/dremioCluster.json @@ -183,7 +183,7 @@ "publisher": "OpenLogic", "offer": "CentOS", "sku": "7.5", - "version": "7.5.20180815" + "version": "latest" }, "linuxConfiguration": { "disablePasswordAuthentication": true, From 575e58e51fa869267fe2c9a2f605bcb530bceca8 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Mon, 10 Aug 2020 08:29:37 -0700 Subject: [PATCH 25/31] DX-23310: Dremio Helm Chart v2 Change-Id: I4ea8b85a712047bcd4f6520da3c1ec5452d8b849 --- charts/dremio_v2/Chart.yaml | 7 + charts/dremio_v2/README.md | 109 ++ charts/dremio_v2/config/core-site.xml | 107 ++ charts/dremio_v2/config/dremio-env | 101 ++ charts/dremio_v2/config/dremio.conf | 88 ++ charts/dremio_v2/config/hive2/README.md | 2 + charts/dremio_v2/config/hive3/README.md | 2 + charts/dremio_v2/config/logback-access.xml | 51 + charts/dremio_v2/config/logback-admin.xml | 67 + charts/dremio_v2/config/logback.xml | 164 ++ charts/dremio_v2/docs/README.md | 13 + charts/dremio_v2/docs/Values-Reference.md | 1408 +++++++++++++++++ .../administration/Dremio-Administration.md | 47 + .../Scaling-Coordinators-and-Executors.md | 47 + .../docs/administration/Upgrading-Dremio.md | 33 + .../docs/administration/Viewing-Logs.md | 26 + .../setup/Customizing-Dremio-Configuration.md | 13 + .../setup/Important-Setup-Considerations.md | 10 + .../setup/Migrating-Helm-Chart-Versions.md | 71 + .../docs/setup/Setup-Hive-2-and-3.md | 9 + .../templates/_helpers_coordinator.tpl | 143 ++ .../dremio_v2/templates/_helpers_executor.tpl | 319 ++++ .../dremio_v2/templates/_helpers_general.tpl | 86 + .../templates/_helpers_zookeeper.tpl | 82 + 
charts/dremio_v2/templates/dremio-admin.yaml | 41 + .../dremio_v2/templates/dremio-configmap.yaml | 38 + .../templates/dremio-coordinator.yaml | 150 ++ .../dremio_v2/templates/dremio-executor.yaml | 116 ++ charts/dremio_v2/templates/dremio-master.yaml | 197 +++ .../templates/dremio-service-client.yaml | 38 + charts/dremio_v2/templates/zookeeper.yaml | 139 ++ charts/dremio_v2/values.yaml | 396 +++++ 32 files changed, 4120 insertions(+) create mode 100644 charts/dremio_v2/Chart.yaml create mode 100644 charts/dremio_v2/README.md create mode 100644 charts/dremio_v2/config/core-site.xml create mode 100644 charts/dremio_v2/config/dremio-env create mode 100644 charts/dremio_v2/config/dremio.conf create mode 100644 charts/dremio_v2/config/hive2/README.md create mode 100644 charts/dremio_v2/config/hive3/README.md create mode 100644 charts/dremio_v2/config/logback-access.xml create mode 100644 charts/dremio_v2/config/logback-admin.xml create mode 100644 charts/dremio_v2/config/logback.xml create mode 100644 charts/dremio_v2/docs/README.md create mode 100644 charts/dremio_v2/docs/Values-Reference.md create mode 100644 charts/dremio_v2/docs/administration/Dremio-Administration.md create mode 100644 charts/dremio_v2/docs/administration/Scaling-Coordinators-and-Executors.md create mode 100644 charts/dremio_v2/docs/administration/Upgrading-Dremio.md create mode 100644 charts/dremio_v2/docs/administration/Viewing-Logs.md create mode 100644 charts/dremio_v2/docs/setup/Customizing-Dremio-Configuration.md create mode 100644 charts/dremio_v2/docs/setup/Important-Setup-Considerations.md create mode 100644 charts/dremio_v2/docs/setup/Migrating-Helm-Chart-Versions.md create mode 100644 charts/dremio_v2/docs/setup/Setup-Hive-2-and-3.md create mode 100644 charts/dremio_v2/templates/_helpers_coordinator.tpl create mode 100644 charts/dremio_v2/templates/_helpers_executor.tpl create mode 100644 charts/dremio_v2/templates/_helpers_general.tpl create mode 100644 
charts/dremio_v2/templates/_helpers_zookeeper.tpl create mode 100644 charts/dremio_v2/templates/dremio-admin.yaml create mode 100644 charts/dremio_v2/templates/dremio-configmap.yaml create mode 100644 charts/dremio_v2/templates/dremio-coordinator.yaml create mode 100644 charts/dremio_v2/templates/dremio-executor.yaml create mode 100644 charts/dremio_v2/templates/dremio-master.yaml create mode 100644 charts/dremio_v2/templates/dremio-service-client.yaml create mode 100644 charts/dremio_v2/templates/zookeeper.yaml create mode 100644 charts/dremio_v2/values.yaml diff --git a/charts/dremio_v2/Chart.yaml b/charts/dremio_v2/Chart.yaml new file mode 100644 index 00000000..6362cdb8 --- /dev/null +++ b/charts/dremio_v2/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: "v1" +name: "dremio" +version: "2.0.0" +keywords: + - dremio + - data +home: https://www.dremio.com/ diff --git a/charts/dremio_v2/README.md b/charts/dremio_v2/README.md new file mode 100644 index 00000000..5fa46f46 --- /dev/null +++ b/charts/dremio_v2/README.md @@ -0,0 +1,109 @@ +# Dremio on Kubernetes Installation Guide + +Before beginning to setup Dremio on Kubernetes, take a moment to review all the associated [documentation](./docs) for the Helm chart. + +Once you have reviewed the documentation, continue to the [Installation](#installation) steps below to get your Dremio cluster up and running. If you are upgrading from the previous Helm chart for Dremio, please see the [Migrating Helm Chart Versions](./docs/setup/Migrating-Helm-Chart-Versions.md) documentation. + +## Installation + +### Prerequisites + +This guide assumes you already have the following setup: + +* An existing Kubernetes cluster setup +* Local machine setup with Helm 3 +* Local `kubectl` configured to access your Kubernetes cluster + +### Installing the Helm Chart + +1. 
Review the default values provided in `values.yaml` and review the [Important Setup Considerations](./docs/setup/Important-Setup-Considerations.md) documentation for the Helm chart. + + For a complete reference on all the options available in the `values.yaml`, see the [`Values.yaml` Reference](./docs/Values-Reference.md) documentation — this document covers all the available options and provides small code samples for each configuration option. + + To customize the Dremio software configuration, see the [Customizing Dremio Configuration](./docs/setup/Customizing-Dremio-Configuration.md) documentation. + + ***Tip***: As a best practice, we recommend creating a `values.local.yaml` (or equivalently named file) that stores the values that you wish to override as part of your setup of Dremio. This allows you to quickly update to the latest version of the chart by copying the `values.local.yaml` across Helm chart updates. + +2. To install, run the following from the `charts` directory: + +```bash +$ cd charts +$ helm install <release-name> dremio_v2 -f values.local.yaml +``` + +3. Check the status of the installation using the following command: + +```bash +$ kubectl get pods +``` + +If it takes longer than a couple of minutes to complete, check the status of the pods to see where they are waiting. If they are stuck in Pending state for an extended period of time, check on the status of the pod to check that there are sufficient resources for scheduling. To check, use the following command on the pending pod: + +```bash +$ kubectl describe pods <pod-name> +``` + +If the events at the bottom of the output mention insufficient CPU or memory, either adjust the values in your `values.local.yaml` and restart the process or add more resources to your Kubernetes cluster. + +4. Once you see all the pods in a "ready" state, your setup is all done! See below on how to connect to the Dremio UI to get your first user set up and also how to connect via JDBC/ODBC.
+ +### Connect to the Dremio UI + +You can look up the service `dremio-client` in Kubernetes to find the host for the web UI using the following command: + +```bash +$ kubectl get services dremio-client +``` + +#### Load Balancer Supported + +If your Kubernetes cluster supports a `service.type` of `LoadBalancer`, you can access the Dremio UI via port 9047 on the load balancer's external IP. You can optionally change the exposed port on the load balancer for the UI via `values.local.yaml` by setting `coordinator.web.port`. + +For example, in the output below, the value under the `EXTERNAL-IP` column is `8.8.8.8`. Therefore, you can get to the Dremio UI via port 9047 on that address: http://8.8.8.8:9047 + +```bash +$ kubectl get services dremio-client +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +dremio-client LoadBalancer 10.99.227.180 8.8.8.8 31010:32260/TCP,9047:30620/TCP 2d +``` + +#### Load Balancer Unsupported +If your Kubernetes cluster does not have support for a `service.type` of `LoadBalancer`, you can access the Dremio UI on the port exposed on the node. + +For example, in the output below, there is no value on the `EXTERNAL-IP` column and the Dremio master is running on node "localhost". Therefore, you can get to Dremio UI using: http://localhost:30670 + +```bash +$ kubectl get services dremio-client +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP 1h +``` + +### Connect to Dremio via ODBC/JDBC + +You can look up the service `dremio-client` in Kubernetes to find the host for JDBC/ODBC connections using the following command: + +```bash +$ kubectl get services dremio-client +``` + +#### Load Balancer Supported +If your Kubernetes cluster supports a `service.type` of `LoadBalancer`, you can access Dremio using ODBC/JDBC via port 31010 on the load balancer's external IP. 
You can optionally change the exposed port for ODBC/JDBC connections via `values.local.yaml` by setting `coordinator.client.port`. + +For example, in the output below, the value under the `EXTERNAL-IP` column is `8.8.8.8`. Therefore, you can connect to Dremio using ODBC/JDBC using: `8.8.8.8:31010` + +```bash +$ kubectl get services dremio-client +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +dremio-client LoadBalancer 10.99.227.180 8.8.8.8 31010:32260/TCP,9047:30620/TCP 2d +``` + +#### Load Balancer Unsupported +If your Kubernetes cluster does not have support for a `service.type` of `LoadBalancer`, you can access Dremio using ODBC/JDBC on the port exposed on the node. + +For example, in the output below, there is no value on the `EXTERNAL-IP` column and the Dremio master is running on node "localhost". Therefore, you can connect to Dremio via ODBC/JDBC using: `localhost:32390` + +```bash +$ kubectl get services dremio-client +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP 1h +``` diff --git a/charts/dremio_v2/config/core-site.xml b/charts/dremio_v2/config/core-site.xml new file mode 100644 index 00000000..d3b0aefc --- /dev/null +++ b/charts/dremio_v2/config/core-site.xml @@ -0,0 +1,107 @@ + + + +{{- if and $.Values.distStorage.type (ne $.Values.distStorage.type "local") }} + {{- if eq $.Values.distStorage.type "aws" }} + + + fs.dremioS3.impl + The FileSystem implementation. Must be set to com.dremio.plugins.s3.store.S3FileSystem + com.dremio.plugins.s3.store.S3FileSystem + + {{ if eq $.Values.distStorage.aws.authentication "accessKeySecret" -}} + + fs.s3a.aws.credentials.provider + The credential provider type. + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider + + + fs.s3a.access.key + AWS access key ID. + {{ required "AWS access key required" $.Values.distStorage.aws.credentials.accessKey}} + + + fs.s3a.secret.key + AWS secret key. 
+ {{ required "AWS secret required" $.Values.distStorage.aws.credentials.secret}} + + {{ else if eq $.Values.distStorage.aws.authentication "metadata" -}} + + fs.s3a.aws.credentials.provider + The credential provider type. + com.amazonaws.auth.InstanceProfileCredentialsProvider + + {{- else -}} + {{ fail "Unrecognized AWS authentication mode." }} + {{- end -}} + {{- if $.Values.distStorage.aws.extraProperties -}} + {{- $.Values.distStorage.aws.extraProperties | nindent 4 }} + {{- end -}} + {{- end -}} + {{- if eq $.Values.distStorage.type "azure" }} + + + + fs.dremioAdl.impl + com.dremio.plugins.adl.store.DremioAdlFileSystem + + + dfs.adls.oauth2.client.id + Application ID of the registered application under Azure Active Directory. + {{ required "Azure application ID required" $.Values.distStorage.azure.credentials.applicationId }} + + + dfs.adls.oauth2.credential + Generated password value for the registered application. + {{ required "Azure secret value required" $.Values.distStorage.azure.credentials.secret }} + + + dfs.adls.oauth2.refresh.url + Azure Active Directory OAuth 2.0 Token Endpoint for registered applications. + {{ required "Azure OAuth2 token endpoint required" $.Values.distStorage.azure.credentials.oauth2Endpoint }} + + + dfs.adls.oauth2.access.token.provider.type + ClientCredential + + + fs.adl.impl.disable.cache + false + + {{- if $.Values.distStorage.azure.extraProperties -}} + {{- $.Values.distStorage.azure.extraProperties | nindent 4 }} + {{- end -}} + {{- end }} + {{- if eq $.Values.distStorage.type "azureStorage" }} + + + fs.dremioAzureStorage.impl + FileSystem implementation. Must always be com.dremio.plugins.azure.AzureStorageFileSystem + com.dremio.plugins.azure.AzureStorageFileSystem + + + dremio.azure.account + The name of the storage account. + {{required "Azure storage account name required" $.Values.distStorage.azureStorage.accountName }} + + + dremio.azure.key + The shared access key for the storage account. 
+ {{ required "Shared access key required" $.Values.distStorage.azureStorage.credentials.accessKey }} + + + dremio.azure.mode + The storage account type. + STORAGE_V2 + + + dremio.azure.secure + Boolean option to enable SSL connections. + True + + {{- if $.Values.distStorage.azureStorage.extraProperties -}} + {{- $.Values.distStorage.azureStorage.extraProperties | nindent 4 }} + {{- end -}} + {{- end }} +{{- end}} + \ No newline at end of file diff --git a/charts/dremio_v2/config/dremio-env b/charts/dremio_v2/config/dremio-env new file mode 100644 index 00000000..6f6f8c09 --- /dev/null +++ b/charts/dremio_v2/config/dremio-env @@ -0,0 +1,101 @@ +# +# Copyright (C) 2017-2018 Dremio Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Dremio environment variables used by Dremio daemon +# + +# +# Directory where Dremio logs are written +# Default to $DREMIO_HOME/log +# +#DREMIO_LOG_DIR=${DREMIO_HOME}/log + +# +# Send logs to console and not to log files. The DREMIO_LOG_DIR is ignored if set. +# +#DREMIO_LOG_TO_CONSOLE=1 + +# +# Directory where Dremio pidfiles are written +# Default to $DREMIO_HOME/run +# +#DREMIO_PID_DIR=${DREMIO_HOME}/run + +# +# Max total memory size (in MB) for the Dremio process +# +# If not set, default to using max heap and max direct. +# +# If both max heap and max direct are set, this is not used +# If one is set, the other is calculated as difference +# of max memory and the one that is set. 
+# +#DREMIO_MAX_MEMORY_SIZE_MB= + +# +# Max heap memory size (in MB) for the Dremio process +# +# Default to 4096 for server +# +#DREMIO_MAX_HEAP_MEMORY_SIZE_MB=4096 + +# +# Max direct memory size (in MB) for the Dremio process +# +# Default to 8192 for server +# +#DREMIO_MAX_DIRECT_MEMORY_SIZE_MB=8192 + +# +# Max permanent generation memory size (in MB) for the Dremio process +# (Only used for Java 7) +# +# Default to 512 for server +# +#DREMIO_MAX_PERMGEN_MEMORY_SIZE_MB=512 + +# +# Garbage collection logging is enabled by default. Set the following +# parameter to "no" to disable garbage collection logging. +# +#DREMIO_GC_LOGS_ENABLED="yes" + +# +# The scheduling priority for the server +# +# Default to 0 +# +# DREMIO_NICENESS=0 +# + +# +# Number of seconds after which the server is killed forcibly if it hasn't stopped +# +# Default to 120 +# +#DREMIO_STOP_TIMEOUT=120 + +# Extra Java options - shared between dremio and dremio-admin commands +# +#DREMIO_JAVA_EXTRA_OPTS= + +# Extra Java options - client only (dremio-admin command) +# +#DREMIO_JAVA_CLIENT_EXTRA_OPTS= + +# Warning: Do not set DREMIO_JAVA_SERVER_EXTRA_OPTS in dremio-env. +# Please see the values.yaml extraStartParams for setting additional options for Dremio process startup. diff --git a/charts/dremio_v2/config/dremio.conf b/charts/dremio_v2/config/dremio.conf new file mode 100644 index 00000000..20991cae --- /dev/null +++ b/charts/dremio_v2/config/dremio.conf @@ -0,0 +1,88 @@ +# +# Copyright (C) 2017-2018 Dremio Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +paths: { + # Local path for dremio to store data. + local: ${DREMIO_HOME}"/data" + # Distributed path Dremio data including job results, downloads, + # uploads, etc + {{- if ne $.Values.distStorage.type "local" }} + results: "pdfs://"${paths.local}"/results" + {{- if eq $.Values.distStorage.type "aws" }} + dist: "dremioS3:///{{ required "AWS bucket name required" $.Values.distStorage.aws.bucketName }}{{ required "AWS bucket path required" $.Values.distStorage.aws.path }}" + {{- else if eq $.Values.distStorage.type "azure" }} + dist: "dremioAdl://{{ required "Azure Datalake store name required" $.Values.distStorage.azure.datalakeStoreName }}.azuredatalakestore.net{{ required "Azure Datalake path required" $.Values.distStorage.azure.path }}" + {{- else if eq $.Values.distStorage.type "azureStorage" }} + dist: "dremioAzureStorage://:///{{ required "Azure Storage filesystem required" $.Values.distStorage.azureStorage.filesystem }}{{ required "Azure Storage path required" $.Values.distStorage.azureStorage.path }}" + {{- else -}} + {{ fail "Unrecognized distStorage type." }} + {{- end }} + {{- else }} + dist: "pdfs://"${paths.local}"/pdfs" + {{- end }} +} + +services: { + # The services running are controlled via command line options passed in + # while starting the services via kubernetes. Updating the values listed below will not + # impact what is running: + # - coordinator.enabled + # - coordinator.master.enabled + # - coordinator.master.embedded-zookeeper.enabled + # - executor.enabled + # + # Other service parameters can be customized via this file. 
+ + {{- if and $.Values.executor.cloudCache.enabled (eq $.Values.executor.cloudCache.enabled true) }} + executor: { + cache: { + path.db: "/opt/dremio/cloudcache/c0" + pctquota.db: 100 + + path.fs: ["/opt/dremio/cloudcache/c0"] + pctquota.fs: [100] + ensurefreespace.fs: [0] + {{ range $index, $_ := rest $.Values.executor.cloudCache.volumes -}} + path.fs += "/opt/dremio/cloudcache/c{{ add1 $index }}" + pctquota.fs += 100 + ensurefreespace.fs += 0 + {{- end }} + } + } + {{- end }} +} + +{{- if and $.Values.executor.cloudCache.enabled (ne $.Values.distStorage.type "local") }} +debug: { + # Enable caching for distributed storage, it is turned off by default + dist.caching.enabled: true, + # Max percent of total available cache space to use when possible for distributed storage + dist.max.cache.space.percent: 100 +} +{{- end }} + +{{- if $.Values.coordinator.web.tls.enabled }} +services.coordinator.web.ssl.enabled: true +services.coordinator.web.ssl.auto-certificate.enabled: false +services.coordinator.web.ssl.keyStore: "/opt/dremio/tls/ui.pkcs12" +{{- end }} + +{{- if $.Values.coordinator.client.tls.enabled }} +# Client endpoint (i.e. ODBC/JDBC) encryption is only supported in Dremio Enterprise Edition. +services.coordinator.client-endpoint.ssl.enabled: true +services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false +services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" +{{- end }} diff --git a/charts/dremio_v2/config/hive2/README.md b/charts/dremio_v2/config/hive2/README.md new file mode 100644 index 00000000..982beef2 --- /dev/null +++ b/charts/dremio_v2/config/hive2/README.md @@ -0,0 +1,2 @@ +### Hive 2 Configuration Files +This directory is used to store Hive 2 configuration files to be deployed to Dremio. 
\ No newline at end of file diff --git a/charts/dremio_v2/config/hive3/README.md b/charts/dremio_v2/config/hive3/README.md new file mode 100644 index 00000000..94c8c9da --- /dev/null +++ b/charts/dremio_v2/config/hive3/README.md @@ -0,0 +1,2 @@ +### Hive 3 Configuration Files +This directory is used to store Hive 3 configuration files to be deployed to Dremio. \ No newline at end of file diff --git a/charts/dremio_v2/config/logback-access.xml b/charts/dremio_v2/config/logback-access.xml new file mode 100644 index 00000000..a00ae338 --- /dev/null +++ b/charts/dremio_v2/config/logback-access.xml @@ -0,0 +1,51 @@ + + + + + + + + + ${dremio.log.path}/access.log + + ${dremio.log.path}/archive/access.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + combined + + + + + + + + + combined + + + + + + + diff --git a/charts/dremio_v2/config/logback-admin.xml b/charts/dremio_v2/config/logback-admin.xml new file mode 100644 index 00000000..b393d02b --- /dev/null +++ b/charts/dremio_v2/config/logback-admin.xml @@ -0,0 +1,67 @@ + + + + + + %msg%n%ex{0}%n + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + + %date{ISO8601} [%thread] %-5level %logger{30} - %msg%n + + + + + + + + + ${dremio.admin.log.verbosity:-OFF} + + ${dremio.admin.log.path} + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + + + + + + + + + + + + diff --git a/charts/dremio_v2/config/logback.xml b/charts/dremio_v2/config/logback.xml new file mode 100644 index 00000000..0ab3528b --- /dev/null +++ b/charts/dremio_v2/config/logback.xml @@ -0,0 +1,164 @@ + + + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + ${dremio.log.path}/server.log + + ${dremio.log.path}/archive/server.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + ${dremio.log.path}/metadata_refresh.log + + ${dremio.log.path}/archive/metadata_refresh.%d{yyyy-MM-dd}.log.gz + 30 + + + + %date{ISO8601} [%thread] %-5level %logger{36} - 
%msg%n + + + + + ${dremio.log.path}/json/server.json + + ${dremio.log.path}/json/archive/server.%d{yyyy-MM-dd}.%i.json.gz + 30 + + 100MB + + + + + + {"timestamp": "%date{ISO8601}", "host":"${HOSTNAME}" } + thread + levelName + levelValue + logger + message + + + + + + + + ${dremio.log.path}/queries.json + + ${dremio.log.path}/archive/queries.%d{yyyy-MM-dd}.%i.json.gz + 30 + + 100MB + + + + + %msg%n + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${dremio.log.path}/hive.deprecated.function.warning.log + + ${dremio.log.path}/archive/hive.deprecated.function.warning.%d{yyyy-MM-dd}.%i.log.gz + 30 + + 100MB + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + + + + + + + + + + diff --git a/charts/dremio_v2/docs/README.md b/charts/dremio_v2/docs/README.md new file mode 100644 index 00000000..c83d00dc --- /dev/null +++ b/charts/dremio_v2/docs/README.md @@ -0,0 +1,13 @@ +# Dremio on Kubernetes Documentation + +* **Setup** + * [Important Setup Considerations](./setup/Important-Setup-Considerations.md) + * [Migrating Helm Chart Versions](./setup/Migrating-Helm-Chart-Versions.md) + * [Customizing Dremio Configuration](./setup/Customizing-Dremio-Configuration.md) + * [Setup Hive 2 and 3](./setup/Setup-Hive-2-and-3.md) +* **Administration** + * [Dremio Administration](./administration/Dremio-Administration.md) + * [Scaling Coordinators and Executors](./administration/Scaling-Coordinators-and-Executors.md) + * [Upgrading Dremio](./administration/Upgrading-Dremio.md) + * [Viewing Logs](./administration/Viewing-Logs.md) +* [`Values.yaml` Reference](./Values-Reference.md) \ No newline at end of file diff --git a/charts/dremio_v2/docs/Values-Reference.md b/charts/dremio_v2/docs/Values-Reference.md new file mode 100644 index 00000000..e89b4d6e --- /dev/null +++ b/charts/dremio_v2/docs/Values-Reference.md @@ -0,0 +1,1408 @@ +# `Values.yaml` Reference + +🔎 To search this document for specific values, use dot-notation 
to search, i.e. `coordinator.volumeSize`. + +ℹ️ In all code examples, `[...]` denotes additional values that have been omitted. + +## Top Level Values + +### Image Configuration + +#### `image` + +Type: String + +By default, the image is set to `dremio/dremio-oss`, the community edition of Dremio. + +The `image` refers to the location to retrieve the specific container image for Dremio. In some cases, the `image` value may vary in corporate environments where there may be a private container registry that is used. + +#### `imageTag` + +Type: String + +By default, the value is set to `latest`. + +It is **strongly** recommended to pin the version of Dremio that we are deploying by setting the `imageTag` to a precise version and not leave the value as latest. Since Dremio versions are not backwards compatible, leaving it as latest may automatically upgrade dremio during pod creation. + +#### `imagePullSecrets` + +Type: Array + +By default, this value is not set. + +In some environments, an internal mirror may be used that requires authentication. For enterprise users, you may need to specify the `imagePullSecret` for the Kubernetes cluster to have access to the Dremio enterprise image. Please refer to the documentation [Pull an Image from a Private Repository](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/) provided by Kubernetes on how to create an image pull secret. + +### Storage Configuration + +#### `storageClass` + +Type: String + +By default, this value is not set and will use the default storage class configured for the cluster. + +Storage class has a direct impact on the performance of the Dremio cluster. Optionally set this value to use the same storage class for all persistent volumes created. This value is independently overridable in each section ([`coordinator`](#coordinator), [`executor`](#executor), [`zookeeper`](#zookeeper)). 
+ +More Info: See the [Storage Classes](https://kubernetes.io/docs/concepts/storage/storage-classes/) documentation for Kubernetes. + +### Annotations, Labels, Node Selectors, Tags, and Tolerations + +By default, these values are set to empty. These values are independently overridable in each section ([`coordinator`](#coordinator), [`executor`](#executor), [`zookeeper`](#zookeeper)). + +#### `annotations` + +Type: Dictionary + +The annotations set at this root level are used by all `StatefulSet` resources unless overridden in their respective configuration sections. + +For example, you can set annotations as follows: + +```yaml +annotations: + example-annotation-one: "example-value-one" + example-annotation-two: "example-value-two" +[...] +``` + +More Info: See the [Annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) documentation for Kubernetes. + +#### `podAnnotations` + +Type: Dictionary + +The pod annotations set at this root level are used by all `Pod` resources unless overridden in their respective configuration sections. + +For example, you can set pod annotations as follows: + +```yaml +podAnnotations: + example-pod-annotation-one: "example-value-one" + example-pod-annotation-two: "example-value-two" +[...] +``` + +More Info: See the [Annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) documentation for Kubernetes. + +#### `labels` + +Type: Dictionary + +The labels set at this root level are used by all `StatefulSet` resources unless overridden in their respective configuration sections. + +For example, you can set labels as follows: + +```yaml +labels: + example-label-one: "example-value-one" + example-label-two: "example-value-two" +[...] +``` + +More Info: See the [Labels and Selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) documentation for Kubernetes. 
+ +#### `podLabels` + +Type: Dictionary + +The pod labels set at this root level are inherited by all `Pod` resources unless overridden in their respective configuration sections. + +For example, you can set pod labels as follows: + +```yaml +podLabels: + example-pod-label-one: "example-value-one" + example-pod-label-two: "example-value-two" +[...] +``` + +More Info: See the [Labels and Selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) documentation for Kubernetes. + +#### `nodeSelector` + +Type: Dictionary + +The node selectors set at this root level are inherited by all `Pod` resources unless overridden in their respective configuration sections. + +For example, you can set the node selector to select nodes that have a label `diskType` of value `ssd` as follows: + +```yaml +nodeSelector: + diskType: "ssd" +[...] +``` + +More Info: See the [nodeSelector](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) section of Assigning Pods to Nodes documentation for Kubernetes. + +#### `tolerations` + +Type: Array + +The tolerations set at this root level are inherited by all `Pod` resources unless overridden in their respective configuration sections. + +For example, if there is a node with the taint `example-key=example-value:NoSchedule`, you can set the tolerations to allow the pod to be scheduled as follows: + +```yaml +tolerations: +- key: "example-key" + operator: "Exists" + effect: "NoSchedule" +[...] +``` + +More Info: See the [Taints and Tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) documentation for Kubernetes. + +### Dremio Configuration + +#### `coordinator` + +Type: Dictionary + +This section controls the deployment of coordinator instance(s). See the [Coordinator Values](#coordinator-values) section. + +#### `executor` + +Type: Dictionary + +This section controls the deployment of executor instance(s). 
See the [Executor Values](#executor-values) section. + +#### `distStorage` + +Type: Dictionary + +This section controls Dremio's distributed storage configuration. See the [Distributed Storage Values](#distributed-storage-values) section. + +#### `service` + +Type: Dictionary + +This section controls Dremio's Kubernetes service which is exposed to end users of Dremio. See the [Service Values](#service-values) section. + +### Zookeeper Configuration + +#### `zookeeper` + +Type: Dictionary + +This section controls the deployment of Zookeeper in Kubernetes. See the [Zookeeper Values](#zookeeper-values) section. + +### Advanced Configuration + +#### `extraStartParams` + +Type: String + +By default, this value is not set. + +This value controls additional parameters passed to the Dremio process. + +For example, to pass an additional system property to the java process, you can specify the following: + +```yaml +extraStartParams: >- + -DsomeTestKey=someValue +[...] +``` + +#### `extraInitContainers` + +Type: String + +By default, this value is not set. + +This value controls additional `initContainers` that are started as part of the initialization process for Dremio's pods. The value specified here may reference values specified in the built-in `Values` object in Helm. + +For example, to have an `initContainer` with the Dremio image, you can specify the following: + +```yaml +extraInitContainers: | + - name: dremio-hello-world + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + command: ["echo", "Hello World"] +[...] +``` + +#### `extraVolumes` + +Type: Array + +By default, this value is not set. + +This value controls additional volumes that are attached to Dremio's pods. This specifies additional volumes that should be mountable to the containers in Dremio's pods. This value is typically used in conjunction with `extraVolumeMounts`.
+ +For example, if you have a `ConfigMap` named `cm-dremio-additional-files` with additional files that you want to include in the running Dremio pods, you can specify the following: + +```yaml +extraVolumes: +- name: dremio-additional-files + configMap: + name: cm-dremio-additional-files +[...] +``` + +#### `extraVolumeMounts` + +Type: Array + +By default, this value is not set. + +This value controls the additional volumes that should be mounted to the Dremio containers and the paths that each volume should be mounted at. This value is typically used in conjunction with `extraVolumes`. + +For example, if you have set the above `extraVolumes` value as shown in the example, you can map this volume into the path `/additional-files` as follows: + +```yaml +extraVolumeMounts: +- name: dremio-additional-files + mountPath: "/additional-files" +[...] +``` + +## Coordinator Values + +### General Configuration + +#### `coordinator.cpu` & `coordinator.memory` + +Type: Integer + +By default, the value of `cpu` is `15` and the value of memory is `122800` (MB). + +The values for `cpu` and `memory` control the amount of CPU and memory in MB being requested for each coordinator instance for the purposes of scheduling a coordinator to a specific node in the Kubernetes cluster. + +***Note***: While the values specified are not upper bounds, the value of `memory` specified here is used by the chart to calculate the allocation of heap and direct memory used by Dremio. + +#### `coordinator.count` + +Type: Integer + +By default, the value is set to `0`. + +Increasing this number controls the *secondary* coordinators that are launched as part of the deployment. Regardless of this value, at minimum one master coordinator is launched as part of the deployment. The total number of coordinator instances launched will always be `coordinator.count + 1`. + +#### `coordinator.volumeSize` + +Type: String + +By default, the value is set to `128Gi`. 
+ +The coordinator volume is used to store the RocksDB KV store and requires a performant disk. In most hosted Kubernetes environments, disk performance is determined by the size of the volume. + +### Web UI + +#### `coordinator.web.port` + +Type: Integer + +By default, the value is set to `9047`. + +To change the port that Dremio listens on, change the port to a desired value. The valid range of ports is 1 to 65535. + +#### `coordinator.web.tls.enabled` + +Type: Boolean + +By default, the value is set to `false`. + +To enable TLS on the web UI, set this value to `true`. Also, provide a value for `coordinator.web.tls.secret` that corresponds with the TLS secret that should be used. + +#### `coordinator.web.tls.secret` + +Type: String + +By default, the value is set to `dremio-tls-secret-ui`. + +This value is ignored if `coordinator.web.tls.enabled` is not set to `true`. This value should reference the TLS secret object in Kubernetes that contains the certificate for the web UI. + +For example, to have TLS enabled for the web UI using a certificate created called `dremio-tls-secret-ui`, you can set the configuration as follows: + +```yaml +coordinator: + [...] + web: + tls: + enabled: true + secret: dremio-tls-secret-ui +[...] +``` + +To create a secret, use the following command: `kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE}` providing appropriate values for `TLS_SECRET_NAME`, `KEY_FILE`, `CERT_FILE`. + +***Note***: Dremio does not support auto-rotation of secrets. To update the secret used by Dremio, restart the coordinator pods to have the new TLS secret take effect. + +More Info: See the [Creating your own Secrets](https://kubernetes.io/docs/concepts/configuration/secret/#creating-your-own-secrets) section of the Secrets documentation for Kubernetes. + +### Client (JDBC/ODBC) + +#### `coordinator.client.tls.enabled` + +Type: Boolean + +By default, the value is set to `false`.
This is an **enterprise only feature** and should not be set to true when using a community edition of Dremio. + +To enable TLS on the client ODBC/JDBC port, set this value to `true`. Also, provide a value for `coordinator.client.tls.secret` that corresponds with the TLS secret that should be used. + +#### `coordinator.client.tls.secret` + +Type: String + +By default, the value is set to `dremio-tls-secret-client`. + +This value is ignored if `coordinator.client.tls.enabled` is not set to `true`. This value should reference the TLS secret object in Kubernetes that contains the certificate for the client JDBC/ODBC connections. + +For example, to have TLS enabled for the client JDBC/ODBC connections using a certificate created called `dremio-tls-secret-client`, you can set the configuration as follows: + +```yaml +coordinator: + [...] + client: + tls: + enabled: true + secret: dremio-tls-secret-client +[...] +``` + +To create a secret, use the following command: `kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE}` providing appropriate values for `TLS_SECRET_NAME`, `KEY_FILE`, `CERT_FILE`. + +***Note***: Dremio does not support auto-rotation of secrets. To update the secret used by Dremio, restart the coordinator pods to have the new TLS secret take effect. + +More Info: See the [Creating your own Secrets](https://kubernetes.io/docs/concepts/configuration/secret/#creating-your-own-secrets) section of the Secrets documentation for Kubernetes. + +### Annotations, Labels, Node Selectors, Tags, and Tolerations + +By default, these values are not set. If the value is omitted or set to an empty array/dictionary, this value will be inherited from the top level equivalent. For more information about these configuration values, please refer to the top level equivalents of these values. + +#### `coordinator.annotations` + +Type: Dictionary + +The annotations set are used by all coordinator `StatefulSet` resources.
+ +For example, you can set annotations as follows: + +```yaml +coordinator: + [...] + annotations: + example-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`annotations`](#annotations) section of this reference. + +#### `coordinator.podAnnotations` + +Type: Dictionary + +The pod annotations set are used by all `Pod`(s) created by the coordinator `StatefulSet`(s). + +For example, you can set pod annotations as follows: + +```yaml +coordinator: + [...] + podAnnotations: + example-pod-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podAnnotations`](#podannotations) section of this reference. + +#### `coordinator.labels` + +Type: Dictionary + +The labels set are used by all coordinator `StatefulSet` resources. + +For example, you can set labels as follows: + +```yaml +coordinator: + [...] + labels: + example-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`labels`](#labels) section of this reference. + +#### `coordinator.podLabels` + +Type: Dictionary + +The pod labels set are used by all `Pod`(s) created by the coordinator `StatefulSet`(s). + +For example, you can set pod labels as follows: + +```yaml +coordinator: + [...] + podLabels: + example-pod-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podLabels`](#podlabels) section of this reference. + +#### `coordinator.nodeSelector` + +Type: Dictionary + +The node selectors set are used by all `Pod`(s) created by the coordinator `StatefulSet`(s). + +For example, you can set node selectors as follows: + +```yaml +coordinator: + [...] + nodeSelector: + diskType: "ssd" +[...] +``` + +More Info: Refer to the [`nodeSelector`](#nodeselector) section of this reference. + +### Advanced Customizations + +#### `coordinator.storageClass` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `storageClass`.
+ +Storage class has a direct impact on the performance of the Dremio cluster. On the master coordinator node, RocksDB is stored on the persistent volume created with this storage class. + +More Info: Refer to the [`storageClass`](#storageclass) section of this reference. + +#### `coordinator.extraStartParams` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `extraStartParams`. + +This value controls additional parameters passed to the Dremio process. + +For example, to pass an additional system property to the java process, you can specify the following: + +```yaml +coordinator: + [...] + extraStartParams: >- + -DsomeTestKey=someValue +[...] +``` + +More Info: Refer to the [`extraStartParams`](#extrastartparams) section of this reference. + +#### `coordinator.extraInitContainers` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `extraInitContainers`. + +This value controls additional `initContainers` that are started as part of the initialization process for Dremio's coordinator pods. The value specified here may reference values specified in the `values.yaml` file. + +For example, to have an `initContainer` with the Dremio image, you can specify the following: + +```yaml +coordinator: + [...] + extraInitContainers: | + - name: dremio-hello-world + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + command: ["echo", "Hello World"] +[...] +``` + +More Info: Refer to the [`extraInitContainers`](#extrainitcontainers) section of this reference. + +#### `coordinator.extraVolumes` + +Type: Array + +By default, this value is not set. If this value is omitted or set to an empty array, this value will be inherited from the top level `extraVolumes`. + +This value controls additional volumes that are attached to the Dremio coordinator pod. 
This specifies additional volumes that should be mountable to the containers in the Dremio coordinator pod. This value is typically used in conjunction with `coordinator.extraVolumeMounts`. + +For example, if you have a `ConfigMap` named `cm-dremio-additional-files` with additional files that you want to include in the running Dremio coordinator pods, you can specify the following: + +```yaml +coordinator: + [...] + extraVolumes: + - name: dremio-additional-files + configMap: + name: cm-dremio-additional-files +[...] +``` + +More Info: Refer to the [`extraVolumes`](#extravolumes) section of this reference. + +#### `coordinator.extraVolumeMounts` + +Type: Array + +By default, this value is not set. If this value is omitted or set to an empty array, this value will be inherited from the top level `extraVolumeMounts`. + +This value controls the additional volumes that should be mounted to the Dremio coordinator container and the paths that the volume should be mounted at. This value is typically used in conjunction with `coordinator.extraVolumes`. + +For example, if you have set the above `coordinator.extraVolumes` value as shown in the example, you can map this volume into the path `/additional-files` as follows: + +```yaml +coordinator: + [...] + extraVolumeMounts: + - name: dremio-additional-files + mountPath: "/additional-files" +[...] +``` + +More Info: Refer to the [`extraVolumeMounts`](#extravolumemounts) section of this reference. + +## Executor Values + +### General Configuration + +#### `executor.cpu` & `executor.memory` + +Type: Integer + +By default, the value of `cpu` is `15` and the value of memory is `122800` (MB). This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +The values for `cpu` and `memory` control the amount of CPU and memory in MB being requested for each executor instance for the purposes of scheduling an executor to a specific node in the Kubernetes cluster. 
+ +***Note***: While the values specified are not upper bounds, the value of `memory` specified here is used by the chart to calculate the allocation of heap and direct memory used by Dremio. + +#### `executor.engines` + +Type: Array + +By default, the value is `["default"]`. + +By adding additional values to this list, additional sets of executors are launched. By default, each set of executors will start with `executor.count` number of pods. See the [Per-Engine Configuration](#per-engine-configuration) section of this reference to customize the number of executors that are started. + +#### `executor.count` + +Type: Integer + +By default, the value is set to `3`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +Increasing this number controls the number of executors that are launched as part of the engine. Without per-engine overrides, the total number of executor pods started is calculated as the `length(executor.engines) * executor.count`. + +#### `executor.volumeSize` + +Type: String + +By default, the value is set to `128Gi`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +The executor volume is used to store results of queries run. If the `distStorage.type` is set to `local`, additional resources such as accelerations may be stored in the volume. In most hosted Kubernetes environments, disk performance is determined by the size of the volume. + +### Columnar Cloud Cache (C3) Configuration + +#### `executor.cloudCache.enabled` + +Type: Boolean + +By default, the value is set to `true`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +Columnar cloud cache (C3) is enabled by default on executors. To turn off cloud cache, set this value to `false`. + +#### `executor.cloudCache.storageClass` + +Type: String + +By default, this value is not set.
If this value is omitted or set to an empty string, this value will be inherited from `executor.storageClass` or its parent value `storageClass`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +C3 is designed for usage with local NVMe storage devices. If available, it is recommended to setup a [local storage provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner/blob/master/docs/getting-started.md) to allow Dremio to utilize local NVMe storage on the Kubernetes nodes. + +#### `executor.cloudCache.volumes` + +Type: Array + +By default, the value is set to `[{size: 100Gi}]`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +By specifying more than one item in the list, additional volumes are provisioned for C3. Each volume must specify a `size` and optionally a `name` and custom `storageClass`. If the volume omits the `storageClass`, the value of `executor.cloudCache.storageClass` or its parent values are used. + +For example, if the Kubernetes nodes that are provisioned have three local NVMe storage devices available, then we can create three C3 cache volumes each using a different `size` and combination of custom `name` and `storageClass` values: + +```yaml +executor: + [...] + cloudCache: + volumes: + - size: 300Gi + - name: "executor-c3-0" + size: 100Gi + storageClass: "local-nvme" + - size: 50Gi + storageClass: "local-nvme" +[...] +``` + +### Annotations, Labels, Node Selectors, Tags, and Tolerations + +By default, these values are not set. If the value is omitted or set to an empty array/dictionary, this value will be inherited from the top level equivalent. For more information about these configuration values, please refer to the top level equivalents of these values. 
+ +#### `executor.annotations` + +Type: Dictionary + +The annotations set are used by all executor `StatefulSet` resources. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set annotations as follows: + +```yaml +executor: + [...] + annotations: + example-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`annotations`](#annotations) section of this reference. + +#### `executor.podAnnotations` + +Type: Dictionary + +The pod annotations set are used by all `Pod`(s) created by the executor `StatefulSet`(s). This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set pod annotations as follows: + +```yaml +executor: + [...] + podAnnotations: + example-pod-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podAnnotations`](#podannotations) section of this reference. + +#### `executor.labels` + +Type: Dictionary + +The labels set are used by all executor `StatefulSet` resources. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set labels as follows: + +```yaml +executor: + [...] + labels: + example-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`labels`](#labels) section of this reference. + +#### `executor.podLabels` + +Type: Dictionary + +The pod labels set are used by all `Pod`(s) created by the executor `StatefulSet`(s). This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set pod labels as follows: + +```yaml +executor: + [...] + podLabels: + example-pod-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podLabels`](#podlabels) section of this reference. 
+ +#### `executor.nodeSelector` + +Type: Array + +The node selectors set are used by all `Pod`(s) created by the executor `StatefulSet`(s). This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +For example, you can set node selectors as follows: + +```yaml +executor: + [...] + nodeSelector: + diskType: "ssd" +[...] +``` + +More Info: Refer to the [`nodeSelector`](#nodeselector) section of this reference. + +### Advanced Customizations + +#### `executor.storageClass` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `storageClass`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +Storage class has a direct impact on the performance of the Dremio cluster. On the master coordinator node, RocksDB is stored on the persistent volume created with this storage class. + +More Info: Refer to the [`storageClass`](#storageclass) section of this reference. + +#### `executor.extraStartParams` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `extraStartParams`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +This value controls additional parameters passed to the Dremio process. + +For example, to pass an additional system property to the java process, you can specify the following: + +```yaml +executor: + [...] + extraStartParams: >- + -DsomeTestKey=someValue +[...] +``` + +More Info: Refer to the [`extraStartParams`](#extrastartparams) section of this reference. + +#### `executor.extraInitContainers` + +Type: String + +By default, this value is not set.
If this value is omitted or set to an empty string, this value will be inherited from the top level `extraInitContainers`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +This value controls additional `initContainers` that are started as part of the initialization process for Dremio's executor pods. The value specified here may reference values specified in the `values.yaml` file. + +For example, to have an `initContainer` with the Dremio image, you can specify the following: + +```yaml +executor: + [...] + extraInitContainers: | + - name: dremio-hello-world + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + command: ["echo", "Hello World"] +[...] +``` + +More Info: Refer to the [`extraInitContainers`](#extrainitcontainers) section of this reference. + +#### `executor.extraVolumes` + +Type: Array + +By default, this value is not set. If this value is omitted or set to an empty array, this value will be inherited from the top level `extraVolumes`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +This value controls additional volumes that are attached to the Dremio executor pod. This specifies additional volumes that should be mountable to the containers in the Dremio executor pod. This value is typically used in conjunction with `executor.extraVolumeMounts`. + +For example, if you have a `ConfigMap` named `cm-dremio-additional-files` with additional files that you want to include in the running Dremio executor pods, you can specify the following: + +```yaml +executor: + [...] + extraVolumes: + - name: dremio-additional-files + configMap: + name: cm-dremio-additional-files +[...] +``` + +More Info: Refer to the [`extraVolumes`](#extravolumes) section of this reference. + +#### `executor.extraVolumeMounts` + +Type: Array + +By default, this value is not set.
If this value is omitted or set to an empty array, this value will be inherited from the top level `extraVolumeMounts`. This value can be set on a **per-engine basis**, see the [Per-Engine Configuration](#per-engine-configuration) section. + +This value controls the additional volumes that should be mounted to the Dremio executor container and the paths that the volume should be mounted at. This value is typically used in conjunction with `executor.extraVolumes`. + +For example, if you have set the above `executor.extraVolumes` value as shown in the example, you can map this volume into the path `/additional-files` as follows: + +```yaml +executor: + [...] + extraVolumeMounts: + - name: dremio-additional-files + mountPath: "/additional-files" +[...] +``` + +More Info: Refer to the [`extraVolumeMounts`](#extravolumemounts) section of this reference. + +### Per-Engine Configuration + +#### `executor.engineOverride.` + +Type: Dictionary + +By default, this value is not set. + +Engine overrides use the name of the engine provided in the `executor.engines` array to allow customization on a per-engine basis. The value of `` should be the name of an engine provided in `executor.engines`. + +For example, the following shows all the supported override values being set (which override the shared values from `executor`): + +```yaml +executor: + [...] 
+ engineOverride: + : + cpu: 4 + memory: 144800 + + count: 2 + + annotations: + example-annotation-one: "example-value-one" + example-annotation-two: "example-value-two" + podAnnotations: + example-pod-annotation-one: "example-value-one" + example-pod-annotation-two: "example-value-two" + labels: + example-label-one: "example-value-one" + example-label-two: "example-value-two" + podLabels: + example-pod-label-one: "example-value-one" + example-pod-label-two: "example-value-two" + nodeSelector: + diskType: "ssd" + tolerations: + - key: "example-key" + operator: "Exists" + effect: "NoSchedule" + + extraStartParams: >- + -DsomeTestKey=someValue + + extraInitContainers: | + - name: dremio-hello-world + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + command: ["echo", "Hello World"] + + extraVolumes: + - name: dremio-additional-files + configMap: + name: cm-dremio-additional-files + + extraVolumeMounts: + - name: dremio-additional-files + mountPath: "/additional-files" + + volumeSize: 50Gi + storageClass: "managed-premium" + + cloudCache: + enabled: true + + storageClass: "local-nvme" + + volumes: + - size: 300Gi + - name: "executor-c3-0" + size: 100Gi + storageClass: "local-nvme" + - size: 50Gi + storageClass: "local-nvme" +[...] +``` + +#### `executor.engineOverride..volumeClaimName` + +Type: String + +By default, this value is not set. + +When set, this will be the volume claim name used for the persistent volume by an engine. Unless moving from an old Helm chart with existing volume claims that must be retained, this value should not be used. This value should only be used for the `default` engine as persistent volume claims are pod name dependent as well and non-`default` engines will not match the pod name required. + +For example, if moving from an old Helm chart that used `dremio-executor-volume`, you can continue to use the volumes for the `default` engine by specifying the following: + +```yaml +executor: + [...]
+ engineOverride: + default: + volumeClaimName: dremio-executor-volume +[...] +``` + +## Distributed Storage Values + +### General Configuration + +#### `distStorage.type` + +Type: String + +By default, this value is set to `local`. + +The valid values for `distStorage.type` are `local` (not recommended), `aws`, `azure`, or `azureStorage`. For specific configuration values for each, see the associated sections: + +* `aws` (S3): [AWS S3](#aws-s3) +* `azure` (Azure ADLS Gen 1): [Azure ADLS Gen 1](#azure-adls-gen-1) +* `azureStorage` (Azure Storage Gen2): [Azure Storage Gen2](#azure-storage-gen2) + +For example, to use AWS S3 as the distributed storage location, you can specify the following: + +```yaml +distStorage: + [...] + type: "aws" +[...] +``` + +### AWS S3 + +#### `distStorage.aws.bucketName` + +Type: String + +By default, this value is set to `AWS Bucket Name` and must be changed to a valid bucket name. + +Specify a valid bucket name that Dremio has write access to. For the required permissions, please see the [Amazon S3](http://docs.dremio.com/deployment/dist-store-config.html#amazon-s3) section of the Configuration Distributed Storage documentation for Dremio. + +#### `distStorage.aws.path` + +Type: String + +By default, this value is set to `/`. + +Dremio will write to the root path of the provided bucket. Set this value to an alternative path if you would like Dremio to write its contents to a subdirectory. + +#### `distStorage.aws.authentication` + +Type: String + +By default, this value is set to `metadata`. + +The valid values for `distStorage.aws.authentication` are `metadata` or `accessKeySecret`. When set to `metadata`, Dremio will attempt to use the instance profile of the EKS node to authenticate to the S3 bucket. When set to `accessKeySecret`, the values `distStorage.aws.credentials.accessKey` and `distStorage.aws.credentials.secret` are used to authenticate. + +***Note***: Dremio does not support service account IAM roles on EKS.
+ +#### Credentials for AWS S3 + +When providing credentials, both `distStorage.aws.credentials.accessKey` and `distStorage.aws.credentials.secret` should be provided. + +For example, the following `distStorage` configuration may be used: + +```yaml +distStorage: + [...] + aws: + bucketName: "demo.dremio.com" + path: "/" + authentication: "accessKeySecret" + credentials: + accessKey: "SOME_VALID_KEY" + secret: "SOME_VALID_SECRET" +[...] +``` + +##### `distStorage.aws.credentials.accessKey` + +Type: String + +By default, this value is not set. + +For Dremio to authenticate via access key and secret, provide a valid access key value. + +##### `distStorage.aws.credentials.secret` + +Type: String + +By default, this value is not set. + +For Dremio to authenticate via access key and secret, provide a valid secret value. + +#### Advanced Configuration for AWS S3 + +##### `distStorage.aws.extraProperties` + +Type: String + +By default, this value is not set. + +This value can be used to specify additional properties to `core-site.xml` which is used to configure properties for the distributed storage source. + +For example, to set the S3 endpoint, you can do the following: + +```yaml +distStorage: + aws: + [...] + extraProperties: | + <property> + <name>fs.s3a.endpoint</name> + <value>s3.us-west-2.amazonaws.com</value> + </property> +[...] +``` + +### Azure ADLS Gen 1 + +#### `distStorage.azure.datalakeStoreName` + +Type: String + +By default, this value is set to `Azure Datalake Store Name` and must be changed to a valid ADLS datalake store name. + +Specify a valid datalake store name that Dremio has write access to. For the required permissions, please see the [Azure Configuration](http://docs.dremio.com/data-sources/azure-data-lake-store.html#azure-configuration) section of the Azure Data Lake Storage Gen1 documentation for Dremio. + +#### `distStorage.azure.path` + +Type: String + +By default, this value is set to `/`. + +Dremio will write to the root path of the provided datalake store.
Set this value to an alternative path if you would like Dremio to write its contents to a subdirectory. + +#### Credentials for Azure ADLS Gen 1 + +##### `distStorage.azure.credentials.applicationId` + +Type: String + +By default, this value is set to `Azure Application ID` and must be changed to a valid Azure Application ID. + +For Dremio to authenticate to the datalake store, provide a valid application ID. + +##### `distStorage.azure.credentials.secret` + +Type: String + +By default, this value is set to `Azure Application Secret` and must be changed to a valid Azure Application Secret. + +For Dremio to authenticate to the datalake store, provide a valid secret value. + +##### `distStorage.azure.credentials.oauth2Endpoint` + +Type: String + +By default, this value is set to `Azure OAuth2 Endpoint` and must be changed to a valid Azure OAuth2 endpoint. + +For Dremio to authenticate to the datalake store, provide a valid OAuth2 endpoint. + +#### Advanced Configuration for Azure ADLS Gen 1 + +##### `distStorage.azure.extraProperties` + +Type: String + +By default, this value is not set. + +This value can be used to specify additional properties to `core-site.xml` which is used to configure properties for the distributed storage source. + +For example, to disable the cache (this value should not be set in production), you can do the following: + +```yaml +distStorage: + azure: + [...] + extraProperties: | + <property> + <name>fs.adl.impl.disable.cache</name> + <value>true</value> + </property> +``` + +### Azure Storage Gen2 + +#### `distStorage.azureStorage.accountName` + +Type: String + +By default, this value is set to `Azure Storage Account Name` and must be changed to a valid Azure Storage account name. + +Specify a valid Azure Storage account name that Dremio has write access to. For the required permissions, please see the Azure Storage documentation for Dremio.
+ +#### `distStorage.azureStorage.filesystem` + +Type: String + +By default, this value is set to `Azure Storage Account Blob Container` and must be changed to a valid Azure Storage blob container. + +Specify a valid Azure Storage blob container that Dremio has write access to. + +#### `distStorage.azureStorage.path` + +Type: String + +By default, this value is set to `/`. + +Dremio will write to the root path of the provided Azure Storage blob container. Set this value to an alternative path if you would like Dremio to write its contents to a subdirectory. + +#### Credentials for Azure Storage Gen2 + +##### `distStorage.azureStorage.credentials.accessKey` + +Type: String + +By default, this value is set to `Azure Storage Account Access Key` and must be changed to a valid access key. + +For Dremio to authenticate to the provided Azure Storage blob container, provide a valid access key. + +#### Advanced Configuration for Azure Storage Gen2 + +##### `distStorage.azureStorage.extraProperties` + +Type: String + +By default, this value is not set. + +This value can be used to specify additional properties to `core-site.xml` which is used to configure properties for the distributed storage source. + +For example, to disable SSL connections (this value should not be set in production), you can do the following: + +```yaml +distStorage: + azureStorage: + [...] + extraProperties: | + <property> + <name>dremio.azure.secure</name> + <value>false</value> + </property> +[...] +``` + +## Storage Values + +### General Configuration + +#### `service.type` + +Type: String + +By default, this value is set to `LoadBalancer`. + +In some environments, a `LoadBalancer` may not be available. You may alternatively set the type to `ClusterIP` for cluster-only usage of Dremio or `NodePort` to make the service available via the port on the Kubernetes node. + +For example, to make Dremio only accessible in the Kubernetes cluster, you can do the following: + +```yaml +service: + [...] + type: ClusterIP +[...]
+``` + +#### `service.sessionAffinity` + +Type: Boolean + +By default, this value is not set, which defaults to `false`. + +To enable session affinity, set this value to `true`. Session affinity is critical for the web UI when `coordinator.count` is greater than 0. + +### Annotations and Labels + +By default, these values are not set. If the value is omitted or set to an empty array/dictionary, this value will be inherited from the top level equivalent. For more information about these configuration values, please refer to the top level equivalents of these values. + +#### `service.annotations` + +Type: Dictionary + +The annotations set are used by the `Service` resource. + +For example, you can set annotations as follows: + +```yaml +service: + [...] + annotations: + example-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`annotations`](#annotations) section of this reference. + +#### `service.labels` + +Type: Dictionary + +The labels set are used by the `Service` resource. + +For example, you can set labels as follows: + +```yaml +service: + [...] + labels: + example-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`labels`](#labels) section of this reference. + +### Load Balancer + +#### `service.internalLoadBalancer` + +Type: Boolean + +By default, this value is not set, which defaults to `false`. + +When enabling this property, additional annotations are added to the `Service` for using an internal IP for the load balancer.
Specifically, the following annotations are added which provide support for AWS, AKS, and GKE load balancers: + +- `service.beta.kubernetes.io/azure-load-balancer-internal: "true"` +- `cloud.google.com/load-balancer-type: "Internal"` +- `service.beta.kubernetes.io/aws-load-balancer-internal: 0.0.0.0/0` + +If these values are not applicable for your Kubernetes cluster, use the [`service.annotations`](#serviceannotations) value to provide a custom annotation that applies to your load balancer. + +#### `service.loadBalancerIP` + +Type: String + +By default, this value is not set. + +When setting this property, the load balancer attempts to use the provided IP address instead of dynamically allocating one. This IP address should be a static IP address that is usable by Kubernetes. + +## Zookeeper Values + +### Image Configuration + +#### `zookeeper.image` + +Type: String + +By default, the value is set to `k8s.gcr.io/kubernetes-zookeeper`. + +The `image` refers to the location to retrieve the specific container image for Zookeeper. In some cases, the `zookeeper.image` value may vary in corporate environments where there may be a private container registry that is used. + +#### `zookeeper.imageTag` + +Type: String + +By default, the value is set to `1.0-3.4.10`. + +The version of Zookeeper set has been validated by Dremio to work with the Dremio software. Changing this version is not recommended unless the tag is different due to a private container registry name difference. + +### General Configuration + +#### `zookeeper.cpu` & `zookeeper.memory` + +Type: Integer + +By default, the value of `cpu` is `0.5` and the value of memory is `1024` (MB). + +The values for `cpu` and `memory` control the amount of CPU and memory in MB being requested for each Zookeeper instance for the purposes of scheduling a Zookeeper to a specific node in the Kubernetes cluster. + +#### `zookeeper.count` + +Type: Integer + +By default, the value is set to `3`.
+ +This number sets the number of instances of Zookeeper to deploy. It is recommended to have a minimum of 3 to maintain a quorum. Changing the value below 3 may cause instability in the cluster. + +#### `zookeeper.volumeSize` + +Type: String + +By default, the value is set to `10Gi`. + +The Zookeeper volume is used for the WAL (Write Ahead Log) used by Zookeeper in the event of a crash. + +### Annotations, Labels, Node Selectors, Tags, and Tolerations + +By default, these values are not set. If the value is omitted or set to an empty array/dictionary, this value will be inherited from the top level equivalent. For more information about these configuration values, please refer to the top level equivalents of these values. + +#### `zookeeper.annotations` + +Type: Dictionary + +The annotations set are used by the Zookeeper `StatefulSet` resource. + +For example, you can set annotations as follows: + +```yaml +zookeeper: + [...] + annotations: + example-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`annotations`](#annotations) section of this reference. + +#### `zookeeper.podAnnotations` + +Type: Dictionary + +The pod annotations set are used by all `Pod`(s) created by the Zookeeper `StatefulSet`. + +For example, you can set pod annotations as follows: + +```yaml +zookeeper: + [...] + podAnnotations: + example-pod-annotation-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podAnnotations`](#podannotations) section of this reference. + +#### `zookeeper.labels` + +Type: Dictionary + +The labels set are used by the Zookeeper `StatefulSet` . + +For example, you can set labels as follows: + +```yaml +zookeeper: + [...] + labels: + example-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`labels`](#labels) section of this reference. + +#### `zookeeper.podLabels` + +Type: Dictionary + +The pod labels set are used by all `Pod`(s) created by the Zookeeper `StatefulSet`. 
+ +For example, you can set pod labels as follows: + +```yaml +zookeeper: + [...] + podLabels: + example-pod-label-one: "example-value-one" +[...] +``` + +More Info: Refer to the [`podLabels`](#podlabels) section of this reference. + +#### `zookeeper.nodeSelector` + +Type: Array + +The node selectors set are used by all `Pod`(s) created by the Zookeeper `StatefulSet`. + +For example, you can set node selectors as follows: + +```yaml +zookeeper: + [...] + nodeSelector: + diskType: "ssd" +[...] +``` + +More Info: Refer to the [`nodeSelector`](#nodeselector) section of this reference. + +### Advanced Customizations + +#### `zookeeper.storageClass` + +Type: String + +By default, this value is not set. If this value is omitted or set to an empty string, this value will be inherited from the top level `storageClass`. + +Storage class has an impact on the performance of the Zookeeper instances when writing the WAL and reading back data in the event of a crash. A more performant storage class may impact recovery times in the event of such a crash. + +More Info: Refer to the [`storageClass`](#storageclass) section of this reference. + diff --git a/charts/dremio_v2/docs/administration/Dremio-Administration.md b/charts/dremio_v2/docs/administration/Dremio-Administration.md new file mode 100644 index 00000000..5499cc4c --- /dev/null +++ b/charts/dremio_v2/docs/administration/Dremio-Administration.md @@ -0,0 +1,47 @@ +# Dremio Administration + +Administration commands restore, cleanup and set-password in dremio-admin needs to be run when the Dremio cluster is not running. So, before running these commands, you need to shutdown the Dremio cluster. + +As part of the Helm chart, we support invoking the `dremio-admin` commands via a `dremio-admin` pod. Consult the [Admin CLI](https://docs.dremio.com/advanced-administration/dremio-admin-cli.html) documentation for Dremio for a complete list of `dremio-admin` commands that can be invoked. 
+ +**Starting Dremio Admin Pod** + +The `dremio-admin` pod is created via the Helm chart. During this process, Dremio will become unavailable to end users as the other pods are shut down. + +To invoke the `dremio-admin` pod, use the following Helm command: + +```bash +$ helm upgrade dremio_v2 --reuse-values --set DremioAdmin=true +``` + +**Stopping Dremio Admin Pod** + +To stop the `dremio-admin` pod and restart the other Dremio pods, use the following Helm command: + +```bash +$ helm upgrade dremio_v2 --reuse-values --set DremioAdmin=false +``` + +**Connecting to the Dremio Admin Pod** + +Once you have started the `dremio-admin` pod, you can use the following command to access the pod: + +```bash +$ kubectl exec -it dremio-admin -- bash +``` + +The above command will connect you to the dremio-admin pod. Once there, you can invoke the `dremio-admin` commands normally from within the pod. + +**Copying Files** + +To copy contents from the `dremio-admin` pod, you can use the following command: + +```bash +$ kubectl cp dremio-admin: +``` + +For example, to copy the contents of the Dremio `db` directory to a `db_backup` directory on your local machine, you can do the following: + +```bash +$ kubectl cp dremio-admin:data/db db_backup +``` diff --git a/charts/dremio_v2/docs/administration/Scaling-Coordinators-and-Executors.md b/charts/dremio_v2/docs/administration/Scaling-Coordinators-and-Executors.md new file mode 100644 index 00000000..c5b47029 --- /dev/null +++ b/charts/dremio_v2/docs/administration/Scaling-Coordinators-and-Executors.md @@ -0,0 +1,47 @@ +# Scaling Coordinators and Executors + +**Temporarily Scaling Coordinators and Executors** + +*Coordinators*: To temporarily scale the coordinator nodes that you have, modify the number of replicas for the `dremio-coordinator` StatefulSet using the following command.
+ +```bash +$ kubectl scale statefulsets dremio-coordinator --replicas= +``` + +This number should represent the number of *secondary* coordinators that you want. Setting this number to zero will remove all secondary coordinators and leave a single master coordinator. + +*Executors*: To temporarily scale the number of executors, locate the StatefulSet for the engine you wish to scale. + +To see the StatefulSets that exist, use the following command: + +```bash +$ kubectl get statefulsets +``` + +Then, to scale a specific engine, modify the number of replicas for the associated StatefulSet using the following command: + +```bash +$ kubectl scale statefulsets --replicas= +``` + +**Permanently Scaling Coordinators and Executors** + +1. Get the name of the Helm release. In the example below, the release name is `dremio`: + +```bash +$ helm list +NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION +dremio helm-demo 1 2020-08-10 08:45:20.038011 -0700 PDT deployed dremio-2.0.0 +``` + +Adding additional resources should be done by modifying your`values.local.yaml` file. + +* To modify the number of secondary coordinators, modify the value `coordinator.count` to be greater than 0. +* To modify the number of executors, modify the `executor.count`. If you have more than one engine and wish to scale a specific engine, see the [`executor.engineOverride`](../Values-Reference.md#executorengineoverride) section of the `Values.yaml` Reference documentation. + +Once you have made the appropriate customizations, run the following command to update your deployment with the changes: + +```bash +$ helm upgrade dremio_v2 -f values.local.yaml +``` + diff --git a/charts/dremio_v2/docs/administration/Upgrading-Dremio.md b/charts/dremio_v2/docs/administration/Upgrading-Dremio.md new file mode 100644 index 00000000..47acd446 --- /dev/null +++ b/charts/dremio_v2/docs/administration/Upgrading-Dremio.md @@ -0,0 +1,33 @@ +# Upgrading Dremio + +1. 
Ensure that you have completed a backup of Dremio. See the [Dremio Administration](./Dremio-Administration.md) documentation on how to access `dremio-admin` commands to complete a backup prior to upgrading. +2. Update the Dremio `imageTag` value in your values.yaml file. + + For example, to update to `4.7.0`, update the tag to the following: + +```yaml +imageTag: 4.7.0 +[...] +``` + +3. Get the name of the Helm release. In the example below, the release name is `dremio`. + +```bash +$ helm list +NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION +dremio helm-demo 1 2020-08-10 08:45:20.038011 -0700 PDT deployed dremio-2.0.0 +``` + +4. Upgrade the deployment via the `helm upgrade` command: + +```bash +$ helm upgrade <release-name> dremio_v2 -f values.local.yaml +``` + +The existing pods will be terminated and new pods will be created with the new image. You can monitor the status of the pods by running: + +```bash +$ kubectl get pods +``` + +Once all the pods are restarted and running, your Dremio cluster is upgraded. \ No newline at end of file diff --git a/charts/dremio_v2/docs/administration/Viewing-Logs.md b/charts/dremio_v2/docs/administration/Viewing-Logs.md new file mode 100644 index 00000000..bb07cf52 --- /dev/null +++ b/charts/dremio_v2/docs/administration/Viewing-Logs.md @@ -0,0 +1,26 @@ +# Viewing Logs + +**Dremio Logs** + +By default, Dremio logs are written to the container's console. All the logs - `server.log`, `server.gc` and `access.log` - are written into the console simultaneously. + +You can view the logs using `kubectl logs`: + +```bash +$ kubectl logs <pod-name> +``` + +You can also tail the logs using the `-f` parameter for `kubectl logs`: + +```bash +$ kubectl logs -f <pod-name> +``` + +**Pod Initialization Logs** + +In some cases, if the Dremio cluster is failing to start up during the pod init phases, it may be necessary to view the logs generated by the init containers. 
Identify the pod that is failing to start, and use the following command to view the init container logs: + +```bash +$ kubectl logs <pod-name> -c <init-container-name> +``` + diff --git a/charts/dremio_v2/docs/setup/Customizing-Dremio-Configuration.md b/charts/dremio_v2/docs/setup/Customizing-Dremio-Configuration.md new file mode 100644 index 00000000..f76819c5 --- /dev/null +++ b/charts/dremio_v2/docs/setup/Customizing-Dremio-Configuration.md @@ -0,0 +1,13 @@ +# Customizing Dremio Configuration + +Dremio configuration files used by the deployment are in the `config` directory. These files are propagated to all the pods in the cluster. + +To update the configuration used in the pods, run the Helm upgrade command: + +```bash +$ helm upgrade <release-name> dremio_v2 -f values.local.yaml +``` + +To see all the configuration customizations, please see the [Customizing Configuration](https://docs.dremio.com/deployment/README-config.html) documentation for Dremio. + +For users who wish to set up a Hive 2/3 source, please see the [Setup Hive 2 and 3](./Setup-Hive-2-and-3.md) documentation. \ No newline at end of file diff --git a/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md b/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md new file mode 100644 index 00000000..c796d56c --- /dev/null +++ b/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md @@ -0,0 +1,10 @@ +# Important Setup Considerations + +As part of setting up a Dremio cluster on Kubernetes, there are a number of important considerations that we recommend you review before deploying your cluster. Some of these values have an impact on the performance of your cluster and should be adjusted to your needs. + +* `imageTag`: As part of setup, this value should be updated to reference the exact version of Dremio you wish to deploy, e.g. `4.7.0`. +* `distStorage.type`: By default, the `distStorage.type` is set to `local`. This **must** be changed prior to production use. 
We do not recommend users use local distributed storage as part of a production setup. +* `volumeSize` and `storageClass`: The size and type of volume used for Dremio has a direct impact on performance. In most Kubernetes providers, volume size has a direct impact on the performance in IOPS and read/write speeds. It is important to check your Kubernetes provider to determine how volume size impacts the performance of your disk. +* `executor.cloudCache.storageClass`: Dremio C3 was designed to be used with performant NVMe storage. By default, the chart utilizes the default storage class that is configured on the Kubernetes cluster. For the major Kubernetes providers, NVMe storage is often available on appropriately sized nodes. We recommend utilizing a local storage provisioner to unlock the benefits of NVMe storage available on the physical Kubernetes nodes. For more information, see the [Kubernetes Special Interest Group for Local Static Provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner). + +For users who wish to setup a Hive 2/3 source, please see the [Setup Hive 2 and 3](./Setup-Hive-2-and-3.md) documentation. \ No newline at end of file diff --git a/charts/dremio_v2/docs/setup/Migrating-Helm-Chart-Versions.md b/charts/dremio_v2/docs/setup/Migrating-Helm-Chart-Versions.md new file mode 100644 index 00000000..6dcd5381 --- /dev/null +++ b/charts/dremio_v2/docs/setup/Migrating-Helm-Chart-Versions.md @@ -0,0 +1,71 @@ +# Migrating Helm Chart Versions + +**⚠️ If the contents of your chart's `templates` directory has been modified, this guide may not cover the changes necessary to maintain your existing setup.** As new functionality has been added to the Helm chart, please check whether the new version of the chart allows you to express the same modifications that have been made to your templates directory. + +**Note**: Helm 2 is no longer supported in this Helm chart. + +1. 
First begin by overriding the values in `values.yaml` to reflect the original chart's values. + + ***Tip***: As a best practice, we recommend creating a `values.local.yaml` (or equivalently named file) that stores the values that you wish to override as part of your setup of Dremio. This allows you to quickly update to the latest version of the chart by copying the `values.local.yaml` across Helm chart updates. + + In the new version of the Dremio Helm chart, changes have been introduced to the `values.yaml` file that differ from the original Dremio Helm chart. The table below maps existing values and shows their equivalents in this Helm chart's `values.yaml`. + +| Original Value | New Value | +| ------------------------------------- | ------------------------------------------------------------ | +| `executor.cloudCache.quota.fs_pct` | **Removed** - In the new chart, we now require provisioning a persistent volume for Cloud Cache. | +| `executor.cloudCache.quota.db_pct` | **Removed** - In the new chart, we now require provisioning a persistent volume for Cloud Cache. | +| `executor.cloudCache.quota.cache_pct` | **Removed** - In the new chart, we now require provisioning a persistent volume for Cloud Cache. | +| `tls.ui.enabled` | `coordinator.ui.tls.enabled` | +| `tls.ui.secret` | `coordinator.ui.tls.secret` | +| `tls.client.enabled` | `coordinator.client.tls.enabled` | +| `tls.client.secret` | `coordinator.client.tls.secret` | +| `serviceType` | `service.type` | +| `sessionAffinity` | `service.sessionAffinity` | +| `internalLoadBalancer` | `service.internalLoadBalancer` | +| `imagePullSecrets` | `imagePullSecrets` is no longer a string-based value. This is now an array, which can be represented as follows: `imagePullSecrets: ["original-value"]`. 
| +| `distStorage.aws.accessKey` | `distStorage.aws.credentials.accessKey` ***Note***: If using access key and secret for authentication, the value of `distStorage.aws.authentication` must also be set to `accesskeySecret`. | +| `distStorage.aws.secret` | `distStorage.aws.credentials.secret` ***Note***: If using access key and secret for authentication, the value of `distStorage.aws.authentication` must also be set to `accesskeySecret`. | +| `distStorage.azure.applicationId` | `distStorage.azure.credentials.applicationId` | +| `distStorage.azure.secret` | `distStorage.azure.credentials.secret` | +| `distStorage.azure.oauth2EndPoint` | `distStorage.azure.credentials.oauth2Endpoint` ***Note***: The capitalization has changed in this value from `EndPoint` to `Endpoint`. | +| `distStorage.azureStorage.accessKey` | `distStorage.azureStorage.credentials.accessKey` | + +2. This chart introduces the concept of engines. Engines operate as a grouping of executor nodes that can be targeted via queues to handle specific workloads. + + As part of the transition to this Helm chart, to retain the existing persistent volumes used for the executor nodes, ensure that you keep a `default` engine as provided by the `values.yaml`. Additionally, set the value of `executor.engineOverride.default.volumeClaimName` to be `dremio-executor-volume`. + + For example, you would want to do the following to setup the `volumeClaimName`: + +```yaml +executor: + [...] + engineOverride: + default: + volumeClaimName: dremio-executor-volume +``` + +3. **With any upgrade, we recommend creating a backup first before performing an upgrade.** + + See the [Dremio Administration](../administration/Dremio-Administration.md) documentation on how to access `dremio-admin` commands to complete a backup prior to upgrading. + +4. We are now ready to begin upgrading to the new Helm chart. Begin by uninstalling the existing Helm chart for Dremio by using the `helm` command. 
+ + Note: The data will persist in the persistent volumes through this process. + +```bash +$ helm uninstall <release-name> +``` + +If your original deployment of the Dremio chart was with Helm 2, uninstall the Helm chart for Dremio by using the following command with the Helm 2 binary: + +```bash +$ helm2 delete --purge <release-name> +``` + +5. Now, invoke `helm` again to install the new version of the chart: + +```bash +$ helm install <release-name> dremio_v2 -f values.local.yaml +``` + +6. **Done!** Verify that the upgrade was successful. \ No newline at end of file diff --git a/charts/dremio_v2/docs/setup/Setup-Hive-2-and-3.md b/charts/dremio_v2/docs/setup/Setup-Hive-2-and-3.md new file mode 100644 index 00000000..8f34f0ba --- /dev/null +++ b/charts/dremio_v2/docs/setup/Setup-Hive-2-and-3.md @@ -0,0 +1,9 @@ +# Setup Hive 2 and 3 + +To set up Hive 2/3 in the Helm chart, locate the respective `config/hive2` or `config/hive3` directory to copy your necessary configuration files for Hive, e.g. `core-site.xml`. + +To update the configuration files in the pods, run the Helm upgrade command: + +```bash +$ helm upgrade <release-name> dremio_v2 -f values.local.yaml +``` \ No newline at end of file diff --git a/charts/dremio_v2/templates/_helpers_coordinator.tpl b/charts/dremio_v2/templates/_helpers_coordinator.tpl new file mode 100644 index 00000000..d7973f06 --- /dev/null +++ b/charts/dremio_v2/templates/_helpers_coordinator.tpl @@ -0,0 +1,143 @@ +{{/* +Coordinator - Dremio Heap Memory allocation +*/}} +{{- define "dremio.coordinator.heapMemory" -}} +{{- $coordinatorMemory := int $.Values.coordinator.memory -}} +{{- if gt 4096 $coordinatorMemory -}} +{{ fail "Dremio's minimum memory requirement is 4 GB." 
}} +{{- end -}} +{{- if le 18432 $coordinatorMemory -}} +16384 +{{- else -}} +{{- sub $coordinatorMemory 2048}} +{{- end -}} +{{- end -}} + +{{/* +Coordiantor - Dremio Direct Memory Allocation +*/}} +{{- define "dremio.coordinator.directMemory" -}} +{{- $coordinatorMemory := int $.Values.coordinator.memory -}} +{{- if gt 4096 $coordinatorMemory -}} +{{ fail "Dremio's minimum memory requirement is 4 GB." }} +{{- end -}} +{{- if le 18432 $coordinatorMemory -}} +{{- sub $coordinatorMemory 16384 -}} +{{- else -}} +2048 +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Dremio Start Parameters +*/}} +{{- define "dremio.coordinator.extraStartParams" -}} +{{- $coordinatorExtraStartParams := coalesce $.Values.coordinator.extraStartParams $.Values.extraStartParams -}} +{{- if $coordinatorExtraStartParams}} +{{- printf "%v " $coordinatorExtraStartParams -}} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Extra Init Containers +*/}} +{{- define "dremio.coordinator.extraInitContainers" -}} +{{- $coordinatorExtraInitContainers := coalesce $.Values.coordinator.extraInitContainers $.Values.extraInitContainers -}} +{{- if $coordinatorExtraInitContainers -}} +{{ tpl $coordinatorExtraInitContainers $ }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Extra Volume Mounts +*/}} +{{- define "dremio.coordinator.extraVolumeMounts" -}} +{{- $coordinatorExtraVolumeMounts := default (default (dict) $.Values.extraVolumeMounts) $.Values.coordinator.extraVolumeMounts -}} +{{- if $coordinatorExtraVolumeMounts -}} +{{ toYaml $coordinatorExtraVolumeMounts }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Extra Volumes +*/}} +{{- define "dremio.coordinator.extraVolumes" -}} +{{- $coordinatorExtraVolumes := coalesce $.Values.coordinator.extraVolumes $.Values.extraVolumes -}} +{{- if $coordinatorExtraVolumes -}} +{{ toYaml $coordinatorExtraVolumes }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Storage Class +*/}} +{{- define "dremio.coordinator.storageClass" -}} +{{- 
$coordinatorStorageClass := coalesce $.Values.coordinator.storageClass $.Values.storageClass -}} +{{- if $coordinatorStorageClass -}} +storageClass: {{ $coordinatorStorageClass }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - StatefulSet Annotations +*/}} +{{- define "dremio.coordinator.annotations" -}} +{{- $coordinatorAnnotations := coalesce $.Values.coordinator.annotations $.Values.annotations -}} +{{- if $coordinatorAnnotations -}} +annotations: + {{- toYaml $coordinatorAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - StatefulSet Labels +*/}} +{{- define "dremio.coordinator.labels" -}} +{{- $coordinatorLabels := coalesce $.Values.coordinator.labels $.Values.labels -}} +{{- if $coordinatorLabels -}} +labels: + {{- toYaml $coordinatorLabels | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Annotations +*/}} +{{- define "dremio.coordinator.podAnnotations" -}} +{{- $coordiantorPodAnnotations := coalesce $.Values.coordinator.podAnnotations $.Values.podAnnotations -}} +{{- if $coordiantorPodAnnotations -}} +{{ toYaml $coordiantorPodAnnotations }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Labels +*/}} +{{- define "dremio.coordinator.podLabels" -}} +{{- $coordinatorPodLabels := coalesce $.Values.coordinator.podLabels $.Values.podLabels -}} +{{- if $coordinatorPodLabels -}} +{{ toYaml $coordinatorPodLabels }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Node Selectors +*/}} +{{- define "dremio.coordinator.nodeSelector" -}} +{{- $coordinatorNodeSelector := coalesce $.Values.coordinator.nodeSelector $.Values.nodeSelector -}} +{{- if $coordinatorNodeSelector -}} +nodeSelector: + {{- toYaml $coordinatorNodeSelector | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Coordinator - Pod Tolerations +*/}} +{{- define "dremio.coordinator.tolerations" -}} +{{- $coordinatorTolerations := coalesce $.Values.coordinator.tolerations $.Values.tolerations -}} +{{- if $coordinatorTolerations -}} +tolerations: + {{- toYaml 
$coordinatorTolerations | nindent 2 }} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/_helpers_executor.tpl b/charts/dremio_v2/templates/_helpers_executor.tpl new file mode 100644 index 00000000..bbe5eafb --- /dev/null +++ b/charts/dremio_v2/templates/_helpers_executor.tpl @@ -0,0 +1,319 @@ + +{{/* +Executor - Dremio Heap Memory Allocation. Takes a list (context, engine name); the engine's executor.engineOverride memory wins over executor.memory. Renders 8192 when memory is at least 32768, 4096 when at least 6144, otherwise 2048; fails below 4096. +*/}} +{{- define "dremio.executor.heapMemory" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineMemory := int (default $context.Values.executor.memory $engineConfiguration.memory) -}} +{{- if gt 4096 $engineMemory -}} +{{ fail "Dremio's minimum memory requirement is 4 GB." }} +{{- end -}} +{{- if le 32768 $engineMemory -}} +8192 +{{- else if le 6144 $engineMemory -}} +4096 +{{- else -}} +2048 +{{- end -}} +{{- end -}} + +{{/* +Executor - Dremio Direct Memory Allocation. Takes a list (context, engine name) and uses the same memory tiers as the heap helper: memory minus 8192 at 32768 and above, memory minus 6144 from 6144 up, otherwise memory minus 2048; fails below 4096. +*/}} +{{- define "dremio.executor.directMemory" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineMemory := int (default $context.Values.executor.memory $engineConfiguration.memory) -}} +{{- if gt 4096 $engineMemory -}} +{{ fail "Dremio's minimum memory requirement is 4 GB." 
}} +{{- end -}} +{{- if le 32768 $engineMemory -}} +{{- sub $engineMemory 8192 -}} +{{- else if le 6144 $engineMemory -}} +{{- sub $engineMemory 6144 -}} +{{- else -}} +{{- sub $engineMemory 2048 -}} +{{- end -}} +{{- end -}} + +{{/* +Executor - CPU Resource Request +*/}} +{{- define "dremio.executor.cpu" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineCpu := default ($context.Values.executor.cpu) $engineConfiguration.cpu -}} +{{- $engineCpu -}} +{{- end -}} + +{{/* +Executor - Memory Resource Request +*/}} +{{- define "dremio.executor.memory" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineMemory := default ($context.Values.executor.memory) $engineConfiguration.memory -}} +{{- $engineMemory -}}M +{{- end -}} + +{{/* +Executor - Replication Count +*/}} +{{- define "dremio.executor.count" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineCount := default ($context.Values.executor.count) $engineConfiguration.count -}} +{{- $engineCount -}} +{{- end -}} + +{{/* +Executor - ConfigMap +*/}} +{{- define "dremio.executor.config" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- if hasKey (default (dict) $context.Values.executor.engineOverride) $engineName -}} +{{- printf "dremio-config-%v" $engineName -}} +{{- else -}} +dremio-config +{{- end -}} +{{- end -}} + +{{/* +Executor - Dremio Start Parameters +*/}} +{{- define "dremio.executor.extraStartParams" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineExtraStartParams := coalesce $engineConfiguration.extraStartParams $context.Values.executor.extraStartParams $context.Values.extraStartParams -}} +{{- if $engineExtraStartParams}} +{{- printf "%v " $engineExtraStartParams -}} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Extra Init Containers +*/}} +{{- define "dremio.executor.extraInitContainers" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineExtraInitContainers := coalesce $engineConfiguration.extraInitContainers $context.Values.executor.extraInitContainers $context.Values.extraInitContainers -}} +{{- if $engineExtraInitContainers -}} +{{ tpl $engineExtraInitContainers $ }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Extra Volume Mounts +*/}} +{{- define "dremio.executor.extraVolumeMounts" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineExtraVolumeMounts := coalesce $engineConfiguration.extraVolumeMounts $context.Values.executor.extraVolumeMounts $context.Values.extraVolumeMounts -}} +{{- if $engineExtraVolumeMounts -}} +{{ toYaml $engineExtraVolumeMounts }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Extra Volume Mounts +*/}} +{{- define "dremio.executor.extraVolumes" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineExtraVolumes := coalesce $engineConfiguration.extraVolumes $context.Values.executor.extraVolumes $context.Values.extraVolumes -}} +{{- if $engineExtraVolumes -}} +{{ toYaml $engineExtraVolumes }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Persistent Volume Storage Class +*/}} +{{- define "dremio.executor.storageClass" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineStorageClass := coalesce $engineConfiguration.storageClass $context.Values.executor.storageClass $context.Values.storageClass -}} +{{- if $engineStorageClass -}} +storageClass: {{ $engineStorageClass }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Cloud Cache Peristent Volume Claims +*/}} +{{- define "dremio.executor.cloudCache.volumeClaimTemplate" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineCloudCacheConfig := default (dict) $engineConfiguration.cloudCache -}} +{{- $cloudCacheConfig := coalesce $engineConfiguration.cloudCache $context.Values.executor.cloudCache -}} +{{- $cloudCacheStorageClass := coalesce $engineCloudCacheConfig.storageClass $context.Values.executor.cloudCache.storageClass $engineConfiguration.storageClass $context.Values.executor.storageClass $context.Values.storageClass -}} +{{- if $cloudCacheConfig.enabled -}} +{{- range $index, $cloudCacheVolumeConfig := $cloudCacheConfig.volumes }} +{{- $volumeStorageClass := coalesce $cloudCacheVolumeConfig.storageClass $cloudCacheStorageClass }} +- metadata: + name: {{ coalesce $cloudCacheVolumeConfig.name (printf "dremio-%s-executor-c3-%d" $engineName $index) }} + spec: + accessModes: ["ReadWriteOnce"] + {{- if $volumeStorageClass }} + storageClassName: {{ $volumeStorageClass }} + {{- end }} + resources: + requests: + storage: {{ $cloudCacheVolumeConfig.size }} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Executor - Cloud Cache Persistent Volume Mounts (one mount per configured cloud cache volume, at /opt/dremio/cloudcache/c<index>) +*/}} +{{- define "dremio.executor.cloudCache.volumeMounts" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $cloudCacheConfig := coalesce $engineConfiguration.cloudCache $context.Values.executor.cloudCache -}} +{{- if $cloudCacheConfig.enabled -}} +{{- range $index, $cloudCacheVolumeConfig := $cloudCacheConfig.volumes }} +- name: {{ coalesce $cloudCacheVolumeConfig.name (printf "dremio-%s-executor-c3-%d" $engineName $index) }} + mountPath: /opt/dremio/cloudcache/c{{ $index }} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Executor - Cloud Cache Init Containers (chown the cloud cache mount directories to the dremio user) +*/}} +{{- define "dremio.executor.cloudCache.initContainers" -}} +{{- $context := index . 
0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $cloudCacheConfig := coalesce $engineConfiguration.cloudCache $context.Values.executor.cloudCache -}} +{{- if $cloudCacheConfig.enabled -}} +- name: chown-cloudcache-directory + image: {{ $context.Values.image }}:{{ $context.Values.imageTag }} + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 0 + volumeMounts: + {{- include "dremio.executor.cloudCache.volumeMounts" (list $context $engineName) | nindent 2 }} + command: ["bash"] + args: ["-c", "chown dremio:dremio /opt/dremio/cloudcache/c*"] +{{- end -}} +{{- end -}} + +{{/* +Executor - Persistent Volume Size +*/}} +{{- define "dremio.executor.volumeSize" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineVolumeSize := default ($context.Values.executor.volumeSize) $engineConfiguration.volumeSize -}} +{{- $engineVolumeSize -}} +{{- end -}} + +{{/* +Executor - Persistent Volume Name +*/}} +{{- define "dremio.executor.volumeClaimName" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $volumeClaimName := default (printf "dremio-%v-executor-volume" $engineName) $engineConfiguration.volumeClaimName -}} +{{- $volumeClaimName -}} +{{- end -}} + +{{/* +Executor - StatefulSet Annotations +*/}} +{{- define "dremio.executor.annotations" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineAnnotations := coalesce $engineConfiguration.annotations $context.Values.executor.annotations $context.Values.annotations -}} +{{- if $engineAnnotations -}} +annotations: + {{- toYaml $engineAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Executor - StatefulSet Labels +*/}} +{{- define "dremio.executor.labels" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineLabels := coalesce $engineConfiguration.labels $context.Values.executor.labels $context.Values.labels -}} +{{- if $engineLabels -}} +labels: + {{- toYaml $engineLabels | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Annotations +*/}} +{{- define "dremio.executor.podAnnotations" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $enginePodAnnotations := coalesce $engineConfiguration.podAnnotations $context.Values.executor.podAnnotations $context.Values.podAnnotations -}} +{{- if $enginePodAnnotations -}} +{{ toYaml $enginePodAnnotations }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Labels +*/}} +{{- define "dremio.executor.podLabels" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $enginePodLabels := coalesce $engineConfiguration.podLabels $context.Values.executor.podLabels $context.Values.podLabels -}} +{{- if $enginePodLabels -}} +{{ toYaml $enginePodLabels }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Node Selectors +*/}} +{{- define "dremio.executor.nodeSelector" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineNodeSelector := coalesce $engineConfiguration.nodeSelector $context.Values.executor.nodeSelector $context.Values.nodeSelector -}} +{{- if $engineNodeSelector -}} +nodeSelector: + {{- toYaml $engineNodeSelector | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Executor - Pod Tolerations +*/}} +{{- define "dremio.executor.tolerations" -}} +{{- $context := index . 0 -}} +{{- $engineName := index . 
1 -}} +{{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} +{{- $engineTolerations := coalesce $engineConfiguration.tolerations $context.Values.executor.tolerations $context.Values.tolerations -}} +{{- if $engineTolerations -}} +tolerations: + {{- toYaml $engineTolerations | nindent 2 }} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/_helpers_general.tpl b/charts/dremio_v2/templates/_helpers_general.tpl new file mode 100644 index 00000000..9d5f2c55 --- /dev/null +++ b/charts/dremio_v2/templates/_helpers_general.tpl @@ -0,0 +1,86 @@ +{{/* +Shared - Image Pull Secrets +*/}} +{{- define "dremio.imagePullSecrets" -}} +{{- if $.Values.imagePullSecrets }} +imagePullSecrets: +{{- range $secretName := $.Values.imagePullSecrets }} +- name: {{ $secretName }} +{{- end}} +{{- end -}} +{{- end -}} + +{{/* +Service - Annotations +*/}} +{{- define "dremio.service.annotations" -}} +{{- $serviceAnnotations := coalesce $.Values.service.annotations $.Values.annotations -}} +{{- if $.Values.service.internalLoadBalancer }} +annotations: + service.beta.kubernetes.io/azure-load-balancer-internal: "true" + cloud.google.com/load-balancer-type: "Internal" + service.beta.kubernetes.io/aws-load-balancer-internal: 0.0.0.0/0 + {{- if $serviceAnnotations -}} + {{- toYaml $serviceAnnotations | nindent 2 -}} + {{- end -}} +{{- else -}} +{{ if $serviceAnnotations }} +annotations: + {{- toYaml $serviceAnnotations | nindent 4 -}} +{{- end -}} +{{- end }} +{{- end -}} + +{{/* +Service - Labels +*/}} +{{- define "dremio.service.labels" -}} +{{- $serviceLabels := coalesce $.Values.service.labels $.Values.labels -}} +{{- if $serviceLabels -}} +{{- toYaml $serviceLabels }} +{{- end -}} +{{- end -}} + +{{/* +Admin - Pod Annotations +*/}} +{{- define "dremio.admin.podAnnotations" -}} +{{- $adminPodAnnotations := coalesce $.Values.coordinator.podAnnotations $.Values.podAnnotations -}} +{{- if 
$adminPodAnnotations -}} +annotations: + {{- toYaml $adminPodAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Admin - Pod Labels +*/}} +{{- define "dremio.admin.podLabels" -}} +{{- $adminPodLabels := coalesce $.Values.coordinator.podLabels $.Values.podLabels -}} +{{- if $adminPodLabels -}} +labels: + {{- toYaml $adminPodLabels | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Admin - Pod Node Selectors +*/}} +{{- define "dremio.admin.nodeSelector" -}} +{{- $adminNodeSelector := coalesce $.Values.coordinator.nodeSelector $.Values.nodeSelector -}} +{{- if $adminNodeSelector -}} +nodeSelector: + {{- toYaml $adminNodeSelector | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Admin - Pod Tolerations +*/}} +{{- define "dremio.admin.tolerations" -}} +{{- $adminPodTolerations := coalesce $.Values.coordinator.tolerations $.Values.tolerations -}} +{{- if $adminPodTolerations -}} +tolerations: + {{- toYaml $adminPodTolerations | nindent 2 }} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/_helpers_zookeeper.tpl b/charts/dremio_v2/templates/_helpers_zookeeper.tpl new file mode 100644 index 00000000..dbf01e44 --- /dev/null +++ b/charts/dremio_v2/templates/_helpers_zookeeper.tpl @@ -0,0 +1,82 @@ +{{/* +Zookeeper - Memory Calculation +*/}} +{{- define "dremio.zookeeper.memory" -}} +{{- $heapMemory := sub (int $.Values.zookeeper.memory) 100 -}} +{{- $heapMemory -}} +{{- end -}} + +{{/* +Zookeeper - Storage Class +*/}} +{{- define "dremio.zookeeper.storageClass" -}} +{{- $zookeeperStorageClass := coalesce $.Values.zookeeper.storageClass $.Values.storageClass -}} +{{- if $zookeeperStorageClass -}} +storageClass: {{ $zookeeperStorageClass }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - StatefulSet Annotations +*/}} +{{- define "dremio.zookeeper.annotations" -}} +{{- $zookeeperAnnotations := coalesce $.Values.zookeeper.annotations $.Values.annotations -}} +{{- if $zookeeperAnnotations -}} +annotations: + {{- toYaml 
$zookeeperAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - StatefulSet Labels +*/}} +{{- define "dremio.zookeeper.labels" -}} +{{- $zookeeperLabels := coalesce $.Values.zookeeper.labels $.Values.labels -}} +{{- if $zookeeperLabels -}} +labels: + {{- toYaml $zookeeperLabels | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - Pod Annotations +*/}} +{{- define "dremio.zookeeper.podAnnotations" -}} +{{- $coordiantorAnnotations := coalesce $.Values.zookeeper.podAnnotations $.Values.podAnnotations -}} +{{- if $coordiantorAnnotations -}} +annotations: + {{- toYaml $coordiantorAnnotations | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - Pod Labels +*/}} +{{- define "dremio.zookeeper.podLabels" -}} +{{- $zookeeperLabels := coalesce $.Values.zookeeper.podLabels $.Values.podLabels -}} +{{- if $zookeeperLabels -}} +{{ toYaml $zookeeperLabels }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - Pod Node Selectors +*/}} +{{- define "dremio.zookeeper.nodeSelector" -}} +{{- $zookeeperNodeSelector := coalesce $.Values.zookeeper.nodeSelector $.Values.nodeSelector -}} +{{- if $zookeeperNodeSelector -}} +nodeSelector: + {{- toYaml $zookeeperNodeSelector | nindent 2 }} +{{- end -}} +{{- end -}} + +{{/* +Zookeeper - Pod Tolerations +*/}} +{{- define "dremio.zookeeper.tolerations" -}} +{{- $zookeeperTolerations := coalesce $.Values.zookeeper.tolerations $.Values.tolerations -}} +{{- if $zookeeperTolerations -}} +tolerations: + {{- toYaml $zookeeperTolerations | nindent 2 }} +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/dremio-admin.yaml b/charts/dremio_v2/templates/dremio-admin.yaml new file mode 100644 index 00000000..10de4317 --- /dev/null +++ b/charts/dremio_v2/templates/dremio-admin.yaml @@ -0,0 +1,41 @@ +{{- if $.Values.DremioAdmin -}} +# dremio-admin pod is used to run offline commands like +# clean, restore or set-password against the Dremio cluster. 
+# The Dremio cluster should be shutdown before attempting to +# create the dremio-admin pod. +# You connect to the pod (kubectl exec -it dremio-admin -- bash), +# go to /opt/dremio/bin and run dremio-admin commands as documented. +apiVersion: v1 +kind: Pod +metadata: + name: dremio-admin + {{- include "dremio.admin.podAnnotations" $ | nindent 2}} + {{- include "dremio.admin.podLabels" $ | nindent 2}} +spec: + containers: + - name: dremio-admin + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + stdin: true + tty: true + resources: + requests: + cpu: {{ $.Values.coordinator.cpu }} + memory: {{ $.Values.coordinator.memory }}M + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + - name: dremio-config + mountPath: /opt/dremio/conf + command: ["sleep", "infinity"] + {{- include "dremio.imagePullSecrets" $ | nindent 2 }} + {{- include "dremio.admin.nodeSelector" $ | nindent 2 }} + {{- include "dremio.admin.tolerations" $ | nindent 2 }} + volumes: + - name: dremio-master-volume + persistentVolumeClaim: + claimName: dremio-master-volume-dremio-master-0 + - name: dremio-config + configMap: + name: dremio-config +{{- end -}} diff --git a/charts/dremio_v2/templates/dremio-configmap.yaml b/charts/dremio_v2/templates/dremio-configmap.yaml new file mode 100644 index 00000000..34fa67aa --- /dev/null +++ b/charts/dremio_v2/templates/dremio-configmap.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: dremio-config +data: + {{- tpl ($.Files.Glob "config/*").AsConfig . | nindent 2 }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: dremio-hive2-config +data: + {{- tpl ($.Files.Glob "config/hive2/*").AsConfig . | nindent 2 }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: dremio-hive3-config +data: + {{- tpl ($.Files.Glob "config/hive3/*").AsConfig . 
| nindent 2 }} +--- +{{- $originalValues := mustDeepCopy $.Values -}} +{{- range $engine := keys (default (dict) $.Values.executor.engineOverride) -}} +{{- if has $engine $.Values.executor.engines -}} +{{- $_ := mustMergeOverwrite $.Values $originalValues -}} +{{- $engineAwareConfig := mustDeepCopy $.Values -}} +{{- $_ := set $engineAwareConfig "currentEngine" $engine }} +{{- $_ := mustMergeOverwrite $engineAwareConfig.executor (get $.Values.executor.engineOverride $engine) -}} +{{- $_ := mustMergeOverwrite $.Values $engineAwareConfig -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: dremio-config-{{ $engine }} +data: + {{- tpl ($.Files.Glob "config/*").AsConfig $ | nindent 2 }} +--- +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/dremio-coordinator.yaml b/charts/dremio_v2/templates/dremio-coordinator.yaml new file mode 100644 index 00000000..426c9f16 --- /dev/null +++ b/charts/dremio_v2/templates/dremio-coordinator.yaml @@ -0,0 +1,150 @@ +{{- if not $.Values.DremioAdmin -}} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: dremio-coordinator +spec: + serviceName: "dremio-cluster-pod" + replicas: {{ $.Values.coordinator.count }} + podManagementPolicy: "Parallel" + revisionHistoryLimit: 1 + selector: + matchLabels: + app: dremio-coordinator + {{- include "dremio.coordinator.annotations" $ | nindent 2 }} + {{- include "dremio.coordinator.labels" $ | nindent 2 }} + template: + metadata: + labels: + app: dremio-coordinator + role: dremio-cluster-pod + {{- include "dremio.coordinator.podLabels" $ | nindent 8 }} + annotations: + dremio-configmap/checksum: {{ (tpl ($.Files.Glob "config/*").AsConfig $) | sha256sum }} + {{- include "dremio.coordinator.podAnnotations" $ | nindent 8 }} + spec: + terminationGracePeriodSeconds: 120 + {{- include "dremio.coordinator.nodeSelector" $ | nindent 6 }} + {{- include "dremio.coordinator.tolerations" $ | nindent 6 }} + containers: + - name: dremio-coordinator + image: {{ 
$.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + resources: + requests: + cpu: {{ $.Values.coordinator.cpu }} + memory: {{ $.Values.coordinator.memory }}M + volumeMounts: + - name: dremio-config + mountPath: /opt/dremio/conf + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2.d + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2-ee.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3-ee.d + {{- if or $.Values.coordinator.web.tls.enabled $.Values.coordinator.client.tls.enabled }} + - name: dremio-tls + mountPath: /opt/dremio/tls + {{- end }} + {{- include "dremio.coordinator.extraVolumeMounts" $ | nindent 8 }} + env: + - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB + value: "{{ template "dremio.coordinator.heapMemory" $ }}" + - name: DREMIO_MAX_DIRECT_MEMORY_SIZE_MB + value: "{{ template "dremio.coordinator.directMemory" $ }}" + - name: DREMIO_JAVA_SERVER_EXTRA_OPTS + value: >- + {{- include "dremio.coordinator.extraStartParams" $ | nindent 12 -}} + -Dzookeeper=zk-hs:2181 + -Dservices.coordinator.enabled=true + -Dservices.coordinator.master.enabled=false + -Dservices.coordinator.master.embedded-zookeeper.enabled=false + -Dservices.executor.enabled=false + -Dservices.conduit.port=45679 + command: ["/opt/dremio/bin/dremio"] + args: ["start-fg"] + ports: + - containerPort: 31010 + name: client + - containerPort: 45678 + name: server-fabric + - containerPort: 45679 + name: server-conduit + readinessProbe: + httpGet: + path: / + {{- if $.Values.coordinator.web.tls.enabled }} + scheme: HTTPS + {{- end }} + port: 9047 + initialDelaySeconds: 5 + periodSeconds: 5 + initContainers: + {{- include "dremio.coordinator.extraInitContainers" $ | nindent 6 }} + - name: wait-for-dremio-master + image: busybox + command: ["sh", "-c", "until nc -z dremio-client {{ $.Values.coordinator.web.port }} > 
/dev/null; do echo Waiting for Dremio master.; sleep 2; done;"] + {{- if $.Values.coordinator.web.tls.enabled }} + - name: generate-ui-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/ui.pkcs12", "-passout", "pass:"] + {{- end }} + {{- if $.Values.coordinator.client.tls.enabled }} + - name: generate-client-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/client.pkcs12", "-passout", "pass:"] + {{- end }} + volumes: + - name: dremio-config + configMap: + name: dremio-config + - name: dremio-hive2-config + configMap: + name: dremio-hive2-config + - name: dremio-hive3-config + configMap: + name: dremio-hive3-config + {{- if or $.Values.coordinator.web.tls.enabled $.Values.coordinator.client.tls.enabled }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if $.Values.coordinator.web.tls.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ $.Values.coordinator.web.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if $.Values.coordinator.client.tls.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ $.Values.coordinator.client.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- include "dremio.coordinator.extraVolumes" $ | nindent 6 }} + {{- include 
"dremio.imagePullSecrets" $ | nindent 6}} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/dremio-executor.yaml b/charts/dremio_v2/templates/dremio-executor.yaml new file mode 100644 index 00000000..beea2890 --- /dev/null +++ b/charts/dremio_v2/templates/dremio-executor.yaml @@ -0,0 +1,116 @@ +{{- if not $.Values.DremioAdmin -}} +{{- range $engineIndex, $engineName := $.Values.executor.engines -}} +{{- $executorName := ternary "dremio-executor" (printf "dremio-executor-%s" $engineName) (eq $engineName "default") -}} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ $executorName }} +spec: + serviceName: "dremio-cluster-pod" + replicas: {{ template "dremio.executor.count" (list $ $engineName) }} + podManagementPolicy: "Parallel" + revisionHistoryLimit: 1 + selector: + matchLabels: + app: {{ $executorName }} + {{- include "dremio.executor.annotations" (list $ $engineName) | nindent 2}} + {{- include "dremio.executor.labels" (list $ $engineName) | nindent 2}} + template: + metadata: + labels: + app: {{ $executorName }} + role: dremio-cluster-pod + {{- include "dremio.executor.podLabels" (list $ $engineName) | nindent 8}} + annotations: + dremio-configmap/checksum: {{ (tpl ($.Files.Glob "config/*").AsConfig $) | sha256sum }} + {{- include "dremio.executor.podAnnotations" (list $ $engineName) | nindent 8}} + spec: + terminationGracePeriodSeconds: 120 + {{- include "dremio.executor.nodeSelector" (list $ $engineName) | nindent 6}} + {{- include "dremio.executor.tolerations" (list $ $engineName) | nindent 6}} + containers: + - name: dremio-executor + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + resources: + requests: + cpu: {{ template "dremio.executor.cpu" (list $ $engineName) }} + memory: {{ template "dremio.executor.memory" (list $ $engineName) }} + volumeMounts: + - name: {{ template "dremio.executor.volumeClaimName" (list $ $engineName) }} + mountPath: /opt/dremio/data + - name: 
dremio-config + mountPath: /opt/dremio/conf + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2.d + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2-ee.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3-ee.d + {{- include "dremio.executor.cloudCache.volumeMounts" (list $ $engineName) | nindent 8 }} + {{- include "dremio.executor.extraVolumeMounts" (list $ $engineName) | nindent 8 }} + env: + - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB + value: "{{ template "dremio.executor.heapMemory" (list $ $engineName) }}" + - name: DREMIO_MAX_DIRECT_MEMORY_SIZE_MB + value: "{{ template "dremio.executor.directMemory" (list $ $engineName) }}" + - name: DREMIO_JAVA_SERVER_EXTRA_OPTS + value: >- + {{- include "dremio.executor.extraStartParams" (list $ $engineName) | nindent 12 -}} + -Dzookeeper=zk-hs:2181 + -Dservices.coordinator.enabled=false + -Dservices.coordinator.master.enabled=false + -Dservices.coordinator.master.embedded-zookeeper.enabled=false + -Dservices.executor.enabled=true + -Dservices.conduit.port=45679 + -Dservices.node-tag={{ $engineName }} + command: ["/opt/dremio/bin/dremio"] + args: ["start-fg"] + ports: + - containerPort: 45678 + name: server-fabric + - containerPort: 45679 + name: server-conduit + initContainers: + {{- include "dremio.executor.extraInitContainers" (list $ $engineName) | nindent 6 }} + - name: chown-data-directory + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 0 + volumeMounts: + - name: {{ template "dremio.executor.volumeClaimName" (list $ $engineName) }} + mountPath: /opt/dremio/data + command: ["chown"] + args: ["dremio:dremio", "/opt/dremio/data"] + {{- include "dremio.executor.cloudCache.initContainers" (list $ $engineName) | nindent 6 }} + - name: wait-for-zookeeper + image: busybox + command: ["sh", "-c", 
"until ping -c 1 -W 1 zk-hs > /dev/null; do echo Waiting for Zookeeper to be ready.; sleep 2; done;"] + volumes: + - name: dremio-config + configMap: + name: {{ template "dremio.executor.config" (list $ $engineName) }} + - name: dremio-hive2-config + configMap: + name: dremio-hive2-config + - name: dremio-hive3-config + configMap: + name: dremio-hive3-config + {{- include "dremio.executor.extraVolumes" (list $ $engineName) | nindent 6 }} + {{- include "dremio.imagePullSecrets" $ | nindent 6 }} + volumeClaimTemplates: + - metadata: + name: {{ template "dremio.executor.volumeClaimName" (list $ $engineName) }} + spec: + accessModes: ["ReadWriteOnce"] + {{- include "dremio.executor.storageClass" (list $ $engineName) | nindent 6 }} + resources: + requests: + storage: {{ template "dremio.executor.volumeSize" (list $ $engineName) }} + {{- include "dremio.executor.cloudCache.volumeClaimTemplate" (list $ $engineName) | nindent 2 }} +{{ end -}} +{{- end -}} diff --git a/charts/dremio_v2/templates/dremio-master.yaml b/charts/dremio_v2/templates/dremio-master.yaml new file mode 100644 index 00000000..d2e12228 --- /dev/null +++ b/charts/dremio_v2/templates/dremio-master.yaml @@ -0,0 +1,197 @@ +{{- if not $.Values.DremioAdmin -}} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: dremio-master +spec: + serviceName: "dremio-cluster-pod" + podManagementPolicy: "Parallel" + replicas: 1 + selector: + matchLabels: + app: dremio-coordinator + {{- include "dremio.coordinator.annotations" $ | nindent 2}} + {{- include "dremio.coordinator.labels" $ | nindent 2}} + template: + metadata: + labels: + app: dremio-coordinator + role: dremio-cluster-pod + {{- include "dremio.coordinator.podLabels" $ | nindent 8}} + annotations: + dremio-configmap/checksum: {{ (tpl ($.Files.Glob "config/*").AsConfig $) | sha256sum }} + {{- include "dremio.coordinator.podAnnotations" $ | nindent 8}} + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - 
labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - dremio-coordinator + topologyKey: "kubernetes.io/hostname" + terminationGracePeriodSeconds: 120 + {{- include "dremio.coordinator.nodeSelector" $ | nindent 6 }} + {{- include "dremio.coordinator.tolerations" $ | nindent 6 }} + containers: + - name: dremio-master-coordinator + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + resources: + requests: + cpu: {{ $.Values.coordinator.cpu }} + memory: {{ $.Values.coordinator.memory }}M + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + - name: dremio-config + mountPath: /opt/dremio/conf + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2.d + - name: dremio-hive2-config + mountPath: /opt/dremio/plugins/connectors/hive2-ee.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3.d + - name: dremio-hive3-config + mountPath: /opt/dremio/plugins/connectors/hive3-ee.d + {{- if or $.Values.coordinator.web.tls.enabled $.Values.coordinator.client.tls.enabled }} + - name: dremio-tls + mountPath: /opt/dremio/tls + {{- end }} + {{- include "dremio.coordinator.extraVolumeMounts" $ | nindent 8 }} + env: + - name: DREMIO_MAX_HEAP_MEMORY_SIZE_MB + value: "{{ template "dremio.coordinator.heapMemory" $ }}" + - name: DREMIO_MAX_DIRECT_MEMORY_SIZE_MB + value: "{{ template "dremio.coordinator.directMemory" $ }}" + - name: DREMIO_JAVA_SERVER_EXTRA_OPTS + value: >- + {{- include "dremio.coordinator.extraStartParams" $ | nindent 12 -}} + -Dzookeeper=zk-hs:2181 + -Dservices.coordinator.enabled=true + -Dservices.coordinator.master.enabled=true + -Dservices.coordinator.master.embedded-zookeeper.enabled=false + -Dservices.executor.enabled=false + -Dservices.conduit.port=45679 + command: ["/opt/dremio/bin/dremio"] + args: + - "start-fg" + ports: + - containerPort: 9047 + name: web + - containerPort: 31010 + name: client + - containerPort: 45678 + name: 
server-fabric + - containerPort: 45679 + name: server-conduit + readinessProbe: + httpGet: + path: / + {{- if $.Values.coordinator.web.tls.enabled }} + scheme: HTTPS + {{- end }} + port: 9047 + initialDelaySeconds: 5 + periodSeconds: 5 + initContainers: + {{- include "dremio.coordinator.extraInitContainers" $ | nindent 6 }} + - name: start-only-one-dremio-master + image: busybox + command: ["sh", "-c", "INDEX=${HOSTNAME##*-}; if [ $INDEX -ne 0 ]; then echo Only one master should be running.; exit 1; fi; "] + - name: wait-for-zookeeper + image: busybox + command: ["sh", "-c", "until ping -c 1 -W 1 zk-hs > /dev/null; do echo Waiting for Zookeeper to be ready.; sleep 2; done;"] + - name: chown-data-directory + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 0 + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + command: ["chown"] + args: + - "dremio:dremio" + - "/opt/dremio/data" + - name: upgrade-task + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-master-volume + mountPath: /opt/dremio/data + command: ["/opt/dremio/bin/dremio-admin"] + args: + - "upgrade" + {{- if $.Values.coordinator.web.tls.enabled }} + - name: generate-ui-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-ui + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/ui.pkcs12", "-passout", "pass:"] + {{- end }} + {{- if $.Values.coordinator.client.tls.enabled }} + - name: generate-client-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: 
dremio-tls-secret-client + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/client.pkcs12", "-passout", "pass:"] + {{- end }} + volumes: + - name: dremio-config + configMap: + name: dremio-config + - name: dremio-hive2-config + configMap: + name: dremio-hive2-config + - name: dremio-hive3-config + configMap: + name: dremio-hive3-config + {{- if or $.Values.coordinator.web.tls.enabled $.Values.coordinator.client.tls.enabled }} + - name: dremio-tls + emptyDir: {} + {{- end }} + {{- if $.Values.coordinator.web.tls.enabled }} + - name: dremio-tls-secret-ui + secret: + secretName: {{ $.Values.coordinator.web.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- if $.Values.coordinator.client.tls.enabled }} + - name: dremio-tls-secret-client + secret: + secretName: {{ $.Values.coordinator.client.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} + {{- include "dremio.coordinator.extraVolumes" $ | nindent 6 }} + {{- include "dremio.imagePullSecrets" $ | nindent 6 }} + volumeClaimTemplates: + - metadata: + name: dremio-master-volume + spec: + accessModes: ["ReadWriteOnce"] + {{- include "dremio.coordinator.storageClass" $ | nindent 6 }} + resources: + requests: + storage: {{ $.Values.coordinator.volumeSize }} +{{- end -}} diff --git a/charts/dremio_v2/templates/dremio-service-client.yaml b/charts/dremio_v2/templates/dremio-service-client.yaml new file mode 100644 index 00000000..16de16e1 --- /dev/null +++ b/charts/dremio_v2/templates/dremio-service-client.yaml @@ -0,0 +1,38 @@ +{{- if not $.Values.DremioAdmin -}} +apiVersion: v1 +kind: Service +metadata: + name: dremio-client + labels: + app: dremio-client + {{- include "dremio.service.labels" $ | nindent 4 }} + {{- include "dremio.service.annotations" $ | nindent 2 }} +spec: + 
ports: + - port: {{ $.Values.coordinator.client.port | default 31010 }} + targetPort: client + name: client + - port: {{ $.Values.coordinator.web.port | default 9047 }} + targetPort: web + name: web + selector: + app: dremio-coordinator + type: {{ $.Values.service.type }} + {{- if and (eq $.Values.service.type "LoadBalancer") $.Values.service.loadBalancerIP }} + loadBalancerIP: {{ $.Values.service.loadBalancerIP }} + {{- end -}} + {{- if $.Values.service.sessionAffinity }} + sessionAffinity: {{ $.Values.service.sessionAffinity }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: dremio-cluster-pod +spec: + ports: + - port: 9999 + clusterIP: None + selector: + role: dremio-cluster-pod +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/zookeeper.yaml b/charts/dremio_v2/templates/zookeeper.yaml new file mode 100644 index 00000000..c94e17de --- /dev/null +++ b/charts/dremio_v2/templates/zookeeper.yaml @@ -0,0 +1,139 @@ +{{- if not $.Values.DremioAdmin -}} +apiVersion: v1 +kind: Service +metadata: + name: zk-hs + labels: + app: zk +spec: + ports: + - port: 2181 + name: client + - port: 2888 + name: server + - port: 3888 + name: leader-election + clusterIP: None + selector: + app: zk +--- +apiVersion: v1 +kind: Service +metadata: + name: zk-cs + labels: + app: zk +spec: + ports: + - port: 2181 + name: client + selector: + app: zk +--- +apiVersion: policy/v1beta1 +kind: PodDisruptionBudget +metadata: + name: zk-pdb +spec: + selector: + matchLabels: + app: zk + maxUnavailable: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: zk +spec: + selector: + matchLabels: + app: zk + serviceName: zk-hs + replicas: {{ $.Values.zookeeper.count }} + updateStrategy: + type: RollingUpdate + podManagementPolicy: Parallel + {{- include "dremio.zookeeper.annotations" $ | nindent 2 }} + {{- include "dremio.zookeeper.labels" $ | nindent 2 }} + template: + metadata: + labels: + app: zk + {{- include "dremio.zookeeper.podLabels" $ 
| nindent 8 }} + {{- include "dremio.zookeeper.podAnnotations" $ | nindent 6 }} + spec: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - zk + topologyKey: "kubernetes.io/hostname" + {{- include "dremio.zookeeper.nodeSelector" $ | nindent 6 }} + {{- include "dremio.zookeeper.tolerations" $ | nindent 6 }} + containers: + - name: kubernetes-zookeeper + imagePullPolicy: Always + image: "{{ $.Values.zookeeper.image }}:{{ $.Values.zookeeper.imageTag }}" + resources: + requests: + cpu: {{ $.Values.zookeeper.cpu }} + memory: {{ $.Values.zookeeper.memory }}M + ports: + - containerPort: 2181 + name: client + - containerPort: 2888 + name: server + - containerPort: 3888 + name: leader-election + command: + - sh + - -c + - "start-zookeeper \ + --servers={{ $.Values.zookeeper.count }} \ + --data_dir=/var/lib/zookeeper/data \ + --data_log_dir=/var/lib/zookeeper/data/log \ + --conf_dir=/opt/zookeeper/conf \ + --client_port=2181 \ + --election_port=3888 \ + --server_port=2888 \ + --tick_time=2000 \ + --init_limit=10 \ + --sync_limit=5 \ + --heap={{- template "dremio.zookeeper.memory" $ -}}M \ + --max_client_cnxns=60 \ + --snap_retain_count=3 \ + --purge_interval=12 \ + --max_session_timeout=40000 \ + --min_session_timeout=4000 \ + --log_level=INFO" + readinessProbe: + exec: + command: ["sh", "-c", "zookeeper-ready 2181"] + initialDelaySeconds: 10 + timeoutSeconds: 5 + livenessProbe: + exec: + command: ["sh", "-c", "zookeeper-ready 2181"] + initialDelaySeconds: 10 + timeoutSeconds: 5 + volumeMounts: + - name: datadir + mountPath: /var/lib/zookeeper + securityContext: + runAsUser: 1000 + fsGroup: 1000 + {{- include "dremio.imagePullSecrets" $ | nindent 6 }} + volumeClaimTemplates: + - metadata: + name: datadir + spec: + accessModes: ["ReadWriteOnce"] + {{- include "dremio.zookeeper.storageClass" $ | nindent 6 }} + resources: + requests: + storage: {{
$.Values.zookeeper.volumeSize }} +{{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/values.yaml b/charts/dremio_v2/values.yaml new file mode 100644 index 00000000..11667d82 --- /dev/null +++ b/charts/dremio_v2/values.yaml @@ -0,0 +1,396 @@ +# The Dremio image used in the cluster. +# +# It is *highly* recommended to update the version tag to +# the version that you are using. This will ensure that all +# the pods are using the same version of the software. +# +# Using latest will cause Dremio to potentially upgrade versions +# automatically during redeployments and may negatively impact +# the cluster. +image: dremio/dremio-oss +imageTag: latest + +# Annotations, labels, node selectors, and tolerations +# +# annotations: Annotations are applied to the StatefulSets that are deployed. +# podAnnotations: Pod annotations are applied to the pods that are deployed. +# labels: Labels operate much like annotations. +# podLabels: Labels that are applied to the pods that are deployed. +# nodeSelector: Target pods to nodes based on labels set on the nodes. For more +# information, see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +# tolerations: Tolerations allow the negation of taints that have been applied to some set of nodes +# in the Kubernetes cluster so that pods can be scheduled on those tainted nodes. +annotations: {} +podAnnotations: {} +labels: {} +podLabels: {} +nodeSelector: {} +tolerations: [] + +# Dremio Coordinator +coordinator: + # CPU & Memory + # Memory allocated to each coordinator, expressed in MB. + # CPU allocated to each coordinator, expressed in CPU cores. + cpu: 15 + memory: 122800 + + # This count is used for slave coordinators only. + # The total number of coordinators will always be count + 1. + count: 0 + + # Coordinator data volume size (applies to the master coordinator only). 
+ # In most managed Kubernetes environments (AKS, GKE, etc.), the size of the disk has a direct impact on + # the provisioned and maximum performance of the disk. + volumeSize: 128Gi + + # Uncomment the lines below to use a custom set of extra startup parameters for the coordinator. + #extraStartParams: >- + # -DsomeKey=someValue + + # Extra Init Containers + # Uncomment the below lines to use a custom set of extra init containers for the coordinator. + #extraInitContainers: | + # - name: extra-init-container + # image: {{ $.Values.image }}:{{ $.Values.imageTag }} + # command: ["echo", "Hello World"] + + # Extra Volumes + # Uncomment below to use a custom set of extra volumes for the coordinator. + #extraVolumes: [] + + # Extra Volume Mounts + # Uncomment below to use a custom set of extra volume mounts for the coordinator. + #extraVolumeMounts: [] + + # Uncomment this value to use a different storage class for the coordinator. + #storageClass: + + # These values, when defined, override the provided shared annotations, labels, node selectors, or tolerations. + # Uncomment only if you are trying to override the chart's shared values. + #annotations: {} + #podAnnotations: {} + #labels: {} + #podLabels: {} + #nodeSelector: {} + #tolerations: [] + + # Web UI + web: + port: 9047 + tls: + # To enable TLS for the web UI, set the enabled flag to true and provide + # the appropriate Kubernetes TLS secret. + enabled: false + + # To create a TLS secret, use the following command: + # kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} + secret: dremio-tls-secret-ui + + # ODBC/JDBC Client + client: + port: 31010 + tls: + # To enable TLS for the client endpoints, set the enabled flag to + # true and provide the appropriate Kubernetes TLS secret. Client + # endpoint encryption is available only on Dremio Enterprise + # Edition and should not be enabled otherwise. 
+ enabled: false + + # To create a TLS secret, use the following command: + # kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} + secret: dremio-tls-secret-client + +# Dremio Executor +executor: + # CPU & Memory + # Memory allocated to each executor, expressed in MB. + # CPU allocated to each executor, expressed in CPU cores. + cpu: 15 + memory: 122800 + + # Engines + # Engine names must be 47 characters or less and consist of lowercase alphanumeric characters or '-'. + # Note: The number of executor pods will be the length of the array below * count. + engines: ["default"] + count: 3 + + # Executor volume size. + volumeSize: 128Gi + + # Uncomment the lines below to use a custom set of extra startup parameters for executors. + #extraStartParams: >- + # -DsomeKey=someValue + + # Extra Init Containers + # Uncomment the below lines to use a custom set of extra init containers for executors. + #extraInitContainers: | + # - name: extra-init-container + # image: {{ $.Values.image }}:{{ $.Values.imageTag }} + # command: ["echo", "Hello World"] + + # Extra Volumes + # Uncomment below to use a custom set of extra volumes for executors. + #extraVolumes: [] + + # Extra Volume Mounts + # Uncomment below to use a custom set of extra volume mounts for executors. + #extraVolumeMounts: [] + + # Uncomment this value to use a different storage class for executors. + #storageClass: + + # Dremio C3 + # Designed for use with NVMe storage devices, performance may be impacted when using + # persistent volume storage that resides far from the physical node. + cloudCache: + enabled: true + + # Uncomment this value to use a different storage class for C3. + #storageClass: + + # Volumes to use for C3, specify multiple volumes if there is more than one local + # NVMe disk that you would like to use for C3. + # + # The below example shows all valid options that can be provided for a volume.
+ # volumes: + # - name: "dremio-default-c3" + # size: 100Gi + # storageClass: "local-nvme" + volumes: + - size: 100Gi + + # These values, when defined and not empty, override the provided shared annotations, labels, node selectors, or tolerations. + # Uncomment only if you are trying to override the chart's shared values. + #annotations: {} + #podAnnotations: {} + #labels: {} + #podLabels: {} + #nodeSelector: {} + #tolerations: [] + + # Engine Overrides + # + # The settings above are overridable on a per-engine basis. These + # values here will take precedence and *override* the configured values + # on a per-engine basis. Engine overrides are matched with the name in the above + # list of engines. + # + # Special per-engine parameters: + # volumeClaimName: For each engine, you can optionally specify a value for the volume claim name, + # this value must be unique to each engine or may cause unintended consequences. This value is + # primarily intended for transitioning an existing single engine to a multi-engine configuration + # where there may already have been existing persistent volumes. + # + # The below example shows all valid options that can be overridden on a per-engine basis.
+ # engineOverride: + # engineNameHere: + # cpu: 1 + # memory: 122800 + # + # count: 1 + # + # annotations: {} + # podAnnotations: {} + # labels: {} + # podLabels: {} + # nodeSelector: {} + # tolerations: [] + # + # extraStartParams: >- + # -DsomeCustomKey=someCustomValue + # + # extraInitContainers: | + # - name: extra-init-container + # image: {{ $.Values.image }}:{{ $.Values.imageTag }} + # command: ["echo", "Hello World"] + # + # extraVolumes: [] + # extraVolumeMounts: [] + # + # volumeSize: 50Gi + # storageClass: managed-premium + # volumeClaimName: dremio-default-executor-volume + # + # cloudCache: + # enabled: true + # + # storageClass: "" + # + # volumes: + # - name: "default-c3" + # size: 100Gi + # storageClass: "" + +# Zookeeper +zookeeper: + # The Zookeeper image used in the cluster. + image: k8s.gcr.io/kubernetes-zookeeper + imageTag: 1.0-3.4.10 + + # CPU & Memory + # Memory allocated to each zookeeper, expressed in MB. + # CPU allocated to each zookeeper, expressed in CPU cores. + cpu: 0.5 + memory: 1024 + count: 3 + + volumeSize: 10Gi + + # Uncomment this value to use a different storage class for Zookeeper. + #storageClass: + + # These values, when defined, override the provided shared annotations, labels, node selectors, or tolerations. + # Uncomment only if you are trying to override the chart's shared values. + #annotations: {} + #podAnnotations: {} + #labels: {} + #podLabels: {} + #nodeSelector: {} + #tolerations: [] + +# Control where uploaded files are stored for Dremio. +# For more information, see https://docs.dremio.com/deployment/distributed-storage.html +distStorage: + # The supported distributed storage types are: local, aws, azure, or azureStorage. + # + # local: Not recommended for production use. When using local, dist-caching is disabled. + # aws: AWS S3, additional parameters required, see "aws" section. + # azure: ADLS Gen 1, additional parameters required, see "azure" section. 
+ # azureStorage: Azure Storage Gen2, additional parameters required, see "azureStorage" section. + type: "local" + + # AWS S3 + # For more details of S3 configuration, see https://docs.dremio.com/deployment/dist-store-config.html#amazon-s3 + # + # bucketName: The name of the S3 bucket for distributed storage. + # path: The path, relative to the bucket, to create Dremio's directories. + # authentication: Valid types are: accessKeySecret or metadata. + # - Note: Instance metadata is only supported in AWS EKS and requires that the + # EKS worker node IAM role is configured with sufficient access rights. At this time, + # Dremio does not support using a K8s service account based IAM role. + # credentials: If using accessKeySecret authentication, uncomment the credentials section below. + aws: + bucketName: "AWS Bucket Name" + path: "/" + authentication: "metadata" + # If using accessKeySecret for authentication against S3, uncomment the lines below and use the values + # to configure the appropriate credentials. + # + #credentials: + # accessKey: "AWS Access Key" + # secret: "AWS Secret" + + # Extra Properties + # Use the extra properties block to provide additional parameters to configure the distributed + # storage in the generated core-site.xml file. + # + #extraProperties: | + # + # + # + # + + # Azure ADLS Gen 1 + # For more details of Azure ADLS Gen 1 storage configuration, see + # https://docs.dremio.com/deployment/dist-store-config.html#azure-data-lake-storage-gen1 + # + # datalakeStoreName: The name of the ADLS Gen 1 data lake store. + azure: + datalakeStoreName: "Azure DataLake Store Name" + path: "/" + credentials: + applicationId: "Azure Application ID" + secret: "Azure Application Secret" + oauth2Endpoint: "Azure OAuth2 Endpoint" + + # Extra Properties + # Use the extra properties block to provide additional parameters to configure the distributed + # storage in the generated core-site.xml file. 
+ # + #extraProperties: | + # + # + # + # + + # Azure Storage Gen2 + # For more details of Azure Storage Gen2 storage configuration, see + # https://docs.dremio.com/deployment/dist-store-config.html#azure-storage + # + # accountName: The name of the storage account. + # filesystem: The name of the blob container to use within the storage account. + # path: The path, relative to the filesystem, to create Dremio's directories. + # credentials: + azureStorage: + accountName: "Azure Storage Account Name" + filesystem: "Azure Storage Account Blob Container" + path: "/" + credentials: + accessKey: "Azure Storage Account Access Key" + + # Extra Properties + # Use the extra properties block to provide additional parameters to configure the distributed + # storage in the generated core-site.xml file. + # + #extraProperties: | + # + # + # + # + +# Dremio Start Parameters +# Uncomment the below lines to provide extra start parameters to be passed directly to Dremio during startup. +#extraStartParams: >- +# -DsomeKey=someValue + +# Extra Init Containers +# Uncomment the below lines to provide extra init containers to be run first. +#extraInitContainers: | +# - name: extra-init-container +# image: {{ $.Values.image }}:{{ $.Values.imageTag }} +# command: ["echo", "Hello World"] + +# Extra Volumes +# Array to add extra volumes to all Dremio resources. +extraVolumes: [] + +# Extra Volume Mounts +# Array to add extra volume mounts to all Dremio resources, normally used in conjunction with extraVolumes. +extraVolumeMounts: [] + +# Dremio Service +# The dremio-client service exposes the service for access outside of the Kubernetes cluster. +service: + type: LoadBalancer + + # These values, when defined and not empty, override the provided shared annotations and labels. + # Uncomment only if you are trying to override the chart's shared values. 
+ #annotations: {} + #labels: {} + + # If the loadBalancer supports sessionAffinity and you have more than one coordinator, + # uncomment the below line to enable session affinity. + #sessionAffinity: ClientIP + + # Enable the following flag if you wish to route traffic through a shared VPC + # for the LoadBalancer's external IP. + # The chart is setup for internal IP support for AKS, EKS, GKE. + # For more information, see https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer + #internalLoadBalancer: true + + # If you have a static IP allocated for your load balancer, uncomment the following + # line and set the IP to provide the static IP used for the load balancer. + # Note: The service type must be set to LoadBalancer for this value to be used. + #loadBalancerIP: 0.0.0.0 + +# To use custom storage class, uncomment below. +# Otherwise the default storage class configured for your K8s cluster is used. +#storageClass: managed-premium + +# For private and protected docker image repository, you should store +# the credentials in a kubernetes secret and provide the secret name +# here. 
For more information, see +# https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod +#imagePullSecrets: +# - secretname \ No newline at end of file From ccc5de8d68d928dc6ebf24eac2eadc6d003edfd0 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Fri, 14 Aug 2020 16:32:53 -0700 Subject: [PATCH 26/31] DX-23310: Deprecate Helm Chart v1 Change-Id: I36a84dd6139030f1860b68c870ee2a7e9743dca3 --- charts/dremio/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/charts/dremio/README.md b/charts/dremio/README.md index 8bab30c8..dff18d9e 100644 --- a/charts/dremio/README.md +++ b/charts/dremio/README.md @@ -1,5 +1,11 @@ # Dremio + Kubernetes Cluster Setup +# ⚠️ Deprecation Notice ⚠️ + +***This version of the Dremio Helm chart is deprecated.*** We recommend users transition to the new [Dremio v2 Helm Chart](../dremio_v2). + +Dremio no longer maintains this Helm chart and will not perform additional improvements and bug fixes. The repository is kept for historical reference. + ## Overview This is a Helm chart to deploy a Dremio cluster in kubernetes. It uses From 6f2f2efbbb74f53921bce96add3abecbc65a23ce Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Thu, 27 Aug 2020 16:46:42 -0700 Subject: [PATCH 27/31] DX-24910: Fix storage class in Helm chart. 
Change-Id: I7cb9bd86c165bf59654ba2c2a26c173d31e3c01d --- charts/dremio_v2/templates/_helpers_coordinator.tpl | 2 +- charts/dremio_v2/templates/_helpers_executor.tpl | 4 ++-- charts/dremio_v2/templates/_helpers_zookeeper.tpl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/dremio_v2/templates/_helpers_coordinator.tpl b/charts/dremio_v2/templates/_helpers_coordinator.tpl index d7973f06..162ed17a 100644 --- a/charts/dremio_v2/templates/_helpers_coordinator.tpl +++ b/charts/dremio_v2/templates/_helpers_coordinator.tpl @@ -74,7 +74,7 @@ Coordinator - Storage Class {{- define "dremio.coordinator.storageClass" -}} {{- $coordinatorStorageClass := coalesce $.Values.coordinator.storageClass $.Values.storageClass -}} {{- if $coordinatorStorageClass -}} -storageClass: {{ $coordinatorStorageClass }} +storageClassName: {{ $coordinatorStorageClass }} {{- end -}} {{- end -}} diff --git a/charts/dremio_v2/templates/_helpers_executor.tpl b/charts/dremio_v2/templates/_helpers_executor.tpl index bbe5eafb..424518be 100644 --- a/charts/dremio_v2/templates/_helpers_executor.tpl +++ b/charts/dremio_v2/templates/_helpers_executor.tpl @@ -146,7 +146,7 @@ Executor - Persistent Volume Storage Class {{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} {{- $engineStorageClass := coalesce $engineConfiguration.storageClass $context.Values.executor.storageClass $context.Values.storageClass -}} {{- if $engineStorageClass -}} -storageClass: {{ $engineStorageClass }} +storageClassName: {{ $engineStorageClass }} {{- end -}} {{- end -}} @@ -168,7 +168,7 @@ Executor - Cloud Cache Peristent Volume Claims spec: accessModes: ["ReadWriteOnce"] {{- if $volumeStorageClass }} - storageClass: {{ $volumeStorageClass }} + storageClassName: {{ $volumeStorageClass }} {{- end }} resources: requests: diff --git a/charts/dremio_v2/templates/_helpers_zookeeper.tpl b/charts/dremio_v2/templates/_helpers_zookeeper.tpl index 
dbf01e44..e3cef173 100644 --- a/charts/dremio_v2/templates/_helpers_zookeeper.tpl +++ b/charts/dremio_v2/templates/_helpers_zookeeper.tpl @@ -12,7 +12,7 @@ Zookeeper - Storage Class {{- define "dremio.zookeeper.storageClass" -}} {{- $zookeeperStorageClass := coalesce $.Values.zookeeper.storageClass $.Values.storageClass -}} {{- if $zookeeperStorageClass -}} -storageClass: {{ $zookeeperStorageClass }} +storageClassName: {{ $zookeeperStorageClass }} {{- end -}} {{- end -}} From c8e9c86ffa9491a03ca5a928bcb426d3d801fad7 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Mon, 5 Oct 2020 09:42:23 -0700 Subject: [PATCH 28/31] DX-25650: Fix context for extraInitContainers. - Fixes the context used for extraInitContainers when templating the user provided value. Change-Id: I866716fa636ac6c50971b623f314a6de79fbee69 --- charts/dremio_v2/templates/_helpers_executor.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/dremio_v2/templates/_helpers_executor.tpl b/charts/dremio_v2/templates/_helpers_executor.tpl index 424518be..dba3d0df 100644 --- a/charts/dremio_v2/templates/_helpers_executor.tpl +++ b/charts/dremio_v2/templates/_helpers_executor.tpl @@ -107,7 +107,7 @@ Executor - Pod Extra Init Containers {{- $engineConfiguration := default (dict) (get (default (dict) $context.Values.executor.engineOverride) $engineName) -}} {{- $engineExtraInitContainers := coalesce $engineConfiguration.extraInitContainers $context.Values.executor.extraInitContainers $context.Values.extraInitContainers -}} {{- if $engineExtraInitContainers -}} -{{ tpl $engineExtraInitContainers $ }} +{{ tpl $engineExtraInitContainers $context }} {{- end -}} {{- end -}} From 4f3a1128b452be4d7f1edc5fc2931138e151ed5c Mon Sep 17 00:00:00 2001 From: Keerat Singh Date: Mon, 19 Oct 2020 16:33:30 -0700 Subject: [PATCH 29/31] DX-25897: Add Flight support to Dremio v2 Helm chart. - Exposed Flight endpoint (default port 32010). - Added TLS configuration support to the Flight endpoint. 
- Added documentation for Flight. Change-Id: I155543d55e8d78c8b13f244918c5a5f8438d89fe --- charts/dremio_v2/README.md | 30 +++++++++++++ charts/dremio_v2/config/dremio.conf | 6 +++ charts/dremio_v2/docs/Values-Reference.md | 42 ++++++++++++++++++- .../setup/Important-Setup-Considerations.md | 7 ++++ .../templates/dremio-coordinator.yaml | 28 ++++++++++++- charts/dremio_v2/templates/dremio-master.yaml | 28 ++++++++++++- .../templates/dremio-service-client.yaml | 3 ++ charts/dremio_v2/values.yaml | 12 ++++++ 8 files changed, 150 insertions(+), 6 deletions(-) diff --git a/charts/dremio_v2/README.md b/charts/dremio_v2/README.md index 5fa46f46..6030283f 100644 --- a/charts/dremio_v2/README.md +++ b/charts/dremio_v2/README.md @@ -107,3 +107,33 @@ $ kubectl get services dremio-client NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP 1h ``` + +### Connect to Dremio via Flight + +You can look up the service `dremio-client` in Kubernetes to find the host for Flight connections using the following command: + +```bash +$ kubectl get services dremio-client +``` + +#### Load Balancer Supported +If your Kubernetes cluster supports a `service.type` of `LoadBalancer`, you can access Dremio using Flight via port 32010 on the load balancer's external IP. You can optionally change the exposed port for Flight connections via `values.local.yaml` by setting `coordinator.flight.port`. + +For example, in the output below, the value under the `EXTERNAL-IP` column is `8.8.8.8`. 
Therefore, you can connect to Dremio using Flight using: `8.8.8.8:32010` + +```bash +$ kubectl get services dremio-client +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +dremio-client LoadBalancer 10.99.227.180 8.8.8.8 31010:32260/TCP,9047:30620/TCP,32010:31357/TCP 2d +``` + +#### Load Balancer Unsupported +If your Kubernetes cluster does not have support for a `service.type` of `LoadBalancer`, you can access Dremio using Flight on the port exposed on the node. + +For example, in the output below, there is no value on the `EXTERNAL-IP` column and the Dremio master is running on node "localhost". Therefore, you can connect to Dremio via Flight using: `localhost:31357` + +```bash +$ kubectl get services dremio-client +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +dremio-client NodePort 10.110.65.97 31010:32390/TCP,9047:30670/TCP,32010:31357/TCP 1h +``` \ No newline at end of file diff --git a/charts/dremio_v2/config/dremio.conf b/charts/dremio_v2/config/dremio.conf index 20991cae..446d5e85 100644 --- a/charts/dremio_v2/config/dremio.conf +++ b/charts/dremio_v2/config/dremio.conf @@ -86,3 +86,9 @@ services.coordinator.client-endpoint.ssl.enabled: true services.coordinator.client-endpoint.ssl.auto-certificate.enabled: false services.coordinator.client-endpoint.ssl.keyStore: "/opt/dremio/tls/client.pkcs12" {{- end }} + +{{- if $.Values.coordinator.flight.tls.enabled }} +services.flight.ssl.enabled: true +services.flight.ssl.auto-certificate.enabled: false +services.flight.ssl.keyStore: "/opt/dremio/tls/flight.pkcs12" +{{- end }} diff --git a/charts/dremio_v2/docs/Values-Reference.md b/charts/dremio_v2/docs/Values-Reference.md index e89b4d6e..d9484aa3 100644 --- a/charts/dremio_v2/docs/Values-Reference.md +++ b/charts/dremio_v2/docs/Values-Reference.md @@ -345,7 +345,7 @@ Type: String By default, the value is set to `dremio-tls-secret-client`. -This value is ignored if `coordinator.web.tls.enabled` is not set to `true`. 
This value should reference the TLS secret object in Kubernetes that contains the certificate for the client JDBC/ODBC connections. +This value is ignored if `coordinator.client.tls.enabled` is not set to `true`. This value should reference the TLS secret object in Kubernetes that contains the certificate for the client JDBC/ODBC connections. For example, to have TLS enabled for the client JDBC/ODBC connections using a certificate created called `dremio-tls-secret-client`, you can set the configuration as follows: @@ -365,6 +365,42 @@ To create a secret, use the following command: `kubectl create secret tls ${TLS_ More Info: See the [Creating your own Secrets](https://kubernetes.io/docs/concepts/configuration/secret/#creating-your-own-secrets) section of the Secrets documentation for Kubernetes. +### Flight + +#### `coordinator.flight.tls.enabled` + +Type: Boolean + +By default, the value is set to `false`. + +To enable TLS on the Flight port, set this value to `true`. Also, provide a value for `coordinator.flight.tls.secret` that corresponds with the TLS secret that should be used. + +#### `coordinator.flight.tls.secret` + +Type: String + +By default, the value is set to `dremio-tls-secret-flight`. + +This value is ignored if `coordinator.flight.tls.enabled` is not set to `true`. This value should reference the TLS secret object in Kubernetes that contains the certificate for the Flight connections. + +For example, to have TLS enabled for the Flight connections using a certificate created called `dremio-tls-secret-flight`, you can set the configuration as follows: + +```yaml +coordinator: + [...] + flight: + tls: + enabled: true + secret: dremio-tls-secret-flight +[...] +``` + +To create a secret, use the following command: `kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE}` providing appropriate values for `TLS_SECRET_NAME`, `KEY_FILE`, `CERT_FILE`. + +***Note***: Dremio does not support auto-rotation of secrets. 
To update the secret used by Dremio, restart the coordinator pods to have the new TLS secret take effect. + +More Info: See the [Creating your own Secrets](https://kubernetes.io/docs/concepts/configuration/secret/#creating-your-own-secrets) section of the Secrets documentation for Kubernetes. + ### Annotations, Labels, Node Selectors, Tags, and Tolerations By default, these values are not set. If the value is omitted or set to an empty array/dictionary, this value will be inherited from the top level equivalent. For more information about these configuration values, please refer to the top level equivalents of these values. @@ -1188,7 +1224,9 @@ Type: Boolean By default, this value is not set, which defaults to `false`. -To enable session affinity, set this value to `true`. Session affinity is critical for the web UI when there `coordinator.count` is greater than 0. +To enable session affinity, set this value to `ClientIP`. Session affinity is critical for the web UI when `coordinator.count` is greater than 0. + +If utilizing Flight, please see [Important Setup Considerations](https://github.com/dremio/dremio-cloud-tools/blob/master/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md) for more information about enabling session affinity. ### Annotations and Labels diff --git a/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md b/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md index c796d56c..2281a309 100644 --- a/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md +++ b/charts/dremio_v2/docs/setup/Important-Setup-Considerations.md @@ -6,5 +6,12 @@ As part of setting up a Dremio cluster on Kubernetes, there are a number of impo * `distStorage.type`: By default, the `distStorage.type` is set to `local`. This **must** be changed prior to production use. We do not recommend users use local distributed storage as part of a production setup. 
* `volumeSize` and `storageClass`: The size and type of volume used for Dremio has a direct impact on performance. In most Kubernetes providers, volume size has a direct impact on the performance in IOPS and read/write speeds. It is important to check your Kubernetes provider to determine how volume size impacts the performance of your disk. * `executor.cloudCache.storageClass`: Dremio C3 was designed to be used with performant NVMe storage. By default, the chart utilizes the default storage class that is configured on the Kubernetes cluster. For the major Kubernetes providers, NVMe storage is often available on appropriately sized nodes. We recommend utilizing a local storage provisioner to unlock the benefits of NVMe storage available on the physical Kubernetes nodes. For more information, see the [Kubernetes Special Interest Group for Local Static Provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner). +* `service.sessionAffinity`: By default, the `service.sessionAffinity` is set to `false`. We currently recommend leaving this value as `false` unless you are using Flight, in which case you should consider the following factors: + * When the Flight client is being used and this value is set to `false`, there are cases where the `DoGet` call happens on a different TCP connection than the original `GetFlightInfo` call. + * For the Java Flight client, this happens when a different `ManagedChannel` is used for different `FlightClient` instances for different Dremio Users. + * For the Python Flight client, this happens when a different `FlightClient` is initialized for different Dremio Users. + * In the cases described above, the `DoGet` call goes to a different coordinator than the one that originally created the query plan. + * This causes the query plan to be regenerated, which is less efficient than the case where both the `DoGet` and the `GetFlightInfo` calls go to the same coordinator. 
+ * When `service.sessionAffinity` is set to `true`, all the TCP connections from a particular client IP will be routed to a specific Dremio coordinator. For users who wish to setup a Hive 2/3 source, please see the [Setup Hive 2 and 3](./Setup-Hive-2-and-3.md) documentation. \ No newline at end of file diff --git a/charts/dremio_v2/templates/dremio-coordinator.yaml b/charts/dremio_v2/templates/dremio-coordinator.yaml index 426c9f16..78f1c822 100644 --- a/charts/dremio_v2/templates/dremio-coordinator.yaml +++ b/charts/dremio_v2/templates/dremio-coordinator.yaml @@ -45,7 +45,7 @@ spec: mountPath: /opt/dremio/plugins/connectors/hive3.d - name: dremio-hive3-config mountPath: /opt/dremio/plugins/connectors/hive3-ee.d - {{- if or $.Values.coordinator.web.tls.enabled $.Values.coordinator.client.tls.enabled }} + {{- if or $.Values.coordinator.web.tls.enabled (or $.Values.coordinator.client.tls.enabled $.Values.coordinator.flight.tls.enabled) }} - name: dremio-tls mountPath: /opt/dremio/tls {{- end }} @@ -69,6 +69,8 @@ spec: ports: - containerPort: 31010 name: client + - containerPort: 32010 + name: flight - containerPort: 45678 name: server-fabric - containerPort: 45679 @@ -111,6 +113,18 @@ spec: command: ["/usr/bin/openssl"] args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/client.pkcs12", "-passout", "pass:"] {{- end }} + {{- if $.Values.coordinator.flight.tls.enabled }} + - name: generate-flight-keystore + image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-flight + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/flight.pkcs12", "-passout", "pass:"] + {{- end }} volumes: - name: dremio-config configMap: @@ -121,7 +135,7 @@ 
spec: - name: dremio-hive3-config configMap: name: dremio-hive3-config - {{- if or $.Values.coordinator.web.tls.enabled $.Values.coordinator.client.tls.enabled }} + {{- if or $.Values.coordinator.web.tls.enabled (or $.Values.coordinator.client.tls.enabled $.Values.coordinator.flight.tls.enabled) }} - name: dremio-tls emptyDir: {} {{- end }} @@ -145,6 +159,16 @@ spec: - key: tls.crt path: tls.crt {{- end }} + {{- if $.Values.coordinator.flight.tls.enabled }} + - name: dremio-tls-secret-flight + secret: + secretName: {{ $.Values.coordinator.flight.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} {{- include "dremio.coordinator.extraVolumes" $ | nindent 6 }} {{- include "dremio.imagePullSecrets" $ | nindent 6}} {{- end -}} \ No newline at end of file diff --git a/charts/dremio_v2/templates/dremio-master.yaml b/charts/dremio_v2/templates/dremio-master.yaml index d2e12228..0f30281c 100644 --- a/charts/dremio_v2/templates/dremio-master.yaml +++ b/charts/dremio_v2/templates/dremio-master.yaml @@ -56,7 +56,7 @@ spec: mountPath: /opt/dremio/plugins/connectors/hive3.d - name: dremio-hive3-config mountPath: /opt/dremio/plugins/connectors/hive3-ee.d - {{- if or $.Values.coordinator.web.tls.enabled $.Values.coordinator.client.tls.enabled }} + {{- if or $.Values.coordinator.web.tls.enabled (or $.Values.coordinator.client.tls.enabled $.Values.coordinator.flight.tls.enabled) }} - name: dremio-tls mountPath: /opt/dremio/tls {{- end }} @@ -83,6 +83,8 @@ spec: name: web - containerPort: 31010 name: client + - containerPort: 32010 + name: flight - containerPort: 45678 name: server-fabric - containerPort: 45679 @@ -149,6 +151,18 @@ spec: command: ["/usr/bin/openssl"] args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/client.pkcs12", "-passout", "pass:"] {{- end }} + {{- if $.Values.coordinator.flight.tls.enabled }} + - name: generate-flight-keystore + 
image: {{ $.Values.image }}:{{ $.Values.imageTag }} + imagePullPolicy: IfNotPresent + volumeMounts: + - name: dremio-tls + mountPath: /opt/dremio/tls + - name: dremio-tls-secret-flight + mountPath: /dremio-tls-secret + command: ["/usr/bin/openssl"] + args: ["pkcs12", "-export", "-inkey", "/dremio-tls-secret/tls.key", "-in", "/dremio-tls-secret/tls.crt", "-out", "/opt/dremio/tls/flight.pkcs12", "-passout", "pass:"] + {{- end }} volumes: - name: dremio-config configMap: @@ -159,7 +173,7 @@ spec: - name: dremio-hive3-config configMap: name: dremio-hive3-config - {{- if or $.Values.coordinator.web.tls.enabled $.Values.coordinator.client.tls.enabled }} + {{- if or $.Values.coordinator.web.tls.enabled (or $.Values.coordinator.client.tls.enabled $.Values.coordinator.flight.tls.enabled) }} - name: dremio-tls emptyDir: {} {{- end }} @@ -183,6 +197,16 @@ spec: - key: tls.crt path: tls.crt {{- end }} + {{- if $.Values.coordinator.flight.tls.enabled }} + - name: dremio-tls-secret-flight + secret: + secretName: {{ $.Values.coordinator.flight.tls.secret }} + items: + - key: tls.key + path: tls.key + - key: tls.crt + path: tls.crt + {{- end }} {{- include "dremio.coordinator.extraVolumes" $ | nindent 6 }} {{- include "dremio.imagePullSecrets" $ | nindent 6 }} volumeClaimTemplates: diff --git a/charts/dremio_v2/templates/dremio-service-client.yaml b/charts/dremio_v2/templates/dremio-service-client.yaml index 16de16e1..7de85817 100644 --- a/charts/dremio_v2/templates/dremio-service-client.yaml +++ b/charts/dremio_v2/templates/dremio-service-client.yaml @@ -15,6 +15,9 @@ spec: - port: {{ $.Values.coordinator.web.port | default 9047 }} targetPort: web name: web + - port: {{ $.Values.coordinator.flight.port | default 32010 }} + targetPort: flight + name: flight selector: app: dremio-coordinator type: {{ $.Values.service.type }} diff --git a/charts/dremio_v2/values.yaml b/charts/dremio_v2/values.yaml index 11667d82..0c75952c 100644 --- a/charts/dremio_v2/values.yaml +++ 
b/charts/dremio_v2/values.yaml @@ -101,6 +101,18 @@ coordinator: # kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} secret: dremio-tls-secret-client + # Flight Client + flight: + port: 32010 + tls: + # To enable TLS for the Flight endpoints, set the enabled flag to + # true and provide the appropriate Kubernetes TLS secret. + enabled: false + + # To create a TLS secret, use the following command: + # kubectl create secret tls ${TLS_SECRET_NAME} --key ${KEY_FILE} --cert ${CERT_FILE} + secret: dremio-tls-secret-flight + # Dremio Executor executor: # CPU & Memory From a4c16f6c65a8ed7eb6c2af248e43ca019a8dec37 Mon Sep 17 00:00:00 2001 From: Ryan Tse Date: Tue, 26 Jan 2021 12:41:07 -0800 Subject: [PATCH 30/31] DX-27781: Fix annotations/labels in Helm chart - Fixes the ability to set annotations/labels on StatefulSets in the Helm chart. Change-Id: Ie21980dbd3b626f434c68c517e7a46a6b16450e5 --- charts/dremio_v2/templates/dremio-coordinator.yaml | 4 ++-- charts/dremio_v2/templates/dremio-executor.yaml | 4 ++-- charts/dremio_v2/templates/dremio-master.yaml | 4 ++-- charts/dremio_v2/templates/zookeeper.yaml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/charts/dremio_v2/templates/dremio-coordinator.yaml b/charts/dremio_v2/templates/dremio-coordinator.yaml index 78f1c822..8cd36e88 100644 --- a/charts/dremio_v2/templates/dremio-coordinator.yaml +++ b/charts/dremio_v2/templates/dremio-coordinator.yaml @@ -3,6 +3,8 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: dremio-coordinator + {{- include "dremio.coordinator.annotations" $ | nindent 2 }} + {{- include "dremio.coordinator.labels" $ | nindent 2 }} spec: serviceName: "dremio-cluster-pod" replicas: {{ $.Values.coordinator.count }} @@ -11,8 +13,6 @@ spec: selector: matchLabels: app: dremio-coordinator - {{- include "dremio.coordinator.annotations" $ | nindent 2 }} - {{- include "dremio.coordinator.labels" $ | nindent 2 }} template: metadata: labels: diff --git 
a/charts/dremio_v2/templates/dremio-executor.yaml b/charts/dremio_v2/templates/dremio-executor.yaml index beea2890..6ef3a2cf 100644 --- a/charts/dremio_v2/templates/dremio-executor.yaml +++ b/charts/dremio_v2/templates/dremio-executor.yaml @@ -6,6 +6,8 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: {{ $executorName }} + {{- include "dremio.executor.annotations" (list $ $engineName) | nindent 2}} + {{- include "dremio.executor.labels" (list $ $engineName) | nindent 2}} spec: serviceName: "dremio-cluster-pod" replicas: {{ template "dremio.executor.count" (list $ $engineName) }} @@ -14,8 +16,6 @@ spec: selector: matchLabels: app: {{ $executorName }} - {{- include "dremio.executor.annotations" (list $ $engineName) | nindent 2}} - {{- include "dremio.executor.labels" (list $ $engineName) | nindent 2}} template: metadata: labels: diff --git a/charts/dremio_v2/templates/dremio-master.yaml b/charts/dremio_v2/templates/dremio-master.yaml index 0f30281c..af4cdebb 100644 --- a/charts/dremio_v2/templates/dremio-master.yaml +++ b/charts/dremio_v2/templates/dremio-master.yaml @@ -3,6 +3,8 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: dremio-master + {{- include "dremio.coordinator.annotations" $ | nindent 2}} + {{- include "dremio.coordinator.labels" $ | nindent 2}} spec: serviceName: "dremio-cluster-pod" podManagementPolicy: "Parallel" @@ -10,8 +12,6 @@ spec: selector: matchLabels: app: dremio-coordinator - {{- include "dremio.coordinator.annotations" $ | nindent 2}} - {{- include "dremio.coordinator.labels" $ | nindent 2}} template: metadata: labels: diff --git a/charts/dremio_v2/templates/zookeeper.yaml b/charts/dremio_v2/templates/zookeeper.yaml index c94e17de..69cd19cc 100644 --- a/charts/dremio_v2/templates/zookeeper.yaml +++ b/charts/dremio_v2/templates/zookeeper.yaml @@ -44,6 +44,8 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: zk + {{- include "dremio.zookeeper.annotations" $ | nindent 2 }} + {{- include "dremio.zookeeper.labels" $ | 
nindent 2 }} spec: selector: matchLabels: @@ -53,8 +55,6 @@ spec: updateStrategy: type: RollingUpdate podManagementPolicy: Parallel - {{- include "dremio.zookeeper.annotations" $ | nindent 2 }} - {{- include "dremio.zookeeper.labels" $ | nindent 2 }} template: metadata: labels: From c3bf91597919d7ac6422d199ac94d9cb4c85a98b Mon Sep 17 00:00:00 2001 From: Stephen Layland Date: Tue, 13 Apr 2021 23:35:27 -0700 Subject: [PATCH 31/31] Fix indentation for zookeeper annotations --- charts/dremio_v2/templates/zookeeper.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/dremio_v2/templates/zookeeper.yaml b/charts/dremio_v2/templates/zookeeper.yaml index 69cd19cc..f5c87b9b 100644 --- a/charts/dremio_v2/templates/zookeeper.yaml +++ b/charts/dremio_v2/templates/zookeeper.yaml @@ -60,7 +60,7 @@ spec: labels: app: zk {{- include "dremio.zookeeper.podLabels" $ | nindent 8 }} - {{- include "dremio.zookeeper.podAnnotations" $ | nindent 8 }} + {{- include "dremio.zookeeper.podAnnotations" $ | nindent 6 }} spec: affinity: podAntiAffinity: @@ -136,4 +136,4 @@ spec: resources: requests: storage: {{ $.Values.zookeeper.volumeSize }} -{{- end -}} \ No newline at end of file +{{- end -}}