Skip to content

Commit

Permalink
Check if any PVCs are resizing and requeue in that case. Also, add…
Browse files Browse the repository at this point in the history
… ability to verify if StorageClass supports resizing

Add resize functionality, new DatacenterCondition to indicate Resizing is happening

Patch the PVC and reconcile in 10 seconds

Fix existing tests

Add validation test

Implement unit tests and fix the implementation of expansion

Let VolumeResize actually just continue instead of waiting for the PVC resize; we will do a requeue and check there before recreating the StatefulSet

Add RBAC role for StorageClasses

Fix lint issues, update versions of tools

Add e2e test for the PVC expansion using TopoLVM

Disable StorageConfig webhook validation failure

Improve failure logs with dump of pvc and pv also

Move datacenterCondition reset for ResizingVolumes

Log also StatefulSets, add annotation to allow StorageConfig changes

Add validation of whether PVC expansion is allowed, and modify the behavior to keep the existing PersistentVolumeClaims in the StS, modifying only the sizes

Update controller-runtime to 0.17.4 and update some logging

Update docker/build-push-action from v4 to v6

Add CHANGELOG, return all the other e2e tests, update Cassandra/DSE versions

Modify the annotation check to happen in the CheckVolumeClaimSizes() instead of CheckRackPodTemplate() to ensure rest of the validations would still always happen. Also, modify the envtests to use more re-usable AsyncAssertions.

Add new events to indicate when Datacenter was set to Valid: False
  • Loading branch information
burmanm committed Jul 11, 2024
1 parent 2a9cbd7 commit 56fca3e
Show file tree
Hide file tree
Showing 29 changed files with 1,131 additions and 231 deletions.
113 changes: 101 additions & 12 deletions .github/workflows/kindIntegTest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ on:
push:
branches:
- master
- 1.10.x
pull_request:
branches: [master]
jobs:
Expand All @@ -23,7 +22,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Build and push
id: docker_build
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: Dockerfile
context: .
Expand All @@ -34,7 +33,7 @@ jobs:
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
- name: Build and push
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: logger.Dockerfile
push: false
Expand Down Expand Up @@ -83,7 +82,7 @@ jobs:
with:
name: k8s-logs-${{ matrix.integration_test }}
path: ./build/kubectl_dump
# This job is only for tests that don't run or don't pass against 4.1 yet
# # This job is only for tests that don't run or don't pass against 4.1 yet
kind_40_tests:
needs: build_docker_images
strategy:
Expand Down Expand Up @@ -118,12 +117,12 @@ jobs:
strategy:
matrix:
version:
- "6.8.46"
- "6.8.49"
integration_test:
- cdc_successful
include:
- version: 6.8.46
serverImage: datastax/dse-mgmtapi-6_8:6.8.46-ubi8 # DSE 6.8.46
- version: 6.8.49
serverImage: datastax/dse-mgmtapi-6_8:6.8.49-ubi8 # DSE 6.8.49
serverType: dse
integration_test: "cdc_successful"
fail-fast: true
Expand Down Expand Up @@ -234,14 +233,14 @@ jobs:
matrix:
version:
- "3.11.17"
- "4.0.12"
- "4.1.4"
- "6.8.46"
- "4.0.13"
- "4.1.5"
- "6.8.49"
integration_test:
- test_all_the_things
include:
- version: 6.8.46
serverImage: datastax/dse-mgmtapi-6_8:6.8.46-ubi8 # DSE 6.8.46
- version: 6.8.49
serverImage: datastax/dse-mgmtapi-6_8:6.8.49-ubi8 # DSE 6.8.49
serverType: dse
integration_test: "test_all_the_things"
fail-fast: true
Expand All @@ -268,3 +267,93 @@ jobs:
with:
name: k8s-logs-smoke_test-${{ matrix.version }}
path: ./build/kubectl_dump
kind_topolvm_tests:
name: TopoLVM kind installation with volumeExpansion
needs: build_docker_images
strategy:
matrix:
version:
- "4.1.5"
integration_test:
- pvc_expansion
fail-fast: true
runs-on: ubuntu-latest
env:
CGO_ENABLED: 0
M_INTEG_DIR: ${{ matrix.integration_test }}
M_SERVER_VERSION: ${{ matrix.version }}
steps:
- name: Install necessary tools for LVM setup
run: |
sudo apt-get update
sudo apt-get install -y lvm2 xfsprogs thin-provisioning-tools
- name: Check out code into the Go module directory
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.sha }}
- name: Check out code into the Go module directory
uses: actions/checkout@v4
with:
repository: topolvm/topolvm
path: topolvm
ref: topolvm-chart-v15.0.0
- name: Create LVM from TopoLVM's example setup
run: |
cd topolvm/example
mkdir -p build
mkdir -p bin
make start-lvmd
make KIND=$(type -a -P kind) launch-kind
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
cache: true
- name: Install TopoLVM controller
run: |
make cert-manager
helm repo add topolvm https://topolvm.github.io/topolvm
helm repo update
kubectl create namespace topolvm-system
kubectl label namespace topolvm-system topolvm.io/webhook=ignore
kubectl label namespace kube-system topolvm.io/webhook=ignore
helm install --namespace=topolvm-system topolvm topolvm/topolvm -f topolvm/example/values.yaml
kubectl wait --for=condition=available --timeout=120s -n topolvm-system deployments/topolvm-controller
kubectl wait --for=condition=ready --timeout=120s -n topolvm-system -l="app.kubernetes.io/component=controller,app.kubernetes.io/name=topolvm" pod
kubectl wait --for=condition=ready --timeout=120s -n topolvm-system certificate/topolvm-mutatingwebhook
- name: Link tools
shell: bash
run: |
mkdir bin
ln -s /usr/local/bin/kustomize bin/kustomize
- name: Download cass-operator image
uses: actions/download-artifact@v4
with:
name: cass-operator
path: /tmp
- name: Download system-logger image
uses: actions/download-artifact@v4
with:
name: system-logger
path: /tmp
- name: Load Docker images
shell: bash
id: load
run: |
echo "operator_img=$(docker load --input /tmp/k8ssandra-cass-operator.tar | cut -f 3 -d' ')" >> $GITHUB_OUTPUT
echo "logger_img=$(docker load --input /tmp/k8ssandra-system-logger.tar | cut -f 3 -d' ')" >> $GITHUB_OUTPUT
- name: Load image on the nodes of the cluster
shell: bash
run: |
kind load docker-image --name=topolvm-example ${{ steps.load.outputs.operator_img }}
kind load docker-image --name=topolvm-example ${{ steps.load.outputs.logger_img }}
- name: Run integration test
shell: bash
run: |
IMG=${{ steps.load.outputs.operator_img }} LOG_IMG=${{ steps.load.outputs.logger_img }} make integ-test
- name: Archive k8s logs
# if: ${{ failure() }}
uses: actions/upload-artifact@v4
with:
name: k8s-logs-topolvm-test-${{ matrix.version }}
path: ./build/kubectl_dump
6 changes: 3 additions & 3 deletions .github/workflows/operatorBuildAndDeploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
echo "version=$(make version)" >> $GITHUB_OUTPUT
- name: Build and push
id: docker_build_cass_operator
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: Dockerfile
context: .
Expand All @@ -83,7 +83,7 @@ jobs:
cache-to: type=local,dest=/tmp/.buildx-cache
- name: Build and push
id: docker_build_system_logger
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: logger.Dockerfile
push: ${{ github.event_name != 'pull_request' }}
Expand All @@ -99,7 +99,7 @@ jobs:
bin/operator-sdk bundle validate ./bundle --select-optional name=good-practices
- name: Build and push cass-operator-bundle
id: docker_build_cass-operator_bundle
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: bundle.Dockerfile
build-args: |
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
echo "TARGET_VERSION=$(echo ${GITHUB_REF#refs/tags/} | awk '{print substr($0,2)}')" >> $GITHUB_ENV
- name: Build system-logger
id: docker_build_system-logger
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: logger.Dockerfile
build-args: |
Expand All @@ -58,7 +58,7 @@ jobs:
cache-to: type=local,dest=/tmp/.buildx-cache
- name: Build cass-operator
id: docker_build_cass-operator
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: Dockerfile
build-args: |
Expand All @@ -71,7 +71,7 @@ jobs:
cache-to: type=local,dest=/tmp/.buildx-cache
- name: Push system-logger
id: docker_push_system-logger
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: logger.Dockerfile
build-args: |
Expand All @@ -84,7 +84,7 @@ jobs:
cache-to: type=local,dest=/tmp/.buildx-cache
- name: Push cass-operator
id: docker_push_cass-operator
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: Dockerfile
build-args: |
Expand Down Expand Up @@ -134,7 +134,7 @@ jobs:
- name: Build and push cass-operator-bundle
id: docker_build_cass-operator_bundle
if: ${{ !env.ACT }}
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: bundle.Dockerfile
build-args: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/workflow-integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Build and push
id: docker_build
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: Dockerfile
context: .
Expand All @@ -36,7 +36,7 @@ jobs:
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
- name: Build and push
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
file: logger.Dockerfile
push: false
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` secti

## unreleased

* [FEATURE] [#263](https://github.com/k8ssandra/cass-operator/issues/263) Allow increasing the size of CassandraDataVolumeClaimSpec if the selected StorageClass supports it. This feature is currently behind an opt-in feature flag and requires an annotation ``cassandra.datastax.com/allow-storage-changes: true`` to be set in the CassandraDatacenter.
* [ENHANCEMENT] [#648](https://github.com/k8ssandra/cass-operator/issues/648) Make MinReadySeconds configurable value in the Spec.
* [FEATURE] [#646](https://github.com/k8ssandra/cass-operator/issues/646) Allow starting multiple parallel pods if they have already previously bootstrapped and not planned for replacement. Set annotation ``cassandra.datastax.com/allow-parallel-starts: true`` to enable this feature.

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the manager binary
FROM golang:1.20 as builder
FROM golang:1.22 as builder
ARG TARGETOS
ARG TARGETARCH

Expand Down
18 changes: 9 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ IMG ?= $(IMAGE_TAG_BASE):v$(VERSION)
# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
CRD_OPTIONS ?= "crd:generateEmbeddedObjectMeta=true"
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.28.x
ENVTEST_K8S_VERSION = 1.30.x

# Logger image
LOG_IMG_BASE ?= $(ORG)/system-logger
Expand Down Expand Up @@ -139,9 +139,9 @@ test: manifests generate fmt vet lint envtest ## Run tests.
integ-test: kustomize cert-manager helm ## Run integration tests from directory M_INTEG_DIR or set M_INTEG_DIR=all to run all the integration tests.
ifeq ($(M_INTEG_DIR), all)
# Run all the tests (exclude kustomize & testdata directories)
cd tests && go test -v ./... -timeout 300m --ginkgo.show-node-events --ginkgo.v
cd tests && go test -v ./... -timeout 60m --ginkgo.show-node-events --ginkgo.v
else
cd tests/${M_INTEG_DIR} && go test -v ./... -timeout 300m --ginkgo.show-node-events --ginkgo.v
cd tests/${M_INTEG_DIR} && go test -v ./... -timeout 60m --ginkgo.show-node-events --ginkgo.v
endif

.PHONY: version
Expand Down Expand Up @@ -239,13 +239,13 @@ HELM ?= $(LOCALBIN)/helm
OPM ?= $(LOCALBIN)/opm

## Tool Versions
CERT_MANAGER_VERSION ?= v1.14.3
KUSTOMIZE_VERSION ?= v5.3.0
CONTROLLER_TOOLS_VERSION ?= v0.14.0
OPERATOR_SDK_VERSION ?= 1.34.1
CERT_MANAGER_VERSION ?= v1.14.7
KUSTOMIZE_VERSION ?= v5.4.2
CONTROLLER_TOOLS_VERSION ?= v0.15.0
OPERATOR_SDK_VERSION ?= 1.35.0
HELM_VERSION ?= 3.14.2
OPM_VERSION ?= 1.36.0
GOLINT_VERSION ?= 1.55.2
OPM_VERSION ?= 1.38.0
GOLINT_VERSION ?= 1.59.1

.PHONY: cert-manager
cert-manager: ## Install cert-manager to the cluster
Expand Down
28 changes: 16 additions & 12 deletions apis/cassandra/v1beta1/cassandradatacenter_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ const (
// AllowParallelStartsAnnotations allows the operator to start multiple server nodes at the same time if they have already bootstrapped.
AllowParallelStartsAnnotations = "cassandra.datastax.com/allow-parallel-starts"

// AllowStorageChangesAnnotation indicates the CassandraDatacenter StorageConfig can be modified for existing datacenters
AllowStorageChangesAnnotation = "cassandra.datastax.com/allow-storage-changes"

AllowUpdateAlways AllowUpdateType = "always"
AllowUpdateOnce AllowUpdateType = "once"

Expand Down Expand Up @@ -385,18 +388,19 @@ type CassandraStatusMap map[string]CassandraNodeStatus
type DatacenterConditionType string

const (
DatacenterReady DatacenterConditionType = "Ready"
DatacenterInitialized DatacenterConditionType = "Initialized"
DatacenterReplacingNodes DatacenterConditionType = "ReplacingNodes"
DatacenterScalingUp DatacenterConditionType = "ScalingUp"
DatacenterScalingDown DatacenterConditionType = "ScalingDown"
DatacenterUpdating DatacenterConditionType = "Updating"
DatacenterStopped DatacenterConditionType = "Stopped"
DatacenterResuming DatacenterConditionType = "Resuming"
DatacenterRollingRestart DatacenterConditionType = "RollingRestart"
DatacenterValid DatacenterConditionType = "Valid"
DatacenterDecommission DatacenterConditionType = "Decommission"
DatacenterRequiresUpdate DatacenterConditionType = "RequiresUpdate"
DatacenterReady DatacenterConditionType = "Ready"
DatacenterInitialized DatacenterConditionType = "Initialized"
DatacenterReplacingNodes DatacenterConditionType = "ReplacingNodes"
DatacenterScalingUp DatacenterConditionType = "ScalingUp"
DatacenterScalingDown DatacenterConditionType = "ScalingDown"
DatacenterUpdating DatacenterConditionType = "Updating"
DatacenterStopped DatacenterConditionType = "Stopped"
DatacenterResuming DatacenterConditionType = "Resuming"
DatacenterRollingRestart DatacenterConditionType = "RollingRestart"
DatacenterValid DatacenterConditionType = "Valid"
DatacenterDecommission DatacenterConditionType = "Decommission"
DatacenterRequiresUpdate DatacenterConditionType = "RequiresUpdate"
DatacenterResizingVolumes DatacenterConditionType = "ResizingVolumes"

// DatacenterHealthy indicates if QUORUM can be reached from all deployed nodes.
// If this check fails, certain operations such as scaling up will not proceed.
Expand Down
20 changes: 11 additions & 9 deletions apis/cassandra/v1beta1/cassandradatacenter_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ import (
"encoding/json"
"errors"
"fmt"
"reflect"
"strings"

"github.com/google/go-cmp/cmp"
"github.com/k8ssandra/cass-operator/pkg/images"

apiequality "k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -173,16 +173,18 @@ func ValidateDatacenterFieldChanges(oldDc CassandraDatacenter, newDc CassandraDa
return attemptedTo("change serviceAccount")
}

// CassandraDataVolumeClaimSpec changes are disallowed
oldClaimSpec := oldDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec.DeepCopy()
newClaimSpec := newDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec.DeepCopy()

if !reflect.DeepEqual(oldDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec, newDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec) {
return attemptedTo("change storageConfig.CassandraDataVolumeClaimSpec")
// CassandraDataVolumeClaimSpec changes are disallowed
if metav1.HasAnnotation(newDc.ObjectMeta, AllowStorageChangesAnnotation) && newDc.Annotations[AllowStorageChangesAnnotation] == "true" {
// If the AllowStorageChangesAnnotation is set, we allow changes to the CassandraDataVolumeClaimSpec sizes, but not other fields
oldClaimSpec.Resources.Requests = newClaimSpec.Resources.Requests
}

if oldDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec != nil {
if !reflect.DeepEqual(*oldDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec, *newDc.Spec.StorageConfig.CassandraDataVolumeClaimSpec) {
return attemptedTo("change storageConfig.CassandraDataVolumeClaimSpec")
}
if !apiequality.Semantic.DeepEqual(oldClaimSpec, newClaimSpec) {
pvcSourceDiff := cmp.Diff(oldClaimSpec, newClaimSpec)
return attemptedTo("change storageConfig.CassandraDataVolumeClaimSpec, diff: %s", pvcSourceDiff)
}

// Topology changes - Racks
Expand Down
Loading

0 comments on commit 56fca3e

Please sign in to comment.