Add e2e test, fix bug with possible nil pointer if Pod never got the first StartTime status.
burmanm committed Dec 4, 2024
1 parent 139fbf8 commit a4ae065
Showing 5 changed files with 79 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/kindIntegTest.yml
@@ -168,7 +168,7 @@ jobs:
 - webhook_validation
 # Three worker tests:
 - canary_upgrade
-# - config_change_condition # config_change takes care of testing the same
+- config_change_condition
 #- cdc_successful # OSS only
 # - delete_node_lost_readiness # DSE specific behavior
 - host_network
2 changes: 1 addition & 1 deletion .github/workflows/workflow-integration-tests.yaml
@@ -183,7 +183,7 @@ jobs:
 - webhook_validation
 # Three worker tests:
 # - canary_upgrade # See kind_40_tests job
-# - config_change_condition # config_change takes care of the same testing
+- config_change_condition
 # - cdc_successful # CDC is OSS only , see kind_311_tests and kind_40_tests jobs
 # - delete_node_lost_readiness # DSE specific behavior see kind_dse_tests job
 - host_network
2 changes: 1 addition & 1 deletion pkg/reconciliation/reconcile_racks.go
@@ -173,7 +173,7 @@ func (rc *ReconciliationContext) failureModeDetection() bool {
 			continue
 		}
 		if pod.Status.Phase == corev1.PodPending {
-			if hasBeenXMinutes(5, pod.Status.StartTime.Time) {
+			if pod.Status.StartTime == nil || hasBeenXMinutes(5, pod.Status.StartTime.Time) {
 				// Pod has been over 5 minutes in Pending state. This can be normal, but lets see
 				// if we have some detected failures events like FailedScheduling
 				events := &corev1.EventList{}
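The one-line change above is the actual bug fix: pod.Status.StartTime is a *metav1.Time that the kubelet only sets once the Pod has been scheduled and acknowledged, so a Pod stuck Unschedulable never gets one, and the old pod.Status.StartTime.Time dereference panics. A minimal sketch of the guard pattern, with a hypothetical hasBeenXMinutes written to match how the helper is used here (the operator's real implementation may differ):

package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
)

// hasBeenXMinutes reports whether t lies at least x minutes in the past.
// Hypothetical stand-in for the operator's helper of the same name.
func hasBeenXMinutes(x int, t time.Time) bool {
	return !t.IsZero() && time.Since(t) >= time.Duration(x)*time.Minute
}

// stuckPending mirrors the fixed condition: a nil StartTime means the Pod
// was never scheduled, which is treated the same as "Pending for too long".
func stuckPending(pod *corev1.Pod) bool {
	if pod.Status.Phase != corev1.PodPending {
		return false
	}
	return pod.Status.StartTime == nil || hasBeenXMinutes(5, pod.Status.StartTime.Time)
}

func main() {
	pod := &corev1.Pod{}
	pod.Status.Phase = corev1.PodPending // never scheduled: StartTime stays nil
	fmt.Println(stuckPending(pod))       // prints true instead of panicking
}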
32 changes: 22 additions & 10 deletions tests/config_change_condition/config_change_condition_suite_test.go
@@ -18,12 +18,13 @@ import (
 )

 var (
-	testName   = "Config change condition"
-	namespace  = "test-config-change-condition"
-	dcName     = "dc2"
-	dcYaml     = "../testdata/default-single-rack-2-node-dc.yaml"
-	dcResource = fmt.Sprintf("CassandraDatacenter/%s", dcName)
-	ns         = ginkgo_util.NewWrapper(testName, namespace)
+	testName    = "Config change condition with failure"
+	namespace   = "test-config-change-condition"
+	dcName      = "dc1"
+	clusterName = "cluster1"
+	dcYaml      = "../testdata/default-three-rack-three-node-dc-zones.yaml"
+	dcResource  = fmt.Sprintf("CassandraDatacenter/%s", dcName)
+	ns          = ginkgo_util.NewWrapper(testName, namespace)
 )

 func TestLifecycle(t *testing.T) {
@@ -55,22 +56,33 @@ var _ = Describe(testName, func() {

 		ns.WaitForOperatorReady()

-		step := "creating a datacenter resource with 1 racks/2 nodes"
+		step := "creating a datacenter resource with 3 racks/3 nodes using unavailable zones"
 		testFile, err := ginkgo_util.CreateTestFile(dcYaml)
 		Expect(err).ToNot(HaveOccurred())

 		k := kubectl.ApplyFiles(testFile)
 		ns.ExecAndLog(step, k)

-		ns.WaitForDatacenterReady(dcName)
+		// Wait for status to be Unschedulable
+		step = "waiting the nodes to be unschedulable"
+		json := `jsonpath={.status.conditions[?(@.type=="PodScheduled")].status}`
+		k = kubectl.Get(fmt.Sprintf("pod/%s-%s-r1-sts-0", clusterName, dcName)).
+			FormatOutput(json)
+		ns.WaitForOutputContains(k, "False", 30)
[CI annotation — GitHub Actions / Run unit tests, check failure on line 71 in tests/config_change_condition/config_change_condition_suite_test.go: Error return value of `ns.WaitForOutputContains` is not checked (errcheck)]

+		json = `jsonpath={.status.conditions[?(@.type=="PodScheduled")].reason}`
+		k = kubectl.Get(fmt.Sprintf("pod/%s-%s-r1-sts-0", clusterName, dcName)).
+			FormatOutput(json)
+		ns.WaitForOutputContainsAndLog(step, k, "Unschedulable", 30)

-		step = "change the config"
-		json := ginkgo_util.CreateTestJson("{\"spec\": {\"config\": {\"cassandra-yaml\": {\"roles_validity\": \"256000ms\"}, \"jvm-server-options\": {\"garbage_collector\": \"CMS\"}}}}")
+		step = "change the config by removing zones"
+		json = `{"spec": { "racks": [{"name": "r1"}, {"name": "r2"}, {"name": "r3"}]}}`
 		k = kubectl.PatchMerge(dcResource, json)
 		ns.ExecAndLog(step, k)

 		ns.WaitForDatacenterCondition(dcName, "Updating", string(corev1.ConditionTrue))
 		ns.WaitForDatacenterCondition(dcName, "Updating", string(corev1.ConditionFalse))
 		ns.WaitForDatacenterReady(dcName)
+		ns.WaitForDatacenterOperatorProgress(dcName, "Ready", 1800)
 	})
 })
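The errcheck failure flagged in the annotation above is real: the first wait drops the error that ns.WaitForOutputContains returns, so a timeout there would go unnoticed. A minimal sketch of the fix, assuming the wrapper returns an error (the exact signature is not shown in this diff):

		// err is already declared earlier in the spec by CreateTestFile;
		// failing the assertion surfaces a wait timeout instead of ignoring it.
		err = ns.WaitForOutputContains(k, "False", 30)
		Expect(err).ToNot(HaveOccurred())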
54 changes: 54 additions & 0 deletions tests/testdata/default-three-rack-three-node-dc-zones.yaml
@@ -0,0 +1,54 @@
apiVersion: cassandra.datastax.com/v1beta1
kind: CassandraDatacenter
metadata:
  name: dc1
spec:
  clusterName: cluster1
  serverType: cassandra
  serverVersion: "5.0.2"
  managementApiAuth:
    insecure: {}
  size: 3
  storageConfig:
    cassandraDataVolumeClaimSpec:
      storageClassName: standard
      accessModes:
        - ReadWriteOnce
      resources:
        requests:
          storage: 1Gi
  racks:
    - name: r1
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: topology.kubernetes.io/zone
                    operator: In
                    values:
                      - europe-north1-a
    - name: r2
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: topology.kubernetes.io/zone
                    operator: In
                    values:
                      - europe-north1-b
    - name: r3
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: topology.kubernetes.io/zone
                    operator: In
                    values:
                      - europe-north1-c
  config:
    jvm-options:
      initial_heap_size: "512m"
      max_heap_size: "512m"
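The three zones referenced above (europe-north1-a/b/c) do not exist in the kind cluster the CI jobs run on, so all three pods are guaranteed to sit Pending with PodScheduled=False/Unschedulable — exactly the failure state the new e2e test and the failureModeDetection fix exercise. Note that a JSON merge patch replaces the racks array wholesale, so patching in racks without affinity (as the test does) clears the constraint and lets the datacenter recover. Outside the test harness, the same check and recovery can be reproduced with plain kubectl (names follow the cluster1/dc1 convention above):

kubectl get pod cluster1-dc1-r1-sts-0 \
  -o jsonpath='{.status.conditions[?(@.type=="PodScheduled")].reason}'

kubectl patch cassandradatacenter dc1 --type merge \
  -p '{"spec": {"racks": [{"name": "r1"}, {"name": "r2"}, {"name": "r3"}]}}'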
