From 67eee3ff6a81f9a9ab59bebe61fec9fc98cac332 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Tue, 17 Jan 2023 17:56:27 +1100 Subject: [PATCH 1/9] Start stubbing out methods for agent config. --- apis/telemetry/v1alpha1/telemetry_types.go | 10 ++++++++++ pkg/telemetry/cassandra_agent_config.go | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 pkg/telemetry/cassandra_agent_config.go diff --git a/apis/telemetry/v1alpha1/telemetry_types.go b/apis/telemetry/v1alpha1/telemetry_types.go index a935ec012..515b4c55b 100644 --- a/apis/telemetry/v1alpha1/telemetry_types.go +++ b/apis/telemetry/v1alpha1/telemetry_types.go @@ -138,3 +138,13 @@ type Endpoint struct { Address string `json:"address,omitempty"` Port string `json:"port,omitempty"` } + +type CassandraTelemetryAgentSpec struct { + Endpoint TelemetryAgentEndpoint `json:"endpoint,omitempty"` + Filters promapi.RelabelConfig `json:"filters,omitempty"` +} + +type TelemetryAgentEndpoint struct { + Address string `json:"address,omitempty"` + Port string `json:"port,omitempty"` +} diff --git a/pkg/telemetry/cassandra_agent_config.go b/pkg/telemetry/cassandra_agent_config.go new file mode 100644 index 000000000..6175763d4 --- /dev/null +++ b/pkg/telemetry/cassandra_agent_config.go @@ -0,0 +1,19 @@ +package telemetry + +import ( + "context" + + telemetryapi "github.com/k8ssandra/k8ssandra-operator/apis/telemetry/v1alpha1" + "gopkg.in/yaml.v2" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func ReconcileTelemetryAgentConfigMap(ctx context.Context, remoteClient client.Client, telemetrySpec telemetryapi.TelemetrySpec) error { + yamlData, err := yaml.Marshal(&telemetrySpec.Cassandra) + if err != nil { + return err + } + cm := corev1.ConfigMap{} + +} From a823dd1ac1efbb328b3bb15ee9c4131f59d6a485 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Fri, 20 Jan 2023 16:08:10 +1100 Subject: [PATCH 2/9] Add reconciliation logic, tests. 
--- apis/telemetry/v1alpha1/telemetry_types.go | 4 ++-- pkg/telemetry/cassandra_agent_config.go | 19 ------------------- 2 files changed, 2 insertions(+), 21 deletions(-) delete mode 100644 pkg/telemetry/cassandra_agent_config.go diff --git a/apis/telemetry/v1alpha1/telemetry_types.go b/apis/telemetry/v1alpha1/telemetry_types.go index 515b4c55b..dbd812665 100644 --- a/apis/telemetry/v1alpha1/telemetry_types.go +++ b/apis/telemetry/v1alpha1/telemetry_types.go @@ -140,8 +140,8 @@ type Endpoint struct { } type CassandraTelemetryAgentSpec struct { - Endpoint TelemetryAgentEndpoint `json:"endpoint,omitempty"` - Filters promapi.RelabelConfig `json:"filters,omitempty"` + Endpoint TelemetryAgentEndpoint `json:"endpoint,omitempty"` + Filters []promapi.RelabelConfig `json:"filters,omitempty"` } type TelemetryAgentEndpoint struct { diff --git a/pkg/telemetry/cassandra_agent_config.go b/pkg/telemetry/cassandra_agent_config.go deleted file mode 100644 index 6175763d4..000000000 --- a/pkg/telemetry/cassandra_agent_config.go +++ /dev/null @@ -1,19 +0,0 @@ -package telemetry - -import ( - "context" - - telemetryapi "github.com/k8ssandra/k8ssandra-operator/apis/telemetry/v1alpha1" - "gopkg.in/yaml.v2" - corev1 "k8s.io/api/core/v1" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -func ReconcileTelemetryAgentConfigMap(ctx context.Context, remoteClient client.Client, telemetrySpec telemetryapi.TelemetrySpec) error { - yamlData, err := yaml.Marshal(&telemetrySpec.Cassandra) - if err != nil { - return err - } - cm := corev1.ConfigMap{} - -} From 47ff0770a83e6526982277c0a2cb8b29e29ec6c6 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Fri, 20 Jan 2023 16:19:42 +1100 Subject: [PATCH 3/9] CRD upgrades, codegen upgrades. 
--- apis/telemetry/v1alpha1/zz_generated.deepcopy.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/apis/telemetry/v1alpha1/zz_generated.deepcopy.go b/apis/telemetry/v1alpha1/zz_generated.deepcopy.go index 54da6841b..543b4c55a 100644 --- a/apis/telemetry/v1alpha1/zz_generated.deepcopy.go +++ b/apis/telemetry/v1alpha1/zz_generated.deepcopy.go @@ -121,6 +121,21 @@ func (in *PrometheusTelemetrySpec) DeepCopy() *PrometheusTelemetrySpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TelemetryAgentEndpoint) DeepCopyInto(out *TelemetryAgentEndpoint) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TelemetryAgentEndpoint. +func (in *TelemetryAgentEndpoint) DeepCopy() *TelemetryAgentEndpoint { + if in == nil { + return nil + } + out := new(TelemetryAgentEndpoint) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TelemetrySpec) DeepCopyInto(out *TelemetrySpec) { *out = *in From e54491bfce30e9d3a437df8b01ce4bb10d919206 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 23 Jan 2023 13:54:52 +1100 Subject: [PATCH 4/9] Make type names more concise. 
--- .DS_Store | Bin 6148 -> 0 bytes apis/telemetry/v1alpha1/telemetry_types.go | 10 -- .../v1alpha1/zz_generated.deepcopy.go | 15 --- .../cassandra_agent/cassandra_agent_config.go | 89 ++++++++++++++++++ 4 files changed, 89 insertions(+), 25 deletions(-) diff --git a/.DS_Store b/.DS_Store index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Tue, 14 Feb 2023 18:36:09 +1100 Subject: [PATCH 5/9] The Endpoint type within the CassandraAgentSpec needs to be a pointer as it must be optional from the perspective of the CR so that it can have defaulting behaviour in the controller. --- apis/telemetry/v1alpha1/telemetry_types.go | 2 +- .../v1alpha1/zz_generated.deepcopy.go | 6 +- .../cassandra_agent/cassandra_agent_config.go | 8 +- .../cassandra_agent_config_test.go | 173 +++++++++++++++++- 4 files changed, 178 insertions(+), 11 deletions(-) diff --git a/apis/telemetry/v1alpha1/telemetry_types.go b/apis/telemetry/v1alpha1/telemetry_types.go index a935ec012..b8db79ebf 100644 --- a/apis/telemetry/v1alpha1/telemetry_types.go +++ b/apis/telemetry/v1alpha1/telemetry_types.go @@ -130,7 +130,7 @@ type McacTelemetrySpec struct { } type CassandraAgentSpec struct { - Endpoint Endpoint `json:"endpoint,omitempty"` + Endpoint *Endpoint `json:"endpoint,omitempty"` Filters []promapi.RelabelConfig `json:"filters,omitempty"` } diff --git a/apis/telemetry/v1alpha1/zz_generated.deepcopy.go b/apis/telemetry/v1alpha1/zz_generated.deepcopy.go index 54da6841b..4c0b0f7eb 100644 --- a/apis/telemetry/v1alpha1/zz_generated.deepcopy.go +++ b/apis/telemetry/v1alpha1/zz_generated.deepcopy.go @@ -30,7 +30,11 @@ import ( // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *CassandraAgentSpec) DeepCopyInto(out *CassandraAgentSpec) { *out = *in - out.Endpoint = in.Endpoint + if in.Endpoint != nil { + in, out := &in.Endpoint, &out.Endpoint + *out = new(Endpoint) + **out = **in + } if in.Filters != nil { in, out := &in.Filters, &out.Filters *out = make([]monitoringv1.RelabelConfig, len(*in)) diff --git a/pkg/telemetry/cassandra_agent/cassandra_agent_config.go b/pkg/telemetry/cassandra_agent/cassandra_agent_config.go index 736e27318..2dac275d5 100644 --- a/pkg/telemetry/cassandra_agent/cassandra_agent_config.go +++ b/pkg/telemetry/cassandra_agent/cassandra_agent_config.go @@ -5,7 +5,7 @@ import ( "path/filepath" "time" - goalesceutils "github.com/k8ssandra/k8ssandra-operator/pkg/goalesce" + "github.com/adutra/goalesce" cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" k8ssandraapi "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" @@ -24,7 +24,7 @@ import ( var ( agentConfigLocation = "/opt/management-api/configs/metrics-collector.yaml" defaultAgentConfig = telemetryapi.CassandraAgentSpec{ - Endpoint: telemetryapi.Endpoint{ + Endpoint: &telemetryapi.Endpoint{ Port: "9000", Address: "127.0.0.1", }, @@ -130,8 +130,8 @@ func (c Configurator) GetTelemetryAgentConfigMap() (*corev1.ConfigMap, error) { var yamlData []byte var err error if c.TelemetrySpec.Cassandra != nil { - goalesceutils.MergeCRs(defaultAgentConfig, *c.TelemetrySpec.Cassandra) - yamlData, err = yaml.Marshal(&c.TelemetrySpec.Cassandra) + mergedSpec := goalesce.MustDeepMerge(&defaultAgentConfig, c.TelemetrySpec.Cassandra) + yamlData, err = yaml.Marshal(&mergedSpec) if err != nil { return &corev1.ConfigMap{}, err } diff --git a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go index 46328ee9c..33fdba3b9 100644 --- a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go +++ b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go @@ -8,6 +8,7 @@ import 
( k8ssandraapi "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" telemetryapi "github.com/k8ssandra/k8ssandra-operator/apis/telemetry/v1alpha1" + "github.com/k8ssandra/k8ssandra-operator/pkg/cassandra" telemetry "github.com/k8ssandra/k8ssandra-operator/pkg/telemetry" testutils "github.com/k8ssandra/k8ssandra-operator/pkg/test" promapi "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" @@ -27,10 +28,95 @@ var ( DcNamespace: testCluster.Spec.Cassandra.Datacenters[0].Meta.Namespace, DcName: testCluster.Spec.Cassandra.Datacenters[0].Meta.Name, } - expectedYaml string = `endpoint: + allDefinedYaml string = `endpoint: address: 127.0.0.1 port: "10000" filters: +- action: drop + regex: (.*);(b.*) + separator: ; + sourceLabels: + - tag1 + - tag2 +` + endpointDefinedYaml string = `endpoint: + address: 192.168.1.10 + port: "50000" +filters: +- regex: org\.apache\.cassandra\.metrics\.Table.* + replacement: "true" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.table.* + replacement: "true" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.table\.live_ss_table_count + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.Table\.LiveSSTableCount + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.table\.live_disk_space_used + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.table\.LiveDiskSpaceUsed + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.Table\.Memtable + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.Table\.Compaction + replacement: "false" + sourceLabels: 
+ - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.table\.read + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.table\.write + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.table\.range + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.table\.coordinator + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- regex: org\.apache\.cassandra\.metrics\.table\.dropped_mutations + replacement: "false" + sourceLabels: + - __origname__ + targetLabel: should_drop +- action: drop + regex: "true" + sourceLabels: + - should_drop +` + filtersDefinedYaml = `endpoint: + address: 127.0.0.1 + port: "9000" +filters: - action: drop regex: (.*);(b.*) separator: ; @@ -46,7 +132,7 @@ func getExpectedConfigMap() corev1.ConfigMap { Namespace: Cfg.DcNamespace, Name: Cfg.Kluster.Name + "-" + Cfg.DcName + "-metrics-agent-config", }, - Data: map[string]string{filepath.Base(agentConfigLocation): expectedYaml}, + Data: map[string]string{filepath.Base(agentConfigLocation): allDefinedYaml}, } return expectedCm } @@ -61,12 +147,15 @@ func getExampleTelemetrySpec() telemetryapi.TelemetrySpec { Action: "drop", }, } - tspec.Cassandra.Endpoint.Address = "127.0.0.1" - tspec.Cassandra.Endpoint.Port = "10000" + tspec.Cassandra.Endpoint = &telemetryapi.Endpoint{ + Address: "127.0.0.1", + Port: "10000", + } return *tspec } -func Test_GetTelemetryAgentConfigMap(t *testing.T) { +// Make sure when both endpoint and filters are defined they come through to yaml. 
+func Test_GetTelemetryAgentConfigMapAllDefined(t *testing.T) { expectedCm := getExpectedConfigMap() Cfg.RemoteClient = testutils.NewFakeClientWRestMapper() // Reset the Client Cfg.TelemetrySpec = getExampleTelemetrySpec() @@ -76,3 +165,77 @@ func Test_GetTelemetryAgentConfigMap(t *testing.T) { assert.Equal(t, expectedCm.Name, cm.Name) assert.Equal(t, expectedCm.Namespace, cm.Namespace) } + +// Make sure we get default filters when only endpoint is defined in spec. +func Test_GetTelemetryAgentConfigMapWithDefinedEndpoint(t *testing.T) { + expectedCm := getExpectedConfigMap() + expectedCm.Data[filepath.Base(agentConfigLocation)] = endpointDefinedYaml + Cfg.RemoteClient = testutils.NewFakeClientWRestMapper() // Reset the Client + Cfg.TelemetrySpec = getExampleTelemetrySpec() + Cfg.TelemetrySpec.Cassandra.Filters = nil + Cfg.TelemetrySpec.Cassandra.Endpoint = &telemetryapi.Endpoint{ + Address: "192.168.1.10", + Port: "50000", + } + cm, err := Cfg.GetTelemetryAgentConfigMap() + println(cm.Data) + assert.NoError(t, err) + assert.Equal(t, expectedCm.Data["metric-collector.yaml"], cm.Data["metric-collector.yaml"]) + assert.Equal(t, expectedCm.Name, cm.Name) + assert.Equal(t, expectedCm.Namespace, cm.Namespace) +} + +func Test_GetTelemetryAgentConfigMapWithDefinedFilters(t *testing.T) { + expectedCm := getExpectedConfigMap() + expectedCm.Data[filepath.Base(agentConfigLocation)] = filtersDefinedYaml + Cfg.RemoteClient = testutils.NewFakeClientWRestMapper() // Reset the Client + Cfg.TelemetrySpec = getExampleTelemetrySpec() + Cfg.TelemetrySpec.Cassandra.Filters = []promapi.RelabelConfig{ + { + SourceLabels: []string{"tag1", "tag2"}, + Separator: ";", + Regex: "(.*);(b.*)", + Action: "drop", + }, + } + Cfg.TelemetrySpec.Cassandra.Endpoint = nil + cm, err := Cfg.GetTelemetryAgentConfigMap() + println(cm.Data) + assert.NoError(t, err) + assert.Equal(t, expectedCm.Data["metric-collector.yaml"], cm.Data["metric-collector.yaml"]) + assert.Equal(t, expectedCm.Name, cm.Name) + 
assert.Equal(t, expectedCm.Namespace, cm.Namespace) +} + +func Test_AddStsVolumes(t *testing.T) { + dc := testutils.NewCassandraDatacenter("test-dc", "test-namespace") + Cfg.RemoteClient = testutils.NewFakeClientWRestMapper() // Reset the Client + Cfg.AddStsVolumes(&dc) + expectedVol := corev1.Volume{ + Name: "metrics-agent-config", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + Items: []corev1.KeyToPath{ + { + Key: filepath.Base(agentConfigLocation), + Path: filepath.Base(agentConfigLocation), + }, + }, + LocalObjectReference: corev1.LocalObjectReference{ + Name: Cfg.Kluster.Name + "-" + Cfg.DcName + "-metrics-agent-config", + }, + }, + }, + } + assert.Contains(t, dc.Spec.PodTemplateSpec.Spec.Volumes, expectedVol) + cassContainer, found := cassandra.FindContainer(dc.Spec.PodTemplateSpec, "cassandra") + if !found { + assert.Fail(t, "no cassandra container found") + } + expectedVm := corev1.VolumeMount{ + Name: "metrics-agent-config", + MountPath: "/opt/management-api/configs/metric-collector.yaml", + SubPath: "metric-collector.yaml", + } + assert.Contains(t, dc.Spec.PodTemplateSpec.Spec.Containers[cassContainer].VolumeMounts, expectedVm) +} From 5771b7e795c4e3573f9835026e3fcd9c1e2907dc Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Wed, 15 Feb 2023 17:57:01 +1100 Subject: [PATCH 6/9] Replace Cassandra agent `filters` field with `relabels`. 
--- apis/telemetry/v1alpha1/telemetry_types.go | 2 +- .../v1alpha1/zz_generated.deepcopy.go | 4 ++-- .../bases/k8ssandra.io_k8ssandraclusters.yaml | 12 +++++----- .../bases/reaper.k8ssandra.io_reapers.yaml | 2 +- .../stargate.k8ssandra.io_stargates.yaml | 4 ++-- .../cassandra_agent/cassandra_agent_config.go | 2 +- .../cassandra_agent_config_test.go | 22 +++++++++---------- 7 files changed, 24 insertions(+), 24 deletions(-) diff --git a/apis/telemetry/v1alpha1/telemetry_types.go b/apis/telemetry/v1alpha1/telemetry_types.go index b8db79ebf..ec85f84f8 100644 --- a/apis/telemetry/v1alpha1/telemetry_types.go +++ b/apis/telemetry/v1alpha1/telemetry_types.go @@ -131,7 +131,7 @@ type McacTelemetrySpec struct { type CassandraAgentSpec struct { Endpoint *Endpoint `json:"endpoint,omitempty"` - Filters []promapi.RelabelConfig `json:"filters,omitempty"` + Relabels []promapi.RelabelConfig `json:"relabels,omitempty"` } type Endpoint struct { diff --git a/apis/telemetry/v1alpha1/zz_generated.deepcopy.go b/apis/telemetry/v1alpha1/zz_generated.deepcopy.go index 4c0b0f7eb..2a440e3c0 100644 --- a/apis/telemetry/v1alpha1/zz_generated.deepcopy.go +++ b/apis/telemetry/v1alpha1/zz_generated.deepcopy.go @@ -35,8 +35,8 @@ func (in *CassandraAgentSpec) DeepCopyInto(out *CassandraAgentSpec) { *out = new(Endpoint) **out = **in } - if in.Filters != nil { - in, out := &in.Filters, &out.Filters + if in.Relabels != nil { + in, out := &in.Relabels, &out.Relabels *out = make([]monitoringv1.RelabelConfig, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) diff --git a/config/crd/bases/k8ssandra.io_k8ssandraclusters.yaml b/config/crd/bases/k8ssandra.io_k8ssandraclusters.yaml index eac3ea3a7..cda309b88 100644 --- a/config/crd/bases/k8ssandra.io_k8ssandraclusters.yaml +++ b/config/crd/bases/k8ssandra.io_k8ssandraclusters.yaml @@ -13740,7 +13740,7 @@ spec: port: type: string type: object - filters: + relabels: items: description: 'RelabelConfig allows dynamic rewriting of the label set, 
being @@ -14259,7 +14259,7 @@ spec: port: type: string type: object - filters: + relabels: items: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied @@ -16841,7 +16841,7 @@ spec: port: type: string type: object - filters: + relabels: items: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before @@ -25518,7 +25518,7 @@ spec: port: type: string type: object - filters: + relabels: items: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before @@ -28195,7 +28195,7 @@ spec: port: type: string type: object - filters: + relabels: items: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before @@ -29864,7 +29864,7 @@ spec: port: type: string type: object - filters: + relabels: items: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before diff --git a/config/crd/bases/reaper.k8ssandra.io_reapers.yaml b/config/crd/bases/reaper.k8ssandra.io_reapers.yaml index ffb8c9e5b..fcbe86856 100644 --- a/config/crd/bases/reaper.k8ssandra.io_reapers.yaml +++ b/config/crd/bases/reaper.k8ssandra.io_reapers.yaml @@ -2087,7 +2087,7 @@ spec: port: type: string type: object - filters: + relabels: items: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. 
diff --git a/config/crd/bases/stargate.k8ssandra.io_stargates.yaml b/config/crd/bases/stargate.k8ssandra.io_stargates.yaml index e630a422e..336d63e3f 100644 --- a/config/crd/bases/stargate.k8ssandra.io_stargates.yaml +++ b/config/crd/bases/stargate.k8ssandra.io_stargates.yaml @@ -2775,7 +2775,7 @@ spec: port: type: string type: object - filters: + relabels: items: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before @@ -3242,7 +3242,7 @@ spec: port: type: string type: object - filters: + relabels: items: description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. diff --git a/pkg/telemetry/cassandra_agent/cassandra_agent_config.go b/pkg/telemetry/cassandra_agent/cassandra_agent_config.go index 2dac275d5..465f56ff8 100644 --- a/pkg/telemetry/cassandra_agent/cassandra_agent_config.go +++ b/pkg/telemetry/cassandra_agent/cassandra_agent_config.go @@ -28,7 +28,7 @@ var ( Port: "9000", Address: "127.0.0.1", }, - Filters: []promapi.RelabelConfig{ + Relabels: []promapi.RelabelConfig{ { SourceLabels: []string{"__origname__"}, Regex: "org\\.apache\\.cassandra\\.metrics\\.Table.*", diff --git a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go index 33fdba3b9..3c0f44282 100644 --- a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go +++ b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go @@ -31,7 +31,7 @@ var ( allDefinedYaml string = `endpoint: address: 127.0.0.1 port: "10000" -filters: +relabels: - action: drop regex: (.*);(b.*) separator: ; @@ -42,7 +42,7 @@ filters: endpointDefinedYaml string = `endpoint: address: 192.168.1.10 port: "50000" -filters: +relabels: - regex: org\.apache\.cassandra\.metrics\.Table.* replacement: "true" sourceLabels: @@ -113,10 +113,10 @@ filters: sourceLabels: - should_drop ` - filtersDefinedYaml = `endpoint: + relabelsDefinedYaml = `endpoint: 
address: 127.0.0.1 port: "9000" -filters: +relabels: - action: drop regex: (.*);(b.*) separator: ; @@ -139,7 +139,7 @@ func getExpectedConfigMap() corev1.ConfigMap { func getExampleTelemetrySpec() telemetryapi.TelemetrySpec { tspec := &Cfg.TelemetrySpec - tspec.Cassandra.Filters = []promapi.RelabelConfig{ + tspec.Cassandra.Relabels = []promapi.RelabelConfig{ { SourceLabels: []string{"tag1", "tag2"}, Separator: ";", @@ -154,7 +154,7 @@ func getExampleTelemetrySpec() telemetryapi.TelemetrySpec { return *tspec } -// Make sure when both endpoint and filters are defined they come through to yaml. +// Make sure when both endpoint and relabels are defined they come through to yaml. func Test_GetTelemetryAgentConfigMapAllDefined(t *testing.T) { expectedCm := getExpectedConfigMap() Cfg.RemoteClient = testutils.NewFakeClientWRestMapper() // Reset the Client @@ -166,13 +166,13 @@ func Test_GetTelemetryAgentConfigMapAllDefined(t *testing.T) { assert.Equal(t, expectedCm.Namespace, cm.Namespace) } -// Make sure we get default filters when only endpoint is defined in spec. +// Make sure we get default relabels when only endpoint is defined in spec. 
func Test_GetTelemetryAgentConfigMapWithDefinedEndpoint(t *testing.T) { expectedCm := getExpectedConfigMap() expectedCm.Data[filepath.Base(agentConfigLocation)] = endpointDefinedYaml Cfg.RemoteClient = testutils.NewFakeClientWRestMapper() // Reset the Client Cfg.TelemetrySpec = getExampleTelemetrySpec() - Cfg.TelemetrySpec.Cassandra.Filters = nil + Cfg.TelemetrySpec.Cassandra.Relabels = nil Cfg.TelemetrySpec.Cassandra.Endpoint = &telemetryapi.Endpoint{ Address: "192.168.1.10", Port: "50000", @@ -185,12 +185,12 @@ func Test_GetTelemetryAgentConfigMapWithDefinedEndpoint(t *testing.T) { assert.Equal(t, expectedCm.Namespace, cm.Namespace) } -func Test_GetTelemetryAgentConfigMapWithDefinedFilters(t *testing.T) { +func Test_GetTelemetryAgentConfigMapWithDefinedRelabels(t *testing.T) { expectedCm := getExpectedConfigMap() - expectedCm.Data[filepath.Base(agentConfigLocation)] = filtersDefinedYaml + expectedCm.Data[filepath.Base(agentConfigLocation)] = relabelsDefinedYaml Cfg.RemoteClient = testutils.NewFakeClientWRestMapper() // Reset the Client Cfg.TelemetrySpec = getExampleTelemetrySpec() - Cfg.TelemetrySpec.Cassandra.Filters = []promapi.RelabelConfig{ + Cfg.TelemetrySpec.Cassandra.Relabels = []promapi.RelabelConfig{ { SourceLabels: []string{"tag1", "tag2"}, Separator: ";", From 982158fb86b0f558487ae6a11912a9c64afb5b27 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Fri, 17 Feb 2023 09:34:57 +1100 Subject: [PATCH 7/9] Update metric-collector.yaml to metrics-collector.yaml --- .../en/components/metrics-collector/_index.md | 2 +- docs/content/en/tasks/monitor/_index.md | 201 +++++++++++++++++- .../cassandra_agent_config_test.go | 8 +- 3 files changed, 205 insertions(+), 6 deletions(-) diff --git a/docs/content/en/components/metrics-collector/_index.md b/docs/content/en/components/metrics-collector/_index.md index 7ee5ff522..070a1234f 100644 --- a/docs/content/en/components/metrics-collector/_index.md +++ b/docs/content/en/components/metrics-collector/_index.md @@ -82,7 
+82,7 @@ Ingress or port forwarding can be used to expose access to the Prometheus and Gr 2. How can I filter out metrics I don't care about? - Please read the [metric-collector.yaml](https://github.com/datastax/metric-collector-for-apache-cassandra/blob/master/config/metric-collector.yaml) section in the MCAC GitHub repo on how to add filtering rules. + Please read the [metrics-collector.yaml](https://github.com/datastax/metric-collector-for-apache-cassandra/blob/master/config/metrics-collector.yaml) section in the MCAC GitHub repo on how to add filtering rules. 3. What is the datalog? And what is it for? diff --git a/docs/content/en/tasks/monitor/_index.md b/docs/content/en/tasks/monitor/_index.md index 67c7fdead..5f6c74ab6 100644 --- a/docs/content/en/tasks/monitor/_index.md +++ b/docs/content/en/tasks/monitor/_index.md @@ -3,4 +3,203 @@ title: "Monitor K8ssandra" linkTitle: "Monitor" weight: 6 description: "Access tools to monitor your Apache Cassandra® cluster running in Kubernetes." ---- \ No newline at end of file +--- + + +# Monitoring using the kube-prometheus-stack + +While K8ssandra v1 managed the deployment of the kube-prometheus stack, that ability was removed in k8ssandra-operator. +The following guide will show you how to install Prometheus and Grafana on your Kubernetes cluster using the kube-prometheus-stack set of Helm charts. + +## Prerequisites + +The k8ssandra-operator should be installed in the `k8ssandra-operator` namespace. +See the [installation documentation]({{< relref "/install" >}}) for more information. + +## Installing and configuring the kube-prometheus-stack + +`k8ssandra-operator` has integrations with Prometheus which allow for the simple rollout of Prometheus ServiceMonitors for Stargate, Cassandra and Reaper. +ServiceMonitors are custom resources of [prometheus-operator](https://github.com/prometheus-operator/prometheus-operator) which describe the set of targets to be scraped by Prometheus.
+The prometheus-operator is a core component of the kube-prometheus-stack. + +### Install the kube-prometheus-stack + +We will install the kube-prometheus-stack in the same `k8ssandra-operator` namespace in order to simplify this guide. +Create the following `kube-prom-stack-values.yaml` file: + +```yaml +prometheus: + prometheusSpec: + serviceMonitorSelectorNilUsesHelmValues: false + serviceMonitorSelector: {} + serviceMonitorNamespaceSelector: {} +grafana: + enabled: true + adminUser: admin + adminPassword: secret + defaultDashboardsEnabled: false + # -- Additional plugins to be installed during Grafana startup, + # `grafana-polystat-panel` is used by the default Cassandra dashboards. + plugins: + - grafana-polystat-panel + grafana.ini: {} + image: + repository: grafana/grafana + tag: 7.5.11 + sha: "" + pullPolicy: IfNotPresent +``` + +*Download this file [here](kube-prom-stack-values.yaml).* + +Add the prometheus-community Helm repository: + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +Then, install the kube-prometheus-stack using the following command, and referencing the `kube-prom-stack-values.yaml` file: + +```bash +helm install prometheus-grafana prometheus-community/kube-prometheus-stack -n k8ssandra-operator -f kube-prom-stack-values.yaml +``` + +This will install all the monitoring components in the `k8ssandra-operator` namespace. 
+ +### Creating a K8ssandraCluster with telemetry enabled + +The following guide assumes k8ssandra-operator is already installed, and a K8ssandraCluster object was created with the following manifest, in the `k8ssandra-operator` namespace: + +```yaml +apiVersion: k8ssandra.io/v1alpha1 +kind: K8ssandraCluster +metadata: + name: test + namespace: k8ssandra-operator +spec: + cassandra: + serverVersion: "4.0.3" + serverImage: k8ssandra/cass-management-api:4.0.3 + telemetry: + prometheus: + enabled: true + storageConfig: + cassandraDataVolumeClaimSpec: + storageClassName: standard + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + config: + jvmOptions: + heapSize: 512M + datacenters: + - metadata: + name: dc1 + size: 3 + mgmtAPIHeap: 64Mi + stargate: + size: 1 + telemetry: + prometheus: + enabled: true + reaper: + keyspace: reaper_db + telemetry: + prometheus: + enabled: true +``` +*Download this manifest [here](k8ssandra.yaml).* + +Setting `telemetry.prometheus.enabled` to `true` on the `.spec.cassandra`, `.spec.stargate` and `.spec.reaper` sections of the K8ssandraCluster CR will automatically create the ServiceMonitors. +*Note: Reaper's telemetry block was added in K8ssandra v1.2.0 and Reaper v3.2.0.* + +You can selectively enable service monitor creation for each component without any requirement to enable them all. +Wait for the pods to come up in the `k8ssandra-operator` namespace and fully start. + +Running `kubectl get servicemonitor -n k8ssandra-operator` should return three ServiceMonitor resources once all the pods are up and running. 
+You should get the following output: + +```bash +% kubectl get servicemonitors -n k8ssandra-operator +NAME AGE +prometheus-grafana 7m41s +prometheus-grafana-kube-pr-alertmanager 7m41s +prometheus-grafana-kube-pr-apiserver 7m41s +prometheus-grafana-kube-pr-coredns 7m41s +prometheus-grafana-kube-pr-kube-controller-manager 7m41s +prometheus-grafana-kube-pr-kube-etcd 7m41s +prometheus-grafana-kube-pr-kube-proxy 7m41s +prometheus-grafana-kube-pr-kube-scheduler 7m41s +prometheus-grafana-kube-pr-kubelet 7m41s +prometheus-grafana-kube-pr-operator 7m41s +prometheus-grafana-kube-pr-prometheus 7m41s +prometheus-grafana-kube-state-metrics 7m41s +prometheus-grafana-prometheus-node-exporter 7m41s +test-dc1-cass-servicemonitor 5m47s +test-dc1-reaper-reaper-servicemonitor 5m47s +test-dc1-stargate-stargate-servicemonitor 5m47s +``` + +### Install the Grafana dashboards + +Grafana will pick up dashboards passed as configmaps that have the label `grafana_dashboard: "1"`. +Create the overview, condensed and stargate dashboards (download the manifest [here](grafana-dashboards.yaml)) configmaps: + +```bash +kubectl apply -f grafana-dashboards.yaml -n k8ssandra-operator +``` + +You can port-forward the Grafana service to access the dashboard at [http://localhost:3000](http://localhost:3000): `kubectl port-forward svc/grafana-service 3000:3000` +Log in with the credentials defined in the values file: `admin` / `secret` + +You should then see the following list of available dashboards: +![Dashboard list](grafana-dashboard-list.png) + +Clicking on the Overview Dashboard should get you to a screen similar to this: +![Overview Dashboard](grafana-overview-dashboard.png) + +### Filtering metrics + +Cassandra provides a lot of metrics which can create some overload, especially when there are many tables in a cluster. 
[Filtering rules for MCAC](https://github.com/datastax/metric-collector-for-apache-cassandra/blob/master/config/metrics-collector.yaml#L9-L72) can be defined in the telemetry spec: + +``` +apiVersion: k8ssandra.io/v1alpha1 +kind: K8ssandraCluster +metadata: + name: test +spec: + cassandra: + telemetry: + prometheus: + enabled: true + mcacMetricFilters: + - "deny:org.apache.cassandra.metrics.Table" + - "allow:org.apache.cassandra.metrics.Table.LiveSSTableCount" +``` + +When no filter is explicitly defined in the spec, default K8ssandra v1.x filters will be applied: + +``` + - "deny:org.apache.cassandra.metrics.Table" + - "deny:org.apache.cassandra.metrics.table" + - "allow:org.apache.cassandra.metrics.table.live_ss_table_count" + - "allow:org.apache.cassandra.metrics.Table.LiveSSTableCount" + - "allow:org.apache.cassandra.metrics.table.live_disk_space_used" + - "allow:org.apache.cassandra.metrics.table.LiveDiskSpaceUsed" + - "allow:org.apache.cassandra.metrics.Table.Pending" + - "allow:org.apache.cassandra.metrics.Table.Memtable" + - "allow:org.apache.cassandra.metrics.Table.Compaction" + - "allow:org.apache.cassandra.metrics.table.read" + - "allow:org.apache.cassandra.metrics.table.write" + - "allow:org.apache.cassandra.metrics.table.range" + - "allow:org.apache.cassandra.metrics.table.coordinator" + - "allow:org.apache.cassandra.metrics.table.dropped_mutations" +``` + +## Next steps + +* Explore other K8ssandra Operator [tasks]({{< relref "/tasks" >}}). +* See the [Reference]({{< relref "/reference" >}}) topics for information about K8ssandra Operator Custom Resource Definitions (CRDs) and the single K8ssandra Operator Helm chart. 
diff --git a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go index 3c0f44282..63341ee9f 100644 --- a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go +++ b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go @@ -180,7 +180,7 @@ func Test_GetTelemetryAgentConfigMapWithDefinedEndpoint(t *testing.T) { cm, err := Cfg.GetTelemetryAgentConfigMap() println(cm.Data) assert.NoError(t, err) - assert.Equal(t, expectedCm.Data["metric-collector.yaml"], cm.Data["metric-collector.yaml"]) + assert.Equal(t, expectedCm.Data["metrics-collector.yaml"], cm.Data["metrics-collector.yaml"]) assert.Equal(t, expectedCm.Name, cm.Name) assert.Equal(t, expectedCm.Namespace, cm.Namespace) } @@ -202,7 +202,7 @@ func Test_GetTelemetryAgentConfigMapWithDefinedRelabels(t *testing.T) { cm, err := Cfg.GetTelemetryAgentConfigMap() println(cm.Data) assert.NoError(t, err) - assert.Equal(t, expectedCm.Data["metric-collector.yaml"], cm.Data["metric-collector.yaml"]) + assert.Equal(t, expectedCm.Data["metrics-collector.yaml"], cm.Data["metrics-collector.yaml"]) assert.Equal(t, expectedCm.Name, cm.Name) assert.Equal(t, expectedCm.Namespace, cm.Namespace) } @@ -234,8 +234,8 @@ func Test_AddStsVolumes(t *testing.T) { } expectedVm := corev1.VolumeMount{ Name: "metrics-agent-config", - MountPath: "/opt/management-api/configs/metric-collector.yaml", - SubPath: "metric-collector.yaml", + MountPath: "/opt/management-api/configs/metrics-collector.yaml", + SubPath: "metrics-collector.yaml", } assert.Contains(t, dc.Spec.PodTemplateSpec.Spec.Containers[cassContainer].VolumeMounts, expectedVm) } From adaa30693d81233a162a3444fe2cee58bad2bae6 Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Fri, 17 Feb 2023 10:45:16 +1100 Subject: [PATCH 8/9] Make changes to default metrics so that new metrics name format is correctly parsed by regex. 
--- .../cassandra_agent/cassandra_agent_config.go | 54 +++++++++---------- .../cassandra_agent_config_test.go | 53 +++++++++--------- 2 files changed, 48 insertions(+), 59 deletions(-) diff --git a/pkg/telemetry/cassandra_agent/cassandra_agent_config.go b/pkg/telemetry/cassandra_agent/cassandra_agent_config.go index 465f56ff8..6ca1236f5 100644 --- a/pkg/telemetry/cassandra_agent/cassandra_agent_config.go +++ b/pkg/telemetry/cassandra_agent/cassandra_agent_config.go @@ -30,80 +30,74 @@ var ( }, Relabels: []promapi.RelabelConfig{ { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.Table.*", + SourceLabels: []string{"table"}, + Regex: ".+", TargetLabel: "should_drop", Replacement: "true", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.table.*", - TargetLabel: "should_drop", - Replacement: "true", - }, - { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.table\\.live_ss_table_count", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_live_ss_table_count", TargetLabel: "should_drop", Replacement: "false", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.Table\\.LiveSSTableCount", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_live_disk_space_used", TargetLabel: "should_drop", Replacement: "false", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.table\\.live_disk_space_used", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_memtable.*", TargetLabel: "should_drop", Replacement: "false", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.table\\.LiveDiskSpaceUsed", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_all_memtables.*", TargetLabel: "should_drop", Replacement: "false", }, { 
- SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.Table\\.Memtable", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_compaction_bytes_written", TargetLabel: "should_drop", Replacement: "false", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.Table\\.Compaction", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_pending_compactions", TargetLabel: "should_drop", Replacement: "false", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.table\\.read", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_read_.*", TargetLabel: "should_drop", Replacement: "false", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.table\\.write", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_write_.*", TargetLabel: "should_drop", Replacement: "false", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.table\\.range", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_range.*", TargetLabel: "should_drop", Replacement: "false", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.table\\.coordinator", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_coordinator_.*", TargetLabel: "should_drop", Replacement: "false", }, { - SourceLabels: []string{"__origname__"}, - Regex: "org\\.apache\\.cassandra\\.metrics\\.table\\.dropped_mutations", + SourceLabels: []string{"__name__"}, + Regex: "org_apache_cassandra_metrics_table_dropped_mutations", TargetLabel: "should_drop", Replacement: "false", }, diff --git a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go index 
63341ee9f..aaf16615d 100644 --- a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go +++ b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go @@ -43,70 +43,65 @@ relabels: address: 192.168.1.10 port: "50000" relabels: -- regex: org\.apache\.cassandra\.metrics\.Table.* +- regex: .+ replacement: "true" sourceLabels: - - __origname__ + - table targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.table.* - replacement: "true" - sourceLabels: - - __origname__ - targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.table\.live_ss_table_count +- regex: org_apache_cassandra_metrics_table_live_ss_table_count replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.Table\.LiveSSTableCount +- regex: org_apache_cassandra_metrics_table_live_disk_space_used replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.table\.live_disk_space_used +- regex: org_apache_cassandra_metrics_table_memtable.* replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.table\.LiveDiskSpaceUsed +- regex: org_apache_cassandra_metrics_table_all_memtables.* replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.Table\.Memtable +- regex: org_apache_cassandra_metrics_table_compaction_bytes_written replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.Table\.Compaction +- regex: org_apache_cassandra_metrics_table_pending_compactions replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.table\.read +- regex: org_apache_cassandra_metrics_table_read_.* replacement: "false" sourceLabels: - - __origname__ 
+ - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.table\.write +- regex: org_apache_cassandra_metrics_table_write_.* replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.table\.range +- regex: org_apache_cassandra_metrics_table_range.* replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.table\.coordinator +- regex: org_apache_cassandra_metrics_table_coordinator_.* replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop -- regex: org\.apache\.cassandra\.metrics\.table\.dropped_mutations +- regex: org_apache_cassandra_metrics_table_dropped_mutations replacement: "false" sourceLabels: - - __origname__ + - __name__ targetLabel: should_drop - action: drop regex: "true" From 00b97dc611dd731f76eabf9a0c57a32a26542d7c Mon Sep 17 00:00:00 2001 From: Miles-Garnsey Date: Mon, 20 Feb 2023 12:15:54 +1100 Subject: [PATCH 9/9] Return error from ReconcileTelemetryAgentConfig when it errors. --- docs/content/en/tasks/monitor/_index.md | 200 ------------------ .../cassandra_agent/cassandra_agent_config.go | 2 +- .../cassandra_agent_config_test.go | 34 --- 3 files changed, 1 insertion(+), 235 deletions(-) diff --git a/docs/content/en/tasks/monitor/_index.md b/docs/content/en/tasks/monitor/_index.md index 5f6c74ab6..1bde08f78 100644 --- a/docs/content/en/tasks/monitor/_index.md +++ b/docs/content/en/tasks/monitor/_index.md @@ -3,203 +3,3 @@ title: "Monitor K8ssandra" linkTitle: "Monitor" weight: 6 description: "Access tools to monitor your Apache Cassandra® cluster running in Kubernetes." ---- - - -# Monitoring using the kube-prometheus-stack - -While K8ssandra v1 managed the deployment of the kube-prometheus stack, that ability was removed in k8ssandra-operator. 
-The following guide will show you how to install Prometheus and Grafana on your Kubernetes cluster using the kube-prometheus-stack set of Helm charts. - -## Prerequisites - -The k8ssandra-operator should be installed in the `k8ssanda-operator` namespace. -See the [installation documentation]({{< relref "/install" >}}) for more information. - -## Installing and configuring the kube-prometheus-stack - -`k8ssandra-operator` has integrations with Prometheus which allow for the simple rollout of Prometheus ServiceMonitors for Stargate, Cassandra and Reaper. -ServiceMonitors are custom resources of [prometheus-operator](https://github.com/prometheus-operator/prometheus-operator) which describe the set of targets to be scraped by Prometheus. -The prometheus-operator is a core component of the kube-prometheus-stack. - -### Install the kube-prometheus-stack - -We will install the kube-prometheus-stack in the same `k8ssandra-operator` namespace in order to simplify this guide. -Create the following `kube-prom-stack-values.yaml` file: - -```yaml -prometheus: - prometheusSpec: - serviceMonitorSelectorNilUsesHelmValues: false - serviceMonitorSelector: {} - serviceMonitorNamespaceSelector: {} -grafana: - enabled: true - adminUser: admin - adminPassword: secret - defaultDashboardsEnabled: false - # -- Additional plugins to be installed during Grafana startup, - # `grafana-polystat-panel` is used by the default Cassandra dashboards. 
- plugins: - - grafana-polystat-panel - grafana.ini: {} - image: - repository: grafana/grafana - tag: 7.5.11 - sha: "" - pullPolicy: IfNotPresent -``` - -*Download this file [here](kube-prom-stack-values.yaml).* - -Add the prometheus-community Helm repository: - -```bash -helm repo add prometheus-community https://prometheus-community.github.io/helm-charts -helm repo update -``` - -Then, install the kube-prometheus-stack using the following command, and referencing the `kube-prom-stack-values.yaml` file: - -```bash -helm install prometheus-grafana prometheus-community/kube-prometheus-stack -n k8ssandra-operator -f kube-prom-stack-values.yaml -``` - -This will install all the monitoring components in the `k8ssandra-operator` namespace. - -### Creating a K8ssandraCluster with telemetry enabled - -The following guide assumes k8ssandra-operator is already installed, and a K8ssandraCluster object was created with the following manifest, in the `k8ssandra-operator` namespace: - -```yaml -apiVersion: k8ssandra.io/v1alpha1 -kind: K8ssandraCluster -metadata: - name: test - namespace: k8ssandra-operator -spec: - cassandra: - serverVersion: "4.0.3" - serverImage: k8ssandra/cass-management-api:4.0.3 - telemetry: - prometheus: - enabled: true - storageConfig: - cassandraDataVolumeClaimSpec: - storageClassName: standard - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi - config: - jvmOptions: - heapSize: 512M - datacenters: - - metadata: - name: dc1 - size: 3 - mgmtAPIHeap: 64Mi - stargate: - size: 1 - telemetry: - prometheus: - enabled: true - reaper: - keyspace: reaper_db - telemetry: - prometheus: - enabled: true -``` -*Download this manifest [here](k8ssandra.yaml).* - -Setting `telemetry.prometheus.enabled` to `true` on the `.spec.cassandra`, `.spec.stargate` and `.spec.reaper` sections of the K8ssandraCluster CR will automatically create the ServiceMonitors. 
-*Note: Reaper's telemetry block was added in K8ssandra v1.2.0 and Reaper v3.2.0.* - -You can selectively enable service monitor creation for each component without any requirement to enable them all. -Wait for the pods to come up in the `k8ssandra-operator` namespace and fully start. - -Running `kubectl get servicemonitor -n k8ssandra-operator` should return three ServiceMonitor resources once all the pods are up and running. -You should get the following output: - -```bash -% kubectl get servicemonitors -n k8ssandra-operator -NAME AGE -prometheus-grafana 7m41s -prometheus-grafana-kube-pr-alertmanager 7m41s -prometheus-grafana-kube-pr-apiserver 7m41s -prometheus-grafana-kube-pr-coredns 7m41s -prometheus-grafana-kube-pr-kube-controller-manager 7m41s -prometheus-grafana-kube-pr-kube-etcd 7m41s -prometheus-grafana-kube-pr-kube-proxy 7m41s -prometheus-grafana-kube-pr-kube-scheduler 7m41s -prometheus-grafana-kube-pr-kubelet 7m41s -prometheus-grafana-kube-pr-operator 7m41s -prometheus-grafana-kube-pr-prometheus 7m41s -prometheus-grafana-kube-state-metrics 7m41s -prometheus-grafana-prometheus-node-exporter 7m41s -test-dc1-cass-servicemonitor 5m47s -test-dc1-reaper-reaper-servicemonitor 5m47s -test-dc1-stargate-stargate-servicemonitor 5m47s -``` - -### Install the Grafana dashboards - -Grafana will pick up dashboards passed as configmaps that have the label `grafana_dashboard: "1"`. 
-Create the overview, condensed and stargate dashboards (download the manifest [here](grafana-dashboards.yaml)) configmaps: - -```bash -kubectl apply -f grafana-dashboards.yaml -n k8ssandra-operator -``` - -You can port-forward the Grafana service to access the dashboard at [http://localhost:3000](http://localhost:3000): `kubectl port-forward svc/grafana-service 3000:3000` -Log in with the credentials defined in the values file: `admin` / `secret` - -You should then see the following list of available dashboards: -![Dashboard list](grafana-dashboard-list.png) - -Clicking on the Overview Dashboard should get you to a screen similar to this: -![Overview Dashboard](grafana-overview-dashboard.png) - -### Filtering metrics - -Cassandra provides a lot of metrics which can create some overload, especially when there are many tables in a cluster. [Filtering rules for MCAC](https://github.com/datastax/metric-collector-for-apache-cassandra/blob/master/config/metrics-collector.yaml#L9-L72) can be defined in the telemetry spec: - -``` -apiVersion: k8ssandra.io/v1alpha1 -kind: K8ssandraCluster -metadata: - name: test -spec: - cassandra: - telemetry: - prometheus: - enabled: true - mcacMetricFilters: - - "deny:org.apache.cassandra.metrics.Table" - - "allow:org.apache.cassandra.metrics.Table.LiveSSTableCount" -``` - -When no filter is explicitly defined in the spec, default K8ssandra v1.x filters will be applied: - -``` - - "deny:org.apache.cassandra.metrics.Table" - - "deny:org.apache.cassandra.metrics.table" - - "allow:org.apache.cassandra.metrics.table.live_ss_table_count" - - "allow:org.apache.cassandra.metrics.Table.LiveSSTableCount" - - "allow:org.apache.cassandra.metrics.table.live_disk_space_used" - - "allow:org.apache.cassandra.metrics.table.LiveDiskSpaceUsed" - - "allow:org.apache.cassandra.metrics.Table.Pending" - - "allow:org.apache.cassandra.metrics.Table.Memtable" - - "allow:org.apache.cassandra.metrics.Table.Compaction" - - 
"allow:org.apache.cassandra.metrics.table.read" - - "allow:org.apache.cassandra.metrics.table.write" - - "allow:org.apache.cassandra.metrics.table.range" - - "allow:org.apache.cassandra.metrics.table.coordinator" - - "allow:org.apache.cassandra.metrics.table.dropped_mutations" -``` - -## Next steps - -* Explore other K8ssandra Operator [tasks]({{< relref "/tasks" >}}). -* See the [Reference]({{< relref "/reference" >}}) topics for information about K8ssandra Operator Custom Resource Definitions (CRDs) and the single K8ssandra Operator Helm chart. diff --git a/pkg/telemetry/cassandra_agent/cassandra_agent_config.go b/pkg/telemetry/cassandra_agent/cassandra_agent_config.go index 6ca1236f5..a0311c750 100644 --- a/pkg/telemetry/cassandra_agent/cassandra_agent_config.go +++ b/pkg/telemetry/cassandra_agent/cassandra_agent_config.go @@ -162,7 +162,7 @@ func (c Configurator) ReconcileTelemetryAgentConfig(dc *cassdcapi.CassandraDatac recRes := reconciliation.ReconcileObject(c.Ctx, c.RemoteClient, c.RequeueDelay, *desiredCm) switch { case recRes.IsError(): - fallthrough + return recRes case recRes.IsRequeue(): return recRes } diff --git a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go index aaf16615d..a0149f6df 100644 --- a/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go +++ b/pkg/telemetry/cassandra_agent/cassandra_agent_config_test.go @@ -8,7 +8,6 @@ import ( k8ssandraapi "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" telemetryapi "github.com/k8ssandra/k8ssandra-operator/apis/telemetry/v1alpha1" - "github.com/k8ssandra/k8ssandra-operator/pkg/cassandra" telemetry "github.com/k8ssandra/k8ssandra-operator/pkg/telemetry" testutils "github.com/k8ssandra/k8ssandra-operator/pkg/test" promapi "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" @@ -201,36 +200,3 @@ func Test_GetTelemetryAgentConfigMapWithDefinedRelabels(t *testing.T) { assert.Equal(t, 
expectedCm.Name, cm.Name) assert.Equal(t, expectedCm.Namespace, cm.Namespace) } - -func Test_AddStsVolumes(t *testing.T) { - dc := testutils.NewCassandraDatacenter("test-dc", "test-namespace") - Cfg.RemoteClient = testutils.NewFakeClientWRestMapper() // Reset the Client - Cfg.AddStsVolumes(&dc) - expectedVol := corev1.Volume{ - Name: "metrics-agent-config", - VolumeSource: corev1.VolumeSource{ - ConfigMap: &corev1.ConfigMapVolumeSource{ - Items: []corev1.KeyToPath{ - { - Key: filepath.Base(agentConfigLocation), - Path: filepath.Base(agentConfigLocation), - }, - }, - LocalObjectReference: corev1.LocalObjectReference{ - Name: Cfg.Kluster.Name + "-" + Cfg.DcName + "-metrics-agent-config", - }, - }, - }, - } - assert.Contains(t, dc.Spec.PodTemplateSpec.Spec.Volumes, expectedVol) - cassContainer, found := cassandra.FindContainer(dc.Spec.PodTemplateSpec, "cassandra") - if !found { - assert.Fail(t, "no cassandra container found") - } - expectedVm := corev1.VolumeMount{ - Name: "metrics-agent-config", - MountPath: "/opt/management-api/configs/metrics-collector.yaml", - SubPath: "metrics-collector.yaml", - } - assert.Contains(t, dc.Spec.PodTemplateSpec.Spec.Containers[cassContainer].VolumeMounts, expectedVm) -}