diff --git a/dashboards/grafana/dashboards-jsonnet/read-path.jsonnet b/dashboards/grafana/dashboards-jsonnet/read-path.jsonnet
index 94e77cf..e2aae23 100644
--- a/dashboards/grafana/dashboards-jsonnet/read-path.jsonnet
+++ b/dashboards/grafana/dashboards-jsonnet/read-path.jsonnet
@@ -12,6 +12,12 @@ local textPanel = grafana.text;
local prefix = std.extVar('prefix');
+local fillLatencySeriesOverrides = {
+ 'alias': 'p999',
+ 'fillBelowTo': 'p98',
+ 'lines': false
+};
+
local fillMinMaxSeriesOverrides = {
'alias': 'max',
'fillBelowTo': 'min',
diff --git a/dashboards/grafana/dashboards-jsonnet/write-path.jsonnet b/dashboards/grafana/dashboards-jsonnet/write-path.jsonnet
new file mode 100644
index 0000000..942f82f
--- /dev/null
+++ b/dashboards/grafana/dashboards-jsonnet/write-path.jsonnet
@@ -0,0 +1,857 @@
+local grafana = (import 'grafonnet/grafana.libsonnet');
+local dashboard = grafana.dashboard;
+local prometheus = grafana.prometheus;
+local template = grafana.template;
+local row = grafana.row;
+
+local graphPanel = grafana.graphPanel;
+local textPanel = grafana.text;
+
+local prefix = std.extVar('prefix');
+
+local fillMinMaxSeriesOverrides = {
+ 'alias': 'max',
+ 'fillBelowTo': 'min',
+ 'lines': false
+};
+
+local removeMinlineSeriesOverrides = {
+ 'alias': 'min',
+ 'lines': false
+};
+
+// used in the single stat panels where higher is better - cache hit rates for example
+local reversedColors =[
+ '#d44a3a',
+ 'rgba(237, 129, 40, 0.89)',
+ '#299c46',
+];
+
+dashboard.new(
+ 'Cassandra Write Path',
+ schemaVersion=14,
+ refresh='30s',
+ time_from='now-30m',
+ editable=true,
+ tags=['Cassandra', 'Write', 'Write-Path', 'Mutation', 'Insert', 'Update', 'Upsert'],
+ style='dark'
+)
+.addTemplate(
+ grafana.template.datasource(
+ 'PROMETHEUS_DS',
+ 'prometheus',
+ 'Prometheus',
+ hide='all',
+ )
+)
+.addTemplate(
+ template.new(
+ 'cluster',
+ '$PROMETHEUS_DS',
+ 'label_values(collectd_collectd_queue_length{}, cluster)',
+ label='Cluster',
+ refresh='time',
+ )
+)
+.addTemplate(
+ template.new(
+ 'dc',
+ '$PROMETHEUS_DS',
+ 'label_values(collectd_collectd_queue_length{cluster="$cluster"}, dc)',
+ label='DataCenter',
+ refresh='time',
+ multi=true,
+ includeAll=true,
+ allValues=".*",
+ )
+)
+.addTemplate(
+ template.new(
+ 'rack',
+ '$PROMETHEUS_DS',
+ 'label_values(collectd_collectd_queue_length{cluster="$cluster", dc=~"$dc"}, rack)',
+ label='Rack',
+ refresh='time',
+ multi=true,
+ includeAll=true,
+ allValues=".*",
+ )
+)
+.addTemplate(
+ template.new(
+ 'node',
+ '$PROMETHEUS_DS',
+ 'label_values(collectd_collectd_queue_length{cluster="$cluster", dc=~"$dc", rack=~"$rack"}, instance)',
+ label='Node',
+ refresh='time',
+ multi=true,
+ includeAll=true,
+ allValues=".*",
+ )
+)
+.addRow(
+ row.new(title='', height='50px')
+ .addPanel(textPanel.new(transparent=true))
+ .addPanel(
+ textPanel.new(
+ transparent=true,
+ mode="html",
+ content='',
+ )
+ )
+ .addPanel(textPanel.new(transparent=true))
+)
+.addRow(
+ row.new(title='Local Writes Throughputs (Node Perspective)')
+ .addPanel(
+ graphPanel.new(
+ 'Local Writes Throughput per Table',
+ description='Total writes, cas_propose and cas_commit per table',
+ format='rps',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, keyspace, table) (rate(' + prefix + '_table_write_latency_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='Writes: {{keyspace}}.{{table}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, keyspace, table) (rate(' + prefix + '_table_cas_propose_latency_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='Cas Propose: {{keyspace}}.{{table}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, keyspace, table) (rate(' + prefix + '_table_cas_commit_latency_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='Cas Commit: {{keyspace}}.{{table}}',
+ )
+ )
+ )
+ .addPanel(
+ graphPanel.new(
+ 'Local Writes Throughput per Node',
+ description='Total writes, cas_propose and cas_commit per node',
+ format='rps',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance) (rate(' + prefix + '_table_write_latency_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='Writes: {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance) (rate(' + prefix + '_table_cas_propose_latency_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='Cas Propose: {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance) (rate(' + prefix + '_table_cas_commit_latency_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='Cas Commit: {{instance}}',
+ )
+ )
+ )
+ .addPanel(
+ graphPanel.new(
+ 'Local Writes Throughput per Node and Table',
+ description='Total writes, cas_propose and cas_commit per node and table',
+ format='rps',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance, keyspace, table) (rate(' + prefix + '_table_write_latency_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='Writes: {{keyspace}}.{{table}} on {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance, keyspace, table) (rate(' + prefix + '_table_cas_propose_latency_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='Cas Propose: {{keyspace}}.{{table}} on {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance, keyspace, table) (rate(' + prefix + '_table_cas_commit_latency_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='Cas Commit: {{keyspace}}.{{table}} on {{instance}}',
+ )
+ )
+ )
+)
+
+
+.addRow(
+ row.new(title='Local Write Latencies')
+
+ .addPanel(
+ graphPanel.new(
+ 'Local Write Latency per Table',
+ description='Write latency for local writes per table (98 - 999th percentile)',
+ format='µs',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='histogram_quantile(0.98, sum by (cluster, keyspace, table, le) (rate(' + prefix + '_table_write_latency_bucket{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])))',
+ legendFormat='p98 - {{keyspace}}.{{table}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='histogram_quantile(0.99, sum by (cluster, keyspace, table, le) (rate(' + prefix + '_table_write_latency_bucket{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])))',
+ legendFormat='p99 - {{keyspace}}.{{table}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='histogram_quantile(0.999, sum by (cluster, keyspace, table, le) (rate(' + prefix + '_table_write_latency_bucket{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])))',
+ legendFormat='p999 - {{keyspace}}.{{table}}',
+ )
+ )
+ )
+
+ .addPanel(
+ graphPanel.new(
+ 'Local Write Latency per Node',
+ description='Write latency for local writes per node (98 - 999th percentile)',
+ format='µs',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='histogram_quantile(0.98, sum by (cluster, dc, rack, instance, le) (rate(' + prefix + '_table_write_latency_bucket{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])))',
+ legendFormat='p98 - {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='histogram_quantile(0.99, sum by (cluster, dc, rack, instance, le) (rate(' + prefix + '_table_write_latency_bucket{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])))',
+ legendFormat='p99 - {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='histogram_quantile(0.999, sum by (cluster, dc, rack, instance, le) (rate(' + prefix + '_table_write_latency_bucket{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])))',
+ legendFormat='p999 - {{instance}}',
+ )
+ )
+ )
+
+ .addPanel(
+ graphPanel.new(
+ 'Local Write Latency per Table and Node',
+ description='Write latency for local writes per table and per node (98 - 999th percentile)',
+ format='µs',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='histogram_quantile(0.98, sum by (cluster, dc, rack, instance, keyspace, table, le) (rate(' + prefix + '_table_write_latency_bucket{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])))',
+ legendFormat='p98 - {{keyspace}}.{{table}} - {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='histogram_quantile(0.99, sum by (cluster, dc, rack, instance, keyspace, table, le) (rate(' + prefix + '_table_write_latency_bucket{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])))',
+ legendFormat='p99 - {{keyspace}}.{{table}} - {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='histogram_quantile(0.999, sum by (cluster, dc, rack, instance, keyspace, table, le) (rate(' + prefix + '_table_write_latency_bucket{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])))',
+ legendFormat='p999 - {{keyspace}}.{{table}} - {{instance}}',
+ )
+ )
+ )
+)
+
+.addRow(
+ row.new(title='Thread Pools')
+ .addPanel(
+ graphPanel.new(
+ 'Pending Tasks per Node',
+ description='Pending threads per node, by thread pool name filtering threads possibly impacting writes',
+ format='short',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance, pool_name) (' + prefix + '_thread_pools_pending_tasks{pool_name=~"memtable_flush_writer|memtable_post_flush|migration_stage|counter_mutation_stage|mutation_stage|view_mutation_stage|misc_stage|secondary_index_management|hints_dispatcher", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='{{instance}} - pending {{pool_name}}',
+ )
+ )
+ )
+ .addPanel(
+ graphPanel.new(
+ 'Blocked Tasks per Node',
+ description='Pending threads per node, by thread pool name filtering threads possibly impacting writes',
+ format='short',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance, pool_name) (' + prefix + '_thread_pools_total_blocked_tasks_total{pool_name=~"memtable_flush_writer|memtable_post_flush|migration_stage|counter_mutation_stage|mutation_stage|view_mutation_stage|misc_stage|secondary_index_management|hints_dispatcher", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='{{instance}} - blocked {{pool_name}}',
+ )
+ )
+ )
+ .addPanel(
+ graphPanel.new(
+ 'Dropped Messages per Node',
+ description='Pending threads per node, by thread pool name filtering threads possibly impacting writes',
+ format='short',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance, message_type) (rate(' + prefix + '_dropped_message_dropped_total{message_type=~"_trace|batch_store|batch_remove|counter_mutation|hint|mutation", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m]))',
+ legendFormat='{{instance}} - dropped {{message_type}}',
+ )
+ )
+ )
+)
+
+.addRow(
+ row.new(title='Max & Average Partition Size')
+
+ .addPanel(
+ graphPanel.new(
+ 'Max & Average Partition Size per Table',
+ description='Max & Average of the partition sizes, for each table',
+ format='bytes',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ bars=false,
+ lines=true,
+ stack=false,
+ decimals=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='max by (cluster, keyspace, table) (' + prefix + '_table_estimated_partition_size_histogram{quantile="1", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='Max partition size: {{keyspace}}.{{table}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='avg by (cluster, keyspace, table) (' + prefix + '_table_estimated_partition_size_histogram{quantile=".50", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='Avg partition size: {{keyspace}}.{{table}}',
+ )
+ )
+ )
+
+ .addPanel(
+ graphPanel.new(
+ 'Max & Average Partition Size per Node',
+ description='Max & Average of the partition sizes, for each node',
+ format='bytes',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ bars=false,
+ lines=true,
+ stack=false,
+ decimals=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='max by (cluster, dc, rack, instance) (' + prefix + '_table_estimated_partition_size_histogram{quantile="1", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='Max partition size: {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='avg by (cluster, dc, rack, instance) (' + prefix + '_table_estimated_partition_size_histogram{quantile=".50", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='Avg partition size: {{instance}}',
+ )
+ )
+ )
+
+ .addPanel(
+ graphPanel.new(
+ 'Max & Average Partition Size per Table and Node',
+ description='Max & Average of the partition sizes, for each combination of table and node',
+ format='bytes',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ bars=false,
+ lines=true,
+ stack=false,
+ decimals=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='max by (cluster, dc, rack, instance, keyspace, table) (' + prefix + '_table_estimated_partition_size_histogram{quantile="1", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='Max partition size: {{keyspace}}.{{table}} - {{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='avg by (cluster, dc, rack, instance, keyspace, table) (' + prefix + '_table_estimated_partition_size_histogram{quantile=".50", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='Avg partition size: {{keyspace}}.{{table}} - {{instance}}',
+ )
+ )
+ )
+
+)
+
+.addRow(
+ row.new(title='Hardware / Operating System')
+
+ .addPanel(
+ graphPanel.new(
+ 'CPU Utilization',
+ description='Maximum CPU utilisation (max 100%)',
+ format='percentunit',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ percentage=true,
+ decimals=1,
+ min=0,
+ max=1,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='max by (cluster) (1 - (sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{type="idle", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])) / sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))))',
+ legendFormat='max',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='min by (cluster) (1 - (sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{type="idle", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])) / sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))))',
+ legendFormat='min',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='avg by (cluster) (1 - (sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{type="idle", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])) / sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))))',
+ legendFormat='avg',
+ )
+ )
+ .addSeriesOverride(fillMinMaxSeriesOverrides)
+ .addSeriesOverride(removeMinlineSeriesOverrides)
+ )
+ .addPanel(
+ graphPanel.new(
+ 'Unix Load (1m rate)',
+ description='Max Unix load on a node for a cluster',
+ format='short',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='max by (cluster) (collectd_load_shortterm{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='max',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ 'min by (cluster) (collectd_load_shortterm{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='min',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ 'avg by (cluster) (collectd_load_shortterm{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='avg',
+ )
+ )
+ .addSeriesOverride(fillMinMaxSeriesOverrides)
+ .addSeriesOverride(removeMinlineSeriesOverrides)
+ )
+ .addPanel(
+ graphPanel.new(
+ 'Memory Utilisation',
+ description='Maximum Memory allocated per usage (worst node) - excludes caches, buffers, etc',
+ format='bytes',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ fill=1,
+ linewidth=2,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='min by (cluster) (sum by (cluster, dc, rack, instance) (collectd_memory{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}))',
+ legendFormat='min memory available',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='max by (cluster, memory) (sum by (cluster, dc, rack, instance, memory) (collectd_memory{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}))',
+ legendFormat='max memory {{memory}}',
+ )
+ )
+ )
+ .addPanel(
+ graphPanel.new(
+ 'Network I/O',
+ description='Network In and Out per cluster',
+ format='bytes',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=1,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ bars=false,
+ )
+ .addTarget(
+ prometheus.target(
+ 'sum by (cluster) (rate(collectd_interface_if_octets_rx_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='outgoing',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ 'sum by (cluster) (rate(collectd_interface_if_octets_rx_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='incoming',
+ )
+ )
+ .addSeriesOverride({
+ "alias": "incoming",
+ "transform": "negative-Y"
+ })
+ )
+ .addPanel(
+ graphPanel.new(
+ 'Context Switching',
+ description='Amount of context switching per second per host',
+ format='short',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='sum by (cluster, dc, rack, instance) (rate(collectd_contextswitch_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='{{instance}} - Context Switches',
+ )
+ )
+ )
+
+)
+
+.addRow(
+ row.new(title='Disks Performances')
+
+ // TODO Alain
+ .addPanel(
+ graphPanel.new(
+ 'Disk Writes IOPS - Total per Node',
+ description='Sum of all disks hits for writes per second, for each node',
+ format='iops',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ bars=false,
+ lines=true,
+ stack=false,
+ decimals=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='Sum by (cluster, dc, rack, instance) (rate(collectd_processes_io_ops_write_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='disks writes - iops: {{instance}}',
+ )
+ )
+ )
+
+ .addPanel(
+ graphPanel.new(
+ 'Disk Writes Throughput - Total per Node',
+ description='Sum of all disks throughputs for writes per second, for each node',
+ format='bps',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ min=0,
+ bars=false,
+ lines=true,
+ stack=false,
+ decimals=0,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='Sum by (cluster, dc, rack, instance) (rate(collectd_processes_io_octets_tx_total{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s]))',
+ legendFormat='disks writes - throughput: {{instance}}',
+ )
+ )
+ )
+
+ // TODO
+ // Disk Write Latency (io.w_await?)
+ // Disk Write Queued (io.wrqm/s)
+ // Disk Utilization (io.util%)
+ // About disks, change to see disks individually (no aggregation) to see commitlog disk apart.
+
+)
+
+.addRow(
+ row.new(title='JVM / Garbage Collection')
+ .addPanel(
+ graphPanel.new(
+ 'Application Throughput (% time NOT doing GC)',
+ description='Percentage of the time the node is *not* doing a GC, thus Cassandra is not stopped for GC',
+ format='percentunit',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ decimals=2,
+ max=1,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='1 - (sum by (cluster, dc, rack, instance) (rate(' + prefix + '_jvm_gc_time{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])) / 1000)',
+ legendFormat='{{dc}}-{{instance}}',
+ )
+ )
+ )
+ .addPanel(
+ graphPanel.new(
+ 'Garbage Collection Time',
+ description='Garbage collection duration',
+ format='ms',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ fill=0,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ )
+ .addTarget(
+ prometheus.target(
+ expr='rate(' + prefix + '_jvm_gc_time{cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}[1m:30s])',
+ legendFormat='{{dc}}-{{instance}}',
+ )
+ )
+ )
+ .addPanel(
+ graphPanel.new(
+ 'JVM Heap Memory Utilisation',
+ description='JVM Heap Memory size (worst node) and minimum available heap size per node',
+ format='bytes',
+ datasource='$PROMETHEUS_DS',
+ transparent=true,
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_sort='current',
+ legend_sortDesc=true,
+ shared_tooltip=false,
+ fill=1,
+ linewidth=2,
+ )
+ .addTarget(
+ prometheus.target(
+ expr= prefix + '_jvm_memory_used{memory_type="heap", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"}',
+ legendFormat='{{dc}}-{{instance}}',
+ )
+ )
+ .addTarget(
+ prometheus.target(
+ expr='min by ( cluster)
+ (' + prefix + '_jvm_memory_max{memory_type="heap", cluster="$cluster", dc=~"$dc", rack=~"$rack", instance=~"$node"})',
+ legendFormat='Heap memory available',
+ )
+ )
+ )
+)
+
+
+// TODO following versions:
+
+// Cassandra
+// Section with:
+// - memtable sizes
+// - mcac_table_waiting_on_free_memtable_space
+// - other memtable useful info (memtable switch count, ...)?
+
+// add to latency section a chart specifically with CAS and MV latencies (specifically - apart from write latencies)?
+// For now it's mixed with other latencies
+
+// section time skew
+// mcac_table_col_update_time_delta_histogram --> https://stackoverflow.com/questions/42180358/what-does-this-cassandra-metric-colupdatetimedeltahistogram-mean
+// Clock drift (here and in overview dashboard as well!). Missing ntp data to build that chart
diff --git a/dashboards/grafana/generated-dashboards/write-path.json b/dashboards/grafana/generated-dashboards/write-path.json
new file mode 100644
index 0000000..f793c48
--- /dev/null
+++ b/dashboards/grafana/generated-dashboards/write-path.json
@@ -0,0 +1,2340 @@
+{
+ "__inputs": [ ],
+ "__requires": [ ],
+ "annotations": {
+ "list": [ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "refresh": "30s",
+ "rows": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "height": "50px",
+ "panels": [
+ {
+ "content": "",
+ "datasource": null,
+ "gridPos": { },
+ "id": 2,
+ "mode": "markdown",
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "content": "",
+ "datasource": null,
+ "gridPos": { },
+ "id": 3,
+ "mode": "html",
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "content": "",
+ "datasource": null,
+ "gridPos": { },
+ "id": 4,
+ "mode": "markdown",
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Total writes, cas_propose and cas_commit per table",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 5,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (cluster, keyspace, table) (rate(mcac_table_write_latency_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Writes: {{keyspace}}.{{table}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum by (cluster, keyspace, table) (rate(mcac_table_cas_propose_latency_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Cas Propose: {{keyspace}}.{{table}}",
+ "refId": "B"
+ },
+ {
+ "expr": "sum by (cluster, keyspace, table) (rate(mcac_table_cas_commit_latency_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Cas Commit: {{keyspace}}.{{table}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Local Writes Throughput per Table",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "rps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Total writes, cas_propose and cas_commit per node",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 6,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (cluster, dc, rack, instance) (rate(mcac_table_write_latency_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Writes: {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum by (cluster, dc, rack, instance) (rate(mcac_table_cas_propose_latency_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Cas Propose: {{instance}}",
+ "refId": "B"
+ },
+ {
+ "expr": "sum by (cluster, dc, rack, instance) (rate(mcac_table_cas_commit_latency_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Cas Commit: {{instance}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Local Writes Throughput per Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "rps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Total writes, cas_propose and cas_commit per node and table",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 7,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (cluster, dc, rack, instance, keyspace, table) (rate(mcac_table_write_latency_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Writes: {{keyspace}}.{{table}} on {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum by (cluster, dc, rack, instance, keyspace, table) (rate(mcac_table_cas_propose_latency_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Cas Propose: {{keyspace}}.{{table}} on {{instance}}",
+ "refId": "B"
+ },
+ {
+ "expr": "sum by (cluster, dc, rack, instance, keyspace, table) (rate(mcac_table_cas_commit_latency_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Cas Commit: {{keyspace}}.{{table}} on {{instance}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Local Writes Throughput per Node and Table",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "rps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Local Writes Throughputs (Node Perspective)",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Write latency for local writes per table (98 - 999th percentile)",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.98, sum by (cluster, keyspace, table, le) (rate(mcac_table_write_latency_bucket{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "p98 - {{keyspace}}.{{table}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum by (cluster, keyspace, table, le) (rate(mcac_table_write_latency_bucket{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "p99 - {{keyspace}}.{{table}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.999, sum by (cluster, keyspace, table, le) (rate(mcac_table_write_latency_bucket{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "p999 - {{keyspace}}.{{table}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Local Write Latency per Table",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Write latency for local writes per node (98 - 999th percentile)",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.98, sum by (cluster, dc, rack, instance, le) (rate(mcac_table_write_latency_bucket{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "p98 - {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum by (cluster, dc, rack, instance, le) (rate(mcac_table_write_latency_bucket{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "p99 - {{instance}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.999, sum by (cluster, dc, rack, instance, le) (rate(mcac_table_write_latency_bucket{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "p999 - {{instance}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Local Write Latency per Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Write latency for local writes per table and per node (98 - 999th percentile)",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 10,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.98, sum by (cluster, dc, rack, instance, keyspace, table, le) (rate(mcac_table_write_latency_bucket{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "p98 - {{keyspace}}.{{table}} - {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum by (cluster, dc, rack, instance, keyspace, table, le) (rate(mcac_table_write_latency_bucket{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "p99 - {{keyspace}}.{{table}} - {{instance}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.999, sum by (cluster, dc, rack, instance, keyspace, table, le) (rate(mcac_table_write_latency_bucket{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "p999 - {{keyspace}}.{{table}} - {{instance}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Local Write Latency per Table and Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "µs",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Local Write Latencies",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Pending threads per node, by thread pool name filtering threads possibly impacting writes",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 11,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (cluster, dc, rack, instance, pool_name) (mcac_thread_pools_pending_tasks{pool_name=~\"memtable_flush_writer|memtable_post_flush|migration_stage|counter_mutation_stage|mutation_stage|view_mutation_stage|misc_stage|secondary_index_management|hints_dispatcher\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} - pending {{pool_name}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pending Tasks per Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Pending threads per node, by thread pool name filtering threads possibly impacting writes",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 12,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (cluster, dc, rack, instance, pool_name) (mcac_thread_pools_total_blocked_tasks_total{pool_name=~\"memtable_flush_writer|memtable_post_flush|migration_stage|counter_mutation_stage|mutation_stage|view_mutation_stage|misc_stage|secondary_index_management|hints_dispatcher\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} - blocked {{pool_name}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Blocked Tasks per Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Pending threads per node, by thread pool name filtering threads possibly impacting writes",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 13,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (cluster, dc, rack, instance, message_type) (rate(mcac_dropped_message_dropped_total{message_type=~\"_trace|batch_store|batch_remove|counter_mutation|hint|mutation\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} - dropped {{message_type}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Dropped Messages per Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Thread Pools",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "decimals": 0,
+ "description": "Max & Average of the partition sizes, for each table",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 14,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max by (cluster, keyspace, table) (mcac_table_estimated_partition_size_histogram{quantile=\"1\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Max partition size: {{keyspace}}.{{table}}",
+ "refId": "A"
+ },
+ {
+ "expr": "avg by (cluster, keyspace, table) (mcac_table_estimated_partition_size_histogram{quantile=\".50\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Avg partition size: {{keyspace}}.{{table}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Max & Average Partition Size per Table",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "decimals": 0,
+ "description": "Max & Average of the partition sizes, for each node",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 15,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max by (cluster, dc, rack, instance) (mcac_table_estimated_partition_size_histogram{quantile=\"1\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Max partition size: {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "avg by (cluster, dc, rack, instance) (mcac_table_estimated_partition_size_histogram{quantile=\".50\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Avg partition size: {{instance}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Max & Average Partition Size per Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "decimals": 0,
+ "description": "Max & Average of the partition sizes, for each combination of table and node",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 16,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max by (cluster, dc, rack, instance, keyspace, table) (mcac_table_estimated_partition_size_histogram{quantile=\"1\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Max partition size: {{keyspace}}.{{table}} - {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "avg by (cluster, dc, rack, instance, keyspace, table) (mcac_table_estimated_partition_size_histogram{quantile=\".50\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Avg partition size: {{keyspace}}.{{table}} - {{instance}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Max & Average Partition Size per Table and Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Max & Average Partition Size",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "decimals": 1,
+ "description": "Maximum CPU utilisation (max 100%)",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 17,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": true,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "max",
+ "fillBelowTo": "min",
+ "lines": false
+ },
+ {
+ "alias": "min",
+ "lines": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max by (cluster) (1 - (sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{type=\"idle\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])) / sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "max",
+ "refId": "A"
+ },
+ {
+ "expr": "min by (cluster) (1 - (sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{type=\"idle\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])) / sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "min",
+ "refId": "B"
+ },
+ {
+ "expr": "avg by (cluster) (1 - (sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{type=\"idle\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])) / sum by (cluster, dc, rack, instance) (rate(collectd_cpu_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "avg",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Utilization",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "decimals": 1,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": 0,
+ "show": true
+ },
+ {
+ "decimals": 1,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Max Unix load on a node for a cluster",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 18,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "max",
+ "fillBelowTo": "min",
+ "lines": false
+ },
+ {
+ "alias": "min",
+ "lines": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max by (cluster) (collectd_load_shortterm{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "max",
+ "refId": "A"
+ },
+ {
+ "expr": "min by (cluster) (collectd_load_shortterm{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "min",
+ "refId": "B"
+ },
+ {
+ "expr": "avg by (cluster) (collectd_load_shortterm{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "avg",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Unix Load (1m rate)",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Maximum Memory allocated per usage (worst node) - excludes caches, buffers, etc",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 19,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "min by (cluster) (sum by (cluster, dc, rack, instance) (collectd_memory{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "min memory available",
+ "refId": "A"
+ },
+ {
+ "expr": "max by (cluster, memory) (sum by (cluster, dc, rack, instance, memory) (collectd_memory{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "max memory {{memory}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Utilisation",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Network In and Out per cluster",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 20,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "incoming",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (cluster) (rate(collectd_interface_if_octets_rx_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "outgoing",
+ "refId": "A"
+ },
+ {
+ "expr": "sum by (cluster) (rate(collectd_interface_if_octets_rx_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "incoming",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network I/O",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Amount of context switching per second per host",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 21,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (cluster, dc, rack, instance) (rate(collectd_contextswitch_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}} - Context Switches",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Context Switching",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Hardware / Operating System",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "decimals": 0,
+ "description": "Sum of all disks hits for writes per second, for each node",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 22,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "Sum by (cluster, dc, rack, instance) (rate(collectd_processes_io_ops_write_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "disks writes - iops: {{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Writes IOPS - Total per Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "iops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "decimals": 0,
+ "format": "iops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "decimals": 0,
+ "description": "Sum of all disks throughputs for writes per second, for each node",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 23,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "Sum by (cluster, dc, rack, instance) (rate(collectd_processes_io_octets_tx_total{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "disks writes - throughput: {{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Writes Throughput - Total per Node",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "decimals": 0,
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Disks Performances",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "decimals": 2,
+ "description": "Percentage of the time the node is *not* doing a GC, thus Cassandra is not stopped for GC",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 24,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "1 - (sum by (cluster, dc, rack, instance) (rate(mcac_jvm_gc_time{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])) / 1000)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{dc}}-{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Application Throughput (% time NOT doing GC)",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "decimals": 2,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": null,
+ "show": true
+ },
+ {
+ "decimals": 2,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": 1,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "Garbage collection duration",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 25,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(mcac_jvm_gc_time{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}[1m:30s])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{dc}}-{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Garbage Collection Time",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$PROMETHEUS_DS",
+ "description": "JVM Heap Memory size (worst node) and minimum available heap size per node",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": { },
+ "id": 26,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "mcac_jvm_memory_used{memory_type=\"heap\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{dc}}-{{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "min by ( cluster)\n (mcac_jvm_memory_max{memory_type=\"heap\", cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\", instance=~\"$node\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Heap memory available",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "JVM Heap Memory Utilisation",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "transparent": true,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "JVM / Garbage Collection",
+ "titleSize": "h6",
+ "type": "row"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "Cassandra",
+ "Write",
+ "Write-Path",
+ "Mutation",
+ "Insert",
+ "Update",
+ "Upsert"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "Prometheus",
+ "value": "Prometheus"
+ },
+ "hide": 2,
+ "label": null,
+ "name": "PROMETHEUS_DS",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$PROMETHEUS_DS",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(collectd_collectd_queue_length{}, cluster)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": { },
+ "datasource": "$PROMETHEUS_DS",
+ "hide": 0,
+ "includeAll": true,
+ "label": "DataCenter",
+ "multi": true,
+ "name": "dc",
+ "options": [ ],
+ "query": "label_values(collectd_collectd_queue_length{cluster=\"$cluster\"}, dc)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": { },
+ "datasource": "$PROMETHEUS_DS",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Rack",
+ "multi": true,
+ "name": "rack",
+ "options": [ ],
+ "query": "label_values(collectd_collectd_queue_length{cluster=\"$cluster\", dc=~\"$dc\"}, rack)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": { },
+ "datasource": "$PROMETHEUS_DS",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Node",
+ "multi": true,
+ "name": "node",
+ "options": [ ],
+ "query": "label_values(collectd_collectd_queue_length{cluster=\"$cluster\", dc=~\"$dc\", rack=~\"$rack\"}, instance)",
+ "refresh": 2,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-30m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Cassandra Write Path",
+ "version": 0
+}