From c839e2bc10ed19e33063bb4a42eeb1c46751e26b Mon Sep 17 00:00:00 2001
From: Bruce Becker
Date: Sat, 18 May 2024 08:14:57 +0200
Subject: [PATCH] fix(grafana): move consul configurations to their own job and template

Signed-off-by: Bruce Becker
---
Review notes (text between the "---" line above and the first "diff --git"
line is commentary and is not part of the commit):

* Adds a consul-template source for a Grafana Agent that runs the agent and
  consul_exporter integrations, a Nomad job ("grafana-monitoring", group
  "consul") that renders that template to local/agent.yml and starts the
  agent with raw_exec, and a second template that additionally scrapes
  Consul and Nomad metrics endpoints.
* grafana-integrations.yml.tmpl: the credentials and url under
  metrics.configs[].remote_write are quoted like every other templated
  scalar in these files, and the url carries the same "/api/prom/push"
  suffix that integrations.prometheus_remote_write appends to the same
  secret field. NOTE(review): this assumes metrics_url holds the base URL,
  as the other two uses imply; confirm against the stored secret.
* Hunk sizes are unchanged (52 / 102 / 87 added lines). The "index" blob
  ids are the ones recorded in the original commit and were not recomputed.

 .../grafana-integrations-consul.yml.tmpl      |  52 +++++++++
 grafana-agent/grafana-integrations.nomad      | 102 ++++++++++++++++++
 grafana-agent/grafana-integrations.yml.tmpl   |  87 +++++++++++++++
 3 files changed, 241 insertions(+)
 create mode 100644 grafana-agent/grafana-integrations-consul.yml.tmpl
 create mode 100644 grafana-agent/grafana-integrations.nomad
 create mode 100644 grafana-agent/grafana-integrations.yml.tmpl

diff --git a/grafana-agent/grafana-integrations-consul.yml.tmpl b/grafana-agent/grafana-integrations-consul.yml.tmpl
new file mode 100644
index 0000000..a994b5b
--- /dev/null
+++ b/grafana-agent/grafana-integrations-consul.yml.tmpl
@@ -0,0 +1,52 @@
+{{- with secret "hashiatho.me-v2/grafana_cloud" -}}
+server:
+  log_level: info
+metrics:
+  kvstore_config:
+    store: consul
+    prefix: "grafana/configurations/"
+    consul:
+      host: "localhost:8500"
+  wal_directory: tmp/wal
+  global:
+    scrape_interval: 60s
+
+integrations:
+  prometheus_remote_write:
+    - basic_auth:
+        password: '{{ .Data.data.api_key }}'
+        username: '{{ .Data.data.metrics_id }}'
+      url: '{{ .Data.data.metrics_url }}/api/prom/push'
+  agent:
+    enabled: true
+    relabel_configs:
+      - action: replace
+        source_labels:
+          - agent_hostname
+        target_label: instance
+      - action: replace
+        target_label: job
+        replacement: "integrations/agent-check"
+    metric_relabel_configs:
+      - action: keep
+        regex: (prometheus_target_.*|prometheus_sd_discovered_targets|agent_build.*|agent_wal_samples_appended_total|process_start_time_seconds)
+        source_labels:
+          - __name__
+  consul_exporter:
+    enabled: true
+    server: localhost:8500
+    scrape_integration: true
+    scrape_interval: 120s
+    kv_prefix: "hashiatho.me"
+    relabel_configs:
+      - replacement: hashiathome
+        target_label: instance
+      - replacement: "integrations/consul"
+        target_label: job
+    metric_relabel_configs:
+      - action: keep
+        regex: consul_raft_leader|consul_raft_leader_lastcontact_count|consul_raft_peers|consul_up
+        source_labels:
+          - __name__
+
+{{ end }}
diff --git a/grafana-agent/grafana-integrations.nomad b/grafana-agent/grafana-integrations.nomad
new file mode 100644
index 0000000..f626e85
--- /dev/null
+++ b/grafana-agent/grafana-integrations.nomad
@@ -0,0 +1,102 @@
+variable "graf_agent_rel_url" {
+  description = "Base URL for grafana release packages."
+  type        = string
+  default     = "https://github.com/grafana/agent/releases/download"
+}
+variable "graf_agent_version" {
+  description = "Grafana Agent version to be used."
+  type        = string
+  default     = "0.40.3"
+}
+
+variable "scrape_interval" {
+  description = "Default scrape interval"
+  type        = string
+  default     = "60s"
+}
+
+job "grafana-monitoring" {
+  vault {}
+  type = "service"
+  group "consul" {
+    restart {
+      render_templates = true
+      attempts         = 2
+      interval         = "5m"
+      mode             = "delay"
+    }
+    update {
+      max_parallel      = 3
+      health_check      = "checks"
+      min_healthy_time  = "10s"
+      healthy_deadline  = "5m"
+      progress_deadline = "10m"
+      auto_revert       = true
+      auto_promote      = true
+      canary            = 1
+    }
+    network {
+      port "http" {}
+      port "grpc" {}
+    }
+
+    task "agent" {
+      resources {
+        memory = 512
+        cpu    = 500
+      }
+      identity {
+        name        = "vault"
+        aud         = ["vault.io"]
+        env         = true
+        file        = true
+        change_mode = "restart"
+        ttl         = "1h"
+      }
+
+      service {
+        port = "http"
+        name = "grafana-agent-consul-http"
+        check {
+          type     = "http"
+          name     = "agent_health"
+          path     = "/-/healthy"
+          interval = "20s"
+          timeout  = "5s"
+        }
+      }
+
+      service {
+        port = "grpc"
+        name = "grafana-agent-consul-grpc"
+        check {
+          type     = "tcp"
+          interval = "20s"
+          timeout  = "5s"
+        }
+      }
+      env {
+        HOSTNAME = attr.unique.hostname
+      }
+      driver = "raw_exec"
+      template {
+        data        = file("grafana-integrations-consul.yml.tmpl")
+        destination = "local/agent.yml"
+      }
+      artifact {
+        source      = "${var.graf_agent_rel_url}/v${var.graf_agent_version}/grafana-agent-linux-${attr.cpu.arch}.zip"
+        destination = "local/grafana-agent"
+        mode        = "file"
+      }
+      config {
+        command = "local/grafana-agent"
+        args = [
+          "-config.file", "local/agent.yml",
+          "-server.http.address", "${NOMAD_ADDR_http}",
+          "-server.grpc.address", "${NOMAD_ADDR_grpc}",
+          "-disable-reporting"
+        ]
+      }
+    }
+  }
+}
diff --git a/grafana-agent/grafana-integrations.yml.tmpl b/grafana-agent/grafana-integrations.yml.tmpl
new file mode 100644
index 0000000..5d905e9
--- /dev/null
+++ b/grafana-agent/grafana-integrations.yml.tmpl
@@ -0,0 +1,87 @@
+{{- with secret "hashiatho.me-v2/grafana_cloud" -}}
+integrations:
+  prometheus_remote_write:
+    - basic_auth:
+        password: '{{ .Data.data.api_key }}'
+        username: '{{ .Data.data.metrics_id }}'
+      url: '{{ .Data.data.metrics_url }}/api/prom/push'
+  agent:
+    enabled: true
+    relabel_configs:
+      - action: replace
+        source_labels:
+          - agent_hostname
+        target_label: instance
+
+      - action: replace
+        target_label: job
+        replacement: "integrations/agent-check"
+    metric_relabel_configs:
+      - action: keep
+        regex: (prometheus_target_.*|prometheus_sd_discovered_targets|agent_build.*|agent_wal_samples_appended_total|process_start_time_seconds)
+        source_labels:
+          - __name__
+  consul_exporter:
+    enabled: true
+    server: localhost:8500
+    relabel_configs:
+      - replacement: hashiathome
+        target_label: instance
+      - replacement: "integrations/consul"
+        target_label: job
+    metric_relabel_configs:
+      - action: keep
+        regex: consul_raft_leader|consul_raft_leader_lastcontact_count|consul_raft_peers|consul_up
+        source_labels:
+          - __name__
+
+server:
+  log_level: debug
+metrics:
+  configs:
+    - name: nomad
+      remote_write:
+        - basic_auth:
+            password: '{{ .Data.data.api_key }}'
+            username: '{{ .Data.data.metrics_id }}'
+          url: '{{ .Data.data.metrics_url }}/api/prom/push'
+      scrape_configs:
+        - job_name: 'integrations/consul'
+          metrics_path: /v1/agent/metrics
+          params:
+            format: ["prometheus"]
+          scrape_interval: 60s
+          static_configs:
+            - targets: ['localhost:8500']
+              labels:
+                instance: ''
+          metric_relabel_configs:
+            - action: keep
+              regex: consul_raft_leader|consul_raft_leader_lastcontact_count|consul_raft_peers|consul_up
+              source_labels:
+                - __name__
+        - job_name: integrations/nomad
+          metrics_path: /v1/metrics
+          params:
+            format: ['prometheus']
+          relabel_configs:
+            - replacement: 'hah'
+              target_label: instance
+          consul_sd_configs:
+            - datacenter: "dc1"
+              services:
+                - nomad
+              tags:
+                - http
+                - nomad
+                - ansible-managed
+              allow_stale: true
+          metric_relabel_configs:
+            - action: keep
+              regex: nomad_client_allocated_cpu|nomad_client_allocated_disk|nomad_client_allocated_memory|nomad_client_allocs_cpu_total_percent|nomad_client_allocs_cpu_total_ticks|nomad_client_allocs_memory_cache|nomad_client_allocs_memory_rss|nomad_client_host_cpu_idle|nomad_client_host_disk_available|nomad_client_host_disk_inodes_percent|nomad_client_host_disk_size|nomad_client_host_memory_available|nomad_client_host_memory_free|nomad_client_host_memory_total|nomad_client_host_memory_used|nomad_client_unallocated_cpu|nomad_client_unallocated_disk|nomad_client_unallocated_memory|nomad_client_uptime
+              source_labels:
+                - __name__
+  wal_directory: tmp/wal
+  global:
+    scrape_interval: 60s
+{{ end }}