Skip to content

Commit

Permalink
fix(grafana): move consul configurations to their own job and template
Browse files Browse the repository at this point in the history
Signed-off-by: Bruce Becker <[email protected]>
  • Loading branch information
brucellino committed May 18, 2024
1 parent 08aa82f commit c839e2b
Show file tree
Hide file tree
Showing 3 changed files with 241 additions and 0 deletions.
52 changes: 52 additions & 0 deletions grafana-agent/grafana-integrations-consul.yml.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{{- with secret "hashiatho.me-v2/grafana_cloud" -}}
# Grafana Agent configuration for the Consul monitoring task.
# Every `.Data.data.*` reference below reads a field of the Vault secret bound
# by the enclosing `with secret` action.
server:
  log_level: info

metrics:
  global:
    scrape_interval: 60s
  # Relative path used for the write-ahead log.
  wal_directory: tmp/wal
  # NOTE(review): confirm the agent accepts `kvstore_config` at this level;
  # verify the key name against the static-mode configuration reference.
  kvstore_config:
    store: consul
    prefix: 'grafana/configurations/'
    consul:
      host: 'localhost:8500'

integrations:
  # Remote-write destination; URL and credentials come from the secret.
  prometheus_remote_write:
    - url: '{{ .Data.data.metrics_url }}/api/prom/push'
      basic_auth:
        username: '{{ .Data.data.metrics_id }}'
        password: '{{ .Data.data.api_key }}'

  agent:
    enabled: true
    relabel_configs:
      # Copy agent_hostname into the instance label.
      - source_labels:
          - agent_hostname
        target_label: instance
        action: replace
      # Set a fixed job label.
      - target_label: job
        replacement: 'integrations/agent-check'
        action: replace
    metric_relabel_configs:
      # Keep only series whose metric name matches the regex.
      - source_labels:
          - __name__
        regex: '(prometheus_target_.*|prometheus_sd_discovered_targets|agent_build.*|agent_wal_samples_appended_total|process_start_time_seconds)'
        action: keep

  consul_exporter:
    enabled: true
    server: 'localhost:8500'
    kv_prefix: 'hashiatho.me'
    scrape_integration: true
    scrape_interval: 120s
    relabel_configs:
      # Fixed instance and job labels for the Consul series.
      - target_label: instance
        replacement: hashiathome
      - target_label: job
        replacement: 'integrations/consul'
    metric_relabel_configs:
      # Keep only the listed Consul series.
      - source_labels:
          - __name__
        regex: 'consul_raft_leader|consul_raft_leader_lastcontact_count|consul_raft_peers|consul_up'
        action: keep

{{ end }}
102 changes: 102 additions & 0 deletions grafana-agent/grafana-integrations.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Base of the download URL; the job's artifact source appends
# "/v<version>/<archive name>" to it.
variable "graf_agent_rel_url" {
  type        = string
  default     = "https://github.com/grafana/agent/releases/download"
  description = "Base URL for grafana release packages."
}
# Release version, without the leading "v" (the artifact source adds it).
variable "graf_agent_version" {
  type        = string
  default     = "0.40.3"
  description = "Grafana Agent version to be used."
}

# NOTE(review): not referenced by the job defined in this file; the rendered
# agent config hard-codes its own intervals. Confirm whether it is still needed.
variable "scrape_interval" {
  type        = string
  default     = "60s"
  description = "Default scrape interval"
}

# Service job that runs a Grafana Agent with the Consul integration template.
job "grafana-monitoring" {
  type = "service"
  vault {}

  group "consul" {
    # Two dynamically assigned ports, one per agent listener.
    network {
      port "http" {}
      port "grpc" {}
    }

    restart {
      attempts         = 2
      interval         = "5m"
      mode             = "delay"
      render_templates = true
    }

    # Canary rollout with automatic promotion and automatic revert,
    # judged by the service checks declared on the task.
    update {
      canary            = 1
      auto_promote      = true
      auto_revert       = true
      max_parallel      = 3
      health_check      = "checks"
      min_healthy_time  = "10s"
      healthy_deadline  = "5m"
      progress_deadline = "10m"
    }

    task "agent" {
      driver = "raw_exec"

      # Run the downloaded binary against the rendered config, binding both
      # listeners to the addresses Nomad allocated for the group's ports.
      config {
        command = "local/grafana-agent"
        args = [
          "-config.file",
          "local/agent.yml",
          "-server.http.address",
          "${NOMAD_ADDR_http}",
          "-server.grpc.address",
          "${NOMAD_ADDR_grpc}",
          "-disable-reporting"
        ]
      }

      # Release archive for the node's CPU architecture.
      # NOTE(review): the source is a .zip fetched with mode "file"; this
      # presumably relies on the archive holding a single file — confirm.
      artifact {
        source      = "${var.graf_agent_rel_url}/v${var.graf_agent_version}/grafana-agent-linux-${attr.cpu.arch}.zip"
        destination = "local/grafana-agent"
        mode        = "file"
      }

      # Agent configuration, rendered from the template file next to this job.
      template {
        data        = file("grafana-integrations-consul.yml.tmpl")
        destination = "local/agent.yml"
      }

      env {
        HOSTNAME = attr.unique.hostname
      }

      # Workload identity exposed to the task both as an environment
      # variable and as a file; a change restarts the task.
      identity {
        name        = "vault"
        aud         = ["vault.io"]
        ttl         = "1h"
        change_mode = "restart"
        env         = true
        file        = true
      }

      resources {
        cpu    = 500
        memory = 512
      }

      # HTTP listener, health-checked on the agent's /-/healthy path.
      service {
        name = "grafana-agent-consul-http"
        port = "http"
        check {
          name     = "agent_health"
          type     = "http"
          path     = "/-/healthy"
          interval = "20s"
          timeout  = "5s"
        }
      }

      # gRPC listener, checked with a plain TCP connect.
      service {
        name = "grafana-agent-consul-grpc"
        port = "grpc"
        check {
          type     = "tcp"
          interval = "20s"
          timeout  = "5s"
        }
      }
    }
  }
}
87 changes: 87 additions & 0 deletions grafana-agent/grafana-integrations.yml.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
{{- with secret "hashiatho.me-v2/grafana_cloud" -}}
# Grafana Agent configuration: agent and Consul integrations plus a metrics
# instance that scrapes Consul and Nomad.
# Every `.Data.data.*` reference below reads a field of the Vault secret bound
# by the enclosing `with secret` action. Rendered secret values are always
# single-quoted so they stay strings whatever characters they contain.
integrations:
  prometheus_remote_write:
    - basic_auth:
        password: '{{ .Data.data.api_key }}'
        username: '{{ .Data.data.metrics_id }}'
      url: '{{ .Data.data.metrics_url }}/api/prom/push'
  agent:
    enabled: true
    relabel_configs:
      # Copy agent_hostname into the instance label.
      - action: replace
        source_labels:
          - agent_hostname
        target_label: instance
      # Set a fixed job label.
      - action: replace
        target_label: job
        replacement: 'integrations/agent-check'
    metric_relabel_configs:
      # Keep only series whose metric name matches the regex.
      - action: keep
        regex: '(prometheus_target_.*|prometheus_sd_discovered_targets|agent_build.*|agent_wal_samples_appended_total|process_start_time_seconds)'
        source_labels:
          - __name__
  consul_exporter:
    enabled: true
    server: 'localhost:8500'
    relabel_configs:
      - replacement: hashiathome
        target_label: instance
      - replacement: 'integrations/consul'
        target_label: job
    metric_relabel_configs:
      - action: keep
        regex: 'consul_raft_leader|consul_raft_leader_lastcontact_count|consul_raft_peers|consul_up'
        source_labels:
          - __name__

server:
  log_level: debug

metrics:
  configs:
    - name: nomad
      remote_write:
        - basic_auth:
            password: '{{ .Data.data.api_key }}'
            username: '{{ .Data.data.metrics_id }}'
          # Same push endpoint as integrations.prometheus_remote_write above.
          # NOTE(review): assumes metrics_url holds the base URL without the
          # push path, as the integrations section implies — confirm.
          url: '{{ .Data.data.metrics_url }}/api/prom/push'
      scrape_configs:
        # NOTE(review): Consul is also covered by the consul_exporter
        # integration above under the same job label — confirm both are wanted.
        - job_name: 'integrations/consul'
          metrics_path: /v1/agent/metrics
          params:
            format: ['prometheus']
          scrape_interval: 60s
          static_configs:
            - targets: ['localhost:8500']
              labels:
                # Matches the instance label set by consul_exporter above.
                instance: 'hashiathome'
          metric_relabel_configs:
            - action: keep
              regex: 'consul_raft_leader|consul_raft_leader_lastcontact_count|consul_raft_peers|consul_up'
              source_labels:
                - __name__
        # Nomad targets are discovered through the Consul catalog.
        - job_name: 'integrations/nomad'
          metrics_path: /v1/metrics
          params:
            format: ['prometheus']
          relabel_configs:
            - replacement: 'hah'
              target_label: instance
          consul_sd_configs:
            - datacenter: 'dc1'
              services:
                - nomad
              tags:
                - http
                - nomad
                - ansible-managed
              allow_stale: true
          metric_relabel_configs:
            - action: keep
              regex: 'nomad_client_allocated_cpu|nomad_client_allocated_disk|nomad_client_allocated_memory|nomad_client_allocs_cpu_total_percent|nomad_client_allocs_cpu_total_ticks|nomad_client_allocs_memory_cache|nomad_client_allocs_memory_rss|nomad_client_host_cpu_idle|nomad_client_host_disk_available|nomad_client_host_disk_inodes_percent|nomad_client_host_disk_size|nomad_client_host_memory_available|nomad_client_host_memory_free|nomad_client_host_memory_total|nomad_client_host_memory_used|nomad_client_unallocated_cpu|nomad_client_unallocated_disk|nomad_client_unallocated_memory|nomad_client_uptime'
              source_labels:
                - __name__
  # Relative path used for the write-ahead log.
  wal_directory: tmp/wal
  global:
    scrape_interval: 60s
{{ end }}

0 comments on commit c839e2b

Please sign in to comment.