From 906836dcd5f696b8a4d4ca403361fab9e550be94 Mon Sep 17 00:00:00 2001 From: Sohamdg081992 <31517098+Sohamdg081992@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:25:46 -0800 Subject: [PATCH] Authentication using TLS certs + few metrics for civ2 ux + update release notes (#671) * Removing duplicate alerts from ci recommended alerts * Remove test branch * Remove preview keyword from policy readme * Test mtls ref app * . * update main go * . * . * . * . * . * . * adding secret to chart * . * . * . * . * . * [WIP] update secret locations * switch directory * switch from opt dir since it overwrites main sh * fix typo * [WIP]Put the certs in same dir in ref app * [WIP] update secret name * Testing SAN IP error * . * update cert to use daemonset node ip * use extension to add ip san since cnf did not work * . * Update configmaps for http & https ports * Adding tls config in reference app podmonitor * adding liveness probe for cert volume * add role binding to make podmonitor work * correct pod monitor cert * Update the mount location similar to oss location * switch file path inside ref app * Test CRD changes * Testing crd changes * [wip]testing crd * . 
* testing old location to start app * update the location back * correct the folder structure for new directory * make certificate copy for crd * update location in liveness probe * Create separate secrets for configmap path and CRD path and update liveness probe * update image for mtls ref app * update cluster role * using single secret for both crd & cm * remove role binding yaml * Remove leftover test secrets * Remove unused /var/tmp directory from liveness probe * Remove redundant openssl client config file * remove branch name * add back branch * Add below metrics for civ2 Ux `target=cadvisor` container_start_time_seconds `target=kube-state` kube_pod_container_status_ready kube_pod_init_container_* kube_pod_deletion_timestamp kube_pod_status_reason kube_pod_init_container_resource_requests * update release notes for 2/8 release * fix commas * remove private keys --------- Co-authored-by: vishwanath --- RELEASENOTES.md | 14 ++++- internal/referenceapp/golang/linux/Dockerfile | 4 ++ internal/referenceapp/golang/main.go | 15 +++++- .../linux-http-scrape-config.yaml | 18 +++++++ .../linux-https-scrape-config.yaml | 15 ++++++ internal/referenceapp/prometheus-config | 16 ++++++ .../referenceapp/prometheus-mtls-ref-app.yaml | 54 +++++++++++++++++++ otelcollector/VERSION | 2 +- ...arser-default-targets-metrics-keep-list.rb | 4 +- .../templates/ama-metrics-clusterRole.yaml | 2 +- .../templates/ama-metrics-daemonset.yaml | 14 +++++ .../templates/ama-metrics-deployment.yaml | 7 +++ .../pod-monitor-reference-app.yaml | 19 +++++-- otelcollector/scripts/main.sh | 18 +++---- 14 files changed, 183 insertions(+), 19 deletions(-) create mode 100644 internal/referenceapp/linux-http-scrape-config.yaml create mode 100644 internal/referenceapp/linux-https-scrape-config.yaml create mode 100644 internal/referenceapp/prometheus-config create mode 100644 internal/referenceapp/prometheus-mtls-ref-app.yaml diff --git a/RELEASENOTES.md b/RELEASENOTES.md index b873214de..286fb88dd 100644 
--- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -1,7 +1,19 @@ # Azure Monitor Metrics for AKS clusters -### Pending +## Release 02-08-2024 + +* Linux image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.4-main-` +* Windows image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.4-main--win` +* TA image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.4-main--targetallocator` +* cfg sidecar image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.4-main--cfg` * Change log - + * feat: add ccp config map settings for public preview + * feat: Enable MTLS authentication + * fix: add some metrics for civ2 ux + * fix: Add telemetry for collector and addon token adapter + * fix: Set autoresolve to true for new agent version alert + * fix: SDL Requirement : add policheck + * fix: [infra] Fix commented out ARC deploy chart condition * fix: stop copying libssl.so.1.1 & libcrypto.so.1.1 as they are already available with openssl in distroless and copying them over causes FIPS HMAC verification failures * fix: update windows liveness timeoutSeconds, periodSeconds to 60 and reduce tasklist usage in liveness probe diff --git a/internal/referenceapp/golang/linux/Dockerfile b/internal/referenceapp/golang/linux/Dockerfile index dca1db551..24dd641b1 100644 --- a/internal/referenceapp/golang/linux/Dockerfile +++ b/internal/referenceapp/golang/linux/Dockerfile @@ -14,6 +14,10 @@ COPY go.mod . COPY go.sum . RUN go mod download + +COPY client-cert.pem /etc/prometheus/certs/ +COPY client-key.pem /etc/prometheus/certs/ + # Copy the code into the container COPY . . 
diff --git a/internal/referenceapp/golang/main.go b/internal/referenceapp/golang/main.go index 488ef44bd..4a14aac6c 100644 --- a/internal/referenceapp/golang/main.go +++ b/internal/referenceapp/golang/main.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "log" "math/rand" "net/http" "os" @@ -457,6 +458,9 @@ func untypedHandler(w http.ResponseWriter, r *http.Request) { } func main() { + + certFile := "/etc/prometheus/certs/client-cert.pem" + keyFile := "/etc/prometheus/certs/client-key.pem" if os.Getenv("RUN_PERF_TEST") == "true" { if os.Getenv("SCRAPE_INTERVAL") != "" { scrapeIntervalSec, _ = strconv.Atoi(os.Getenv("SCRAPE_INTERVAL")) @@ -480,8 +484,17 @@ func main() { http.ListenAndServe(":2113", untypedServer) }() + defer func() { + if r := recover(); r != nil { + log.Printf("HTTP server failed to start: %v", r) + } + }() + // Run main server for weather app metrics - http.ListenAndServe(":2112", weatherServer) + err := http.ListenAndServeTLS(":2112", certFile, keyFile, weatherServer) + if err != nil { + log.Printf("HTTP server failed to start: %v", err) + } fmt.Printf("ending main function") } diff --git a/internal/referenceapp/linux-http-scrape-config.yaml b/internal/referenceapp/linux-http-scrape-config.yaml new file mode 100644 index 000000000..d72c59063 --- /dev/null +++ b/internal/referenceapp/linux-http-scrape-config.yaml @@ -0,0 +1,18 @@ +scrape_configs: + - job_name: prometheus_ref_app_1 + scheme: http + scrape_interval: 60s + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: "prometheus-reference-app" + - source_labels: [__address__] + action: replace + target_label: __param_target + regex: ":2113" + - source_labels: [__param_target] + action: keep + regex: "2113" + - action: drop diff --git a/internal/referenceapp/linux-https-scrape-config.yaml b/internal/referenceapp/linux-https-scrape-config.yaml new file mode 100644 index 000000000..2cf0da50a --- /dev/null +++ 
b/internal/referenceapp/linux-https-scrape-config.yaml @@ -0,0 +1,15 @@ +scrape_configs: + - job_name: prometheus_ref_app + scheme: https + scrape_interval: 60s + tls_config: + ca_file: /etc/prometheus/certs/client-cert.pem + cert_file: /etc/prometheus/certs/client-cert.pem + key_file: /etc/prometheus/certs/client-key.pem + insecure_skip_verify: false + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: "prometheus-reference-app" diff --git a/internal/referenceapp/prometheus-config b/internal/referenceapp/prometheus-config new file mode 100644 index 000000000..c77e6ef39 --- /dev/null +++ b/internal/referenceapp/prometheus-config @@ -0,0 +1,16 @@ +scrape_configs: + - job_name: prometheus_ref_app + scheme: http + scrape_interval: 60s + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca/ca-cert.pem + cert_file: /var/run/secrets/kubernetes.io/serviceaccount/client/client-cert.pem + key_file: /var/run/secrets/kubernetes.io/serviceaccount/client/client-key.pem + insecure_skip_verify: false + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: "prometheus-reference-app" + diff --git a/internal/referenceapp/prometheus-mtls-ref-app.yaml b/internal/referenceapp/prometheus-mtls-ref-app.yaml new file mode 100644 index 000000000..0c75eb741 --- /dev/null +++ b/internal/referenceapp/prometheus-mtls-ref-app.yaml @@ -0,0 +1,54 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: prometheus-reference-app +spec: + selector: + matchLabels: + app: prometheus-reference-app + template: + metadata: + labels: + app: prometheus-reference-app + spec: + containers: + - name: prometheus-reference-app-golang + image: mcr.microsoft.com/azuremonitor/containerinsights/cidev/prometheus-collector/images:6.8.1-testTLS-11-10-2023-afd40f4c-ref-app-golang + env: + - name: RUN_PERF_TEST + value: "false" + - name: 
SCRAPE_INTERVAL + value: "15" + - name: METRIC_COUNT + value: "125000" + ports: + - containerPort: 2112 + protocol: TCP + - containerPort: 2113 + protocol: TCP + nodeSelector: + kubernetes.io/os: linux + architecture: amd64 +--- +apiVersion: v1 +kind: Service +metadata: + name: prometheus-reference-service + labels: + app: prometheus-reference-app +spec: + selector: + app: prometheus-reference-app + ports: + - name: "weather-app" + protocol: TCP + port: 2112 + targetPort: 2112 + - name: "untyped-metrics" + protocol: TCP + port: 2113 + targetPort: 2113 + - name: "python-client" + protocol: TCP + port: 2114 + targetPort: 2114 diff --git a/otelcollector/VERSION b/otelcollector/VERSION index 021c9405b..9ffc8cfb6 100644 --- a/otelcollector/VERSION +++ b/otelcollector/VERSION @@ -1 +1 @@ -6.8.3 +6.8.4 diff --git a/otelcollector/configmapparser/tomlparser-default-targets-metrics-keep-list.rb b/otelcollector/configmapparser/tomlparser-default-targets-metrics-keep-list.rb index 40ccb1b9f..937f91ec3 100644 --- a/otelcollector/configmapparser/tomlparser-default-targets-metrics-keep-list.rb +++ b/otelcollector/configmapparser/tomlparser-default-targets-metrics-keep-list.rb @@ -56,10 +56,10 @@ # minimal profile when MAC mode is enabled. This list includes metrics used by default dashboards + rec rules + alerts, when MAC mode is enabled. 
@kubeletRegex_minimal_mac = "kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_used_bytes|kubelet_node_name|kubelet_running_pods|kubelet_running_pod_count|kubelet_running_sum_containers|kubelet_running_containers|kubelet_running_container_count|volume_manager_total_volumes|kubelet_node_config_error|kubelet_runtime_operations_total|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_duration_seconds_bucket|kubelet_runtime_operations_duration_seconds_sum|kubelet_runtime_operations_duration_seconds_count|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_sum|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_sum|kubelet_pod_worker_duration_seconds_count|storage_operation_duration_seconds_bucket|storage_operation_duration_seconds_sum|storage_operation_duration_seconds_count|storage_operation_errors_total|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_sum|kubelet_cgroup_manager_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pleg_relist_interval_seconds_count|kubelet_pleg_relist_interval_seconds_sum|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_duration_seconds_sum|rest_client_requests_total|rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_server_expiration_renew_errors|kubelet_certificate_manager_server_ttl_seconds|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes_free|kubelet_volume_stats_inodes_used|kubelet_volume_stats_inodes|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_labels
|kube_persistentvolume_status_phase" @corednsRegex_minimal_mac = "coredns_build_info|coredns_panics_total|coredns_dns_responses_total|coredns_forward_responses_total|coredns_dns_request_duration_seconds|coredns_dns_request_duration_seconds_bucket|coredns_dns_request_duration_seconds_sum|coredns_dns_request_duration_seconds_count|coredns_forward_request_duration_seconds|coredns_forward_request_duration_seconds_bucket|coredns_forward_request_duration_seconds_sum|coredns_forward_request_duration_seconds_count|coredns_dns_requests_total|coredns_forward_requests_total|coredns_cache_hits_total|coredns_cache_misses_total|coredns_cache_entries|coredns_plugin_enabled|coredns_dns_request_size_bytes|coredns_dns_request_size_bytes_bucket|coredns_dns_request_size_bytes_sum|coredns_dns_request_size_bytes_count|coredns_dns_response_size_bytes|coredns_dns_response_size_bytes_bucket|coredns_dns_response_size_bytes_sum|coredns_dns_response_size_bytes_count|coredns_dns_response_size_bytes_bucket|coredns_dns_response_size_bytes_sum|coredns_dns_response_size_bytes_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info" -@cadvisorRegex_minimal_mac = "container_spec_cpu_quota|container_spec_cpu_period|container_memory_rss|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_network_receive_packets_total|container_network_transmit_packets_total|container_network_receive_packets_dropped_total|container_network_transmit_packets_dropped_total|container_fs_reads_total|container_fs_writes_total|container_fs_reads_bytes_total|container_fs_writes_bytes_total|container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_memory_cache|container_memory_swap|container_cpu_cfs_throttled_periods_total|container_cpu_cfs_periods_total|container_memory_rss|kubernetes_build_info" +@cadvisorRegex_minimal_mac = 
"container_spec_cpu_quota|container_spec_cpu_period|container_memory_rss|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_network_receive_packets_total|container_network_transmit_packets_total|container_network_receive_packets_dropped_total|container_network_transmit_packets_dropped_total|container_fs_reads_total|container_fs_writes_total|container_fs_reads_bytes_total|container_fs_writes_bytes_total|container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_memory_cache|container_memory_swap|container_cpu_cfs_throttled_periods_total|container_cpu_cfs_periods_total|container_memory_rss|kubernetes_build_info|container_start_time_seconds" @kubeproxyRegex_minimal_mac = "kubeproxy_sync_proxy_rules_duration_seconds|kubeproxy_sync_proxy_rules_duration_seconds_bucket|kubeproxy_sync_proxy_rules_duration_seconds_sum|kubeproxy_sync_proxy_rules_duration_seconds_count|kubeproxy_network_programming_duration_seconds|kubeproxy_network_programming_duration_seconds_bucket|kubeproxy_network_programming_duration_seconds_sum|kubeproxy_network_programming_duration_seconds_count|rest_client_requests_total|rest_client_request_duration_seconds|rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info" @apiserverRegex_minimal_mac = 
"apiserver_request_duration_seconds|apiserver_request_duration_seconds_bucket|apiserver_request_duration_seconds_sum|apiserver_request_duration_seconds_count|apiserver_request_total|workqueue_adds_total|workqueue_depth|workqueue_queue_duration_seconds|workqueue_queue_duration_seconds_bucket|workqueue_queue_duration_seconds_sum|workqueue_queue_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info|apiserver_request_slo_duration_seconds_bucket|apiserver_request_slo_duration_seconds_sum|apiserver_request_slo_duration_seconds_count" -@kubestateRegex_minimal_mac = "kube_job_status_succeeded|kube_job_spec_completions|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_current_number_scheduled|kube_daemonset_status_number_misscheduled|kube_daemonset_status_number_ready|kube_deployment_status_replicas_ready|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_waiting_reason|kube_pod_container_status_restarts_total|kube_node_status_allocatable|kube_pod_owner|kube_pod_container_resource_requests|kube_pod_status_phase|kube_pod_container_resource_limits|kube_replicaset_owner|kube_resourcequota|kube_namespace_status_phase|kube_node_status_capacity|kube_node_info|kube_pod_info|kube_deployment_spec_replicas|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas|kube_statefulset_status_replicas_updated|kube_job_status_start_time|kube_job_status_active|kube_job_failed|kube_horizontalpodautoscaler_status_desired_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_spec_max_replicas|kubernetes_build_info|kube_node_status_condition|kube_node_spec_taint|kube_pod_container_info|kube_.*_labels|kube_.*_annotations|kube_service_info|kube_pod_container_status_running|kube_pod_container_status_waiting|kube_pod_cont
ainer_status_terminated|kube_pod_container_state_started|kube_pod_created|kube_pod_start_time|kube_pod_init_container_info|kube_pod_init_container_status_terminated|kube_pod_init_container_status_terminated_reason|kube_pod_init_container_status_ready|kube_pod_init_container_resource_limits|kube_pod_init_container_status_running|kube_pod_init_container_status_waiting|kube_pod_init_container_status_restarts_total" +@kubestateRegex_minimal_mac = "kube_job_status_succeeded|kube_job_spec_completions|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_current_number_scheduled|kube_daemonset_status_number_misscheduled|kube_daemonset_status_number_ready|kube_deployment_status_replicas_ready|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_waiting_reason|kube_pod_container_status_restarts_total|kube_node_status_allocatable|kube_pod_owner|kube_pod_container_resource_requests|kube_pod_status_phase|kube_pod_container_resource_limits|kube_replicaset_owner|kube_resourcequota|kube_namespace_status_phase|kube_node_status_capacity|kube_node_info|kube_pod_info|kube_deployment_spec_replicas|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas|kube_statefulset_status_replicas_updated|kube_job_status_start_time|kube_job_status_active|kube_job_failed|kube_horizontalpodautoscaler_status_desired_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_spec_max_replicas|kubernetes_build_info|kube_node_status_condition|kube_node_spec_taint|kube_pod_container_info|kube_.*_labels|kube_.*_annotations|kube_service_info|kube_pod_container_status_running|kube_pod_container_status_waiting|kube_pod_container_status_terminated|kube_pod_container_state_started|kube_pod_created|kube_pod_start_time|kube_pod_init_container_info|kube_pod_init_container_status_terminated|kube_pod_init_
container_status_terminated_reason|kube_pod_init_container_status_ready|kube_pod_init_container_resource_limits|kube_pod_init_container_status_running|kube_pod_init_container_status_waiting|kube_pod_init_container_status_restarts_total|kube_pod_container_status_ready|kube_pod_init_container_.*|kube_pod_deletion_timestamp|kube_pod_status_reason|kube_pod_init_container_resource_requests" @nodeexporterRegex_minimal_mac = "node_filesystem_readonly|node_memory_MemTotal_bytes|node_cpu_seconds_total|node_memory_MemAvailable_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemFree_bytes|node_memory_Slab_bytes|node_filesystem_avail_bytes|node_filesystem_size_bytes|node_time_seconds|node_exporter_build_info|node_load1|node_vmstat_pgmajfault|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_receive_drop_total|node_network_transmit_drop_total|node_disk_io_time_seconds_total|node_disk_io_time_weighted_seconds_total|node_load5|node_load15|node_disk_read_bytes_total|node_disk_written_bytes_total|node_uname_info|kubernetes_build_info|node_boot_time_seconds" @kappiebasicRegex_minimal_mac = "kappie.*" @networkobservabilityRetinaRegex_minimal_mac = "networkobservability.*" diff --git a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-clusterRole.yaml b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-clusterRole.yaml index a063be487..516bcb37a 100644 --- a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-clusterRole.yaml +++ b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-clusterRole.yaml @@ -24,7 +24,7 @@ rules: verbs: ["list", "get", "watch"] - apiGroups: [""] resources: ["secrets"] - resourceNames: ["aad-msi-auth-token"] + resourceNames: ["aad-msi-auth-token", "ama-metrics-mtls-secret"] verbs: ["get", "watch"] - nonResourceURLs: ["/metrics"] verbs: ["get"] diff --git 
a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml index 9eb4f33f5..40b8a426c 100644 --- a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml +++ b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml @@ -137,6 +137,9 @@ spec: - mountPath: /etc/config/settings name: settings-vol-config readOnly: true + - mountPath: /etc/prometheus/certs + name: ama-metrics-tls-secret-volume + readOnly: true - mountPath: /etc/config/settings/prometheus name: prometheus-config-vol readOnly: true @@ -248,6 +251,10 @@ spec: configMap: name: ama-metrics-settings-configmap optional: true + - name: ama-metrics-tls-secret-volume + secret: + secretName: ama-metrics-mtls-secret + optional: true - name: prometheus-config-vol configMap: name: ama-metrics-prometheus-config-node @@ -384,6 +391,9 @@ spec: - mountPath: /etc/config/settings/prometheus name: prometheus-config-vol readOnly: true + - mountPath: /etc/prometheus/certs + name: ama-metrics-tls-secret-volume + readOnly: true - name: host-log-containers readOnly: true mountPath: /var/log/containers @@ -459,6 +469,10 @@ spec: - name: host-log-containers hostPath: path: /var/log/containers + - name: ama-metrics-tls-secret-volume + secret: + secretName: ama-metrics-mtls-secret + optional: true - name: host-log-pods hostPath: path: /var/log/pods diff --git a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-deployment.yaml b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-deployment.yaml index 62ab46de0..53b409d77 100644 --- a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-deployment.yaml +++ b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-deployment.yaml @@ -167,6 
+167,9 @@ spec: - mountPath: /etc/config/settings name: settings-vol-config readOnly: true + - mountPath: /etc/prometheus/certs + name: ama-metrics-tls-secret-volume + readOnly: true - mountPath: /etc/config/settings/prometheus name: prometheus-config-vol readOnly: true @@ -308,6 +311,10 @@ spec: hostPath: path: /etc/pki/ca-trust/anchors/ type: DirectoryOrCreate + - name: ama-metrics-tls-secret-volume + secret: + secretName: ama-metrics-mtls-secret + optional: true {{- if or (ne .Values.AzureMonitorMetrics.ArcExtension true) (and (not (hasPrefix "aks_edge" .Values.ClusterDistribution )) (and (ne .Values.Azure.Cluster.Distribution "aks_edge_k3s") (ne .Values.Azure.Cluster.Distribution "aks_edge_k8s"))) }} - name: anchors-ubuntu hostPath: diff --git a/otelcollector/deploy/example-custom-resources/pod-monitor-reference-app.yaml b/otelcollector/deploy/example-custom-resources/pod-monitor-reference-app.yaml index b17567a40..230a87692 100644 --- a/otelcollector/deploy/example-custom-resources/pod-monitor-reference-app.yaml +++ b/otelcollector/deploy/example-custom-resources/pod-monitor-reference-app.yaml @@ -10,7 +10,21 @@ spec: matchLabels: app: prometheus-reference-app podMetricsEndpoints: - - relabelings: + - scheme: https + tlsConfig: + ca: + secret: + key: "client-cert.pem" + name: "ama-metrics-mtls-secret" + cert: + secret: + key: "client-cert.pem" + name: "ama-metrics-mtls-secret" + keySecret: + key: "client-key.pem" + name: "ama-metrics-mtls-secret" + insecureSkipVerify: false + relabelings: - sourceLabels: [__meta_kubernetes_pod_label_app] action: keep regex: "prometheus-reference-app" @@ -18,6 +32,3 @@ spec: action: replace regex: ('$$NODE_NAME$$') targetLabel: instance - - - diff --git a/otelcollector/scripts/main.sh b/otelcollector/scripts/main.sh index 363e0b2bd..864c98feb 100644 --- a/otelcollector/scripts/main.sh +++ b/otelcollector/scripts/main.sh @@ -5,7 +5,7 @@ source /opt/logger.sh #Run inotify as a daemon to track changes to the mounted configmap. 
touch /opt/inotifyoutput.txt -inotifywait /etc/config/settings --daemon --recursive --outfile "/opt/inotifyoutput.txt" --event create,delete --format '%e : %T' --timefmt '+%s' +inotifywait /etc/config/settings /etc/prometheus/certs --daemon --recursive --outfile "/opt/inotifyoutput.txt" --event create,delete --format '%e : %T' --timefmt '+%s' # Run ARC EULA utility source /opt/arc-eula.sh @@ -17,9 +17,9 @@ echo_var "MODE" "$MODE" echo_var "CONTROLLER_TYPE" "$CONTROLLER_TYPE" echo_var "CLUSTER" "$CLUSTER" -aikey=$(echo $APPLICATIONINSIGHTS_AUTH | base64 -d) -export TELEMETRY_APPLICATIONINSIGHTS_KEY=$aikey -echo "export TELEMETRY_APPLICATIONINSIGHTS_KEY=$aikey" >> ~/.bashrc +aikey=$(echo $APPLICATIONINSIGHTS_AUTH | base64 -d) +export TELEMETRY_APPLICATIONINSIGHTS_KEY=$aikey +echo "export TELEMETRY_APPLICATIONINSIGHTS_KEY=$aikey" >> ~/.bashrc source ~/.bashrc #get controller kind in lowercase, trimmed @@ -48,7 +48,7 @@ fi # Add our target-allocator service to the no_proxy env variable export NO_PROXY=$NO_PROXY,ama-metrics-operator-targets.kube-system.svc.cluster.local -echo "export NO_PROXY=$NO_PROXY" >> ~/.bashrc +echo "export NO_PROXY=$NO_PROXY" >> ~/.bashrc export no_proxy=$no_proxy,ama-metrics-operator-targets.kube-system.svc.cluster.local echo "export no_proxy=$no_proxy" >> ~/.bashrc @@ -127,7 +127,7 @@ if [ "${MAC}" == "true" ]; then tokenAdapterResult=$(wget -T 2 -S http://localhost:9999/healthz -Y off 2>&1| grep HTTP/|awk '{print $2}'| grep 200) fi if [ ! 
-z $tokenAdapterResult ]; then - echo "found token adapter to be healthy after $waitedSecsSoFar secs" + echo "found token adapter to be healthy after $waitedSecsSoFar secs" # log telemetry about success after waiting for waitedSecsSoFar and break echo "export tokenadapterHealthyAfterSecs=$waitedSecsSoFar" >>~/.bashrc break @@ -139,7 +139,7 @@ if [ "${MAC}" == "true" ]; then #end wait for addon-token-adapter to be healthy fi -export ME_CONFIG_FILE=$meConfigFile +export ME_CONFIG_FILE=$meConfigFile export FLUENT_BIT_CONFIG_FILE=$fluentBitConfigFile echo "export ME_CONFIG_FILE=$meConfigFile" >> ~/.bashrc echo "export FLUENT_BIT_CONFIG_FILE=$fluentBitConfigFile" >> ~/.bashrc @@ -188,7 +188,7 @@ if [ "${MAC}" != "true" ]; then source ~/.bashrc echo_var "AKV_FILES" "$AZMON_METRIC_ACCOUNTS_AKV_FILES" - + echo "Starting metricsextension" # will need to rotate the entire log location # will need to remove accountname fetching from env @@ -302,4 +302,4 @@ shutdown() { trap "shutdown" SIGTERM -sleep inf & wait \ No newline at end of file +sleep inf & wait