diff --git a/otelcollector/configmapparser/default-prom-configs/controlplane_apiserver.yml b/otelcollector/configmapparser/default-prom-configs/controlplane_apiserver.yml index bf799e4a4..aa92ce62d 100644 --- a/otelcollector/configmapparser/default-prom-configs/controlplane_apiserver.yml +++ b/otelcollector/configmapparser/default-prom-configs/controlplane_apiserver.yml @@ -1,36 +1,36 @@ -# scrape_configs: -# - job_name: prometheus_ref_app_apiserver -# scheme: http -# scrape_interval: 60s -# kubernetes_sd_configs: -# - role: pod -# relabel_configs: -# - source_labels: [__meta_kubernetes_pod_label_app] -# action: keep -# regex: "prometheus-reference-app" - scrape_configs: - - job_name: controlplane_apiserver +scrape_configs: + - job_name: prometheus_ref_app_apiserver + scheme: http scrape_interval: $$SCRAPE_INTERVAL$$ - label_limit: 63 - label_name_length_limit: 511 - label_value_length_limit: 1023 kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - $$POD_NAMESPACE$$ - scheme: https - tls_config: - ca_file: /etc/kubernetes/secrets/ca.pem - insecure_skip_verify: true - bearer_token_file: /etc/kubernetes/secrets/token + - role: pod relabel_configs: - - source_labels: - - __meta_kubernetes_pod_label_k8s_app - - __meta_kubernetes_pod_container_name - action: keep - regex: kube-apiserver;kube-apiserver - - source_labels: [ __meta_kubernetes_pod_name ] - regex: (.*) - target_label: pod - action: replace \ No newline at end of file + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: "prometheus-reference-app" + # scrape_configs: + # - job_name: controlplane_apiserver + # scrape_interval: $$SCRAPE_INTERVAL$$ + # label_limit: 63 + # label_name_length_limit: 511 + # label_value_length_limit: 1023 + # kubernetes_sd_configs: + # - role: endpoints + # namespaces: + # names: + # - $$POD_NAMESPACE$$ + # scheme: https + # tls_config: + # ca_file: /etc/kubernetes/secrets/ca.pem + # insecure_skip_verify: true + # bearer_token_file: 
/etc/kubernetes/secrets/token + # relabel_configs: + # - source_labels: + # - __meta_kubernetes_pod_label_k8s_app + # - __meta_kubernetes_pod_container_name + # action: keep + # regex: kube-apiserver;kube-apiserver + # - source_labels: [ __meta_kubernetes_pod_name ] + # regex: (.*) + # target_label: pod + # action: replace \ No newline at end of file diff --git a/otelcollector/configmapparser/default-prom-configs/controlplane_cluster_autoscaler.yml b/otelcollector/configmapparser/default-prom-configs/controlplane_cluster_autoscaler.yml index b13b8f00f..2b270c0a8 100644 --- a/otelcollector/configmapparser/default-prom-configs/controlplane_cluster_autoscaler.yml +++ b/otelcollector/configmapparser/default-prom-configs/controlplane_cluster_autoscaler.yml @@ -1,41 +1,41 @@ -# scrape_configs: -# - job_name: prometheus_ref_app_cluster_autoscaler -# scheme: http -# scrape_interval: 60s -# kubernetes_sd_configs: -# - role: pod -# relabel_configs: -# - source_labels: [__meta_kubernetes_pod_label_app] -# action: keep -# regex: "prometheus-reference-app" - scrape_configs: - - job_name: controlplane_cluster_autoscaler +scrape_configs: + - job_name: prometheus_ref_app_cluster_autoscaler + scheme: http scrape_interval: $$SCRAPE_INTERVAL$$ - label_limit: 63 - label_name_length_limit: 511 - label_value_length_limit: 1023 - follow_redirects: false kubernetes_sd_configs: - - role: pod - namespaces: - names: - - $$POD_NAMESPACE$$ + - role: pod relabel_configs: - - source_labels: [ __meta_kubernetes_pod_label_app, __meta_kubernetes_pod_container_name ] - action: keep - regex: cluster-autoscaler;cluster-autoscaler - - source_labels: [ __meta_kubernetes_pod_annotation_aks_prometheus_io_scrape ] - action: keep - regex: true - - source_labels: [ __meta_kubernetes_pod_annotation_aks_prometheus_io_path ] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [ __address__, __meta_kubernetes_pod_annotation_aks_prometheus_io_port ] - action: replace - separator: 
":" - target_label: __address__ - - source_labels: [ __meta_kubernetes_pod_name ] - regex: (.*) - target_label: pod - action: replace \ No newline at end of file + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: "prometheus-reference-app" + # scrape_configs: + # - job_name: controlplane_cluster_autoscaler + # scrape_interval: $$SCRAPE_INTERVAL$$ + # label_limit: 63 + # label_name_length_limit: 511 + # label_value_length_limit: 1023 + # follow_redirects: false + # kubernetes_sd_configs: + # - role: pod + # namespaces: + # names: + # - $$POD_NAMESPACE$$ + # relabel_configs: + # - source_labels: [ __meta_kubernetes_pod_label_app, __meta_kubernetes_pod_container_name ] + # action: keep + # regex: cluster-autoscaler;cluster-autoscaler + # - source_labels: [ __meta_kubernetes_pod_annotation_aks_prometheus_io_scrape ] + # action: keep + # regex: true + # - source_labels: [ __meta_kubernetes_pod_annotation_aks_prometheus_io_path ] + # action: replace + # target_label: __metrics_path__ + # regex: (.+) + # - source_labels: [ __address__, __meta_kubernetes_pod_annotation_aks_prometheus_io_port ] + # action: replace + # separator: ":" + # target_label: __address__ + # - source_labels: [ __meta_kubernetes_pod_name ] + # regex: (.*) + # target_label: pod + # action: replace \ No newline at end of file diff --git a/otelcollector/configmapparser/default-prom-configs/controlplane_etcd.yml b/otelcollector/configmapparser/default-prom-configs/controlplane_etcd.yml index 290a2f1f7..926f838b5 100644 --- a/otelcollector/configmapparser/default-prom-configs/controlplane_etcd.yml +++ b/otelcollector/configmapparser/default-prom-configs/controlplane_etcd.yml @@ -1,36 +1,36 @@ -# scrape_configs: -# - job_name: prometheus_ref_app_etcd -# scheme: http -# scrape_interval: 60s -# kubernetes_sd_configs: -# - role: pod -# relabel_configs: -# - source_labels: [__meta_kubernetes_pod_label_app] -# action: keep -# regex: "prometheus-reference-app" - scrape_configs: - - 
job_name: controlplane_etcd +scrape_configs: + - job_name: prometheus_ref_app_etcd + scheme: http scrape_interval: $$SCRAPE_INTERVAL$$ - label_limit: 63 - label_name_length_limit: 511 - label_value_length_limit: 1023 - follow_redirects: false - scheme: https kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - $$POD_NAMESPACE$$ - tls_config: - ca_file: /etc/kubernetes/secrets/etcd-client-ca.crt - cert_file: /etc/kubernetes/secrets/etcd-client.crt - key_file: /etc/kubernetes/secrets/etcd-client.key - insecure_skip_verify: true + - role: pod relabel_configs: - - source_labels: [ __meta_kubernetes_service_label_app, __meta_kubernetes_pod_container_port_number ] - action: keep - regex: etcd;2379 - - source_labels: [ __meta_kubernetes_pod_name ] - regex: (.*) - target_label: pod - action: replace \ No newline at end of file + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: "prometheus-reference-app" + # scrape_configs: + # - job_name: controlplane_etcd + # scrape_interval: $$SCRAPE_INTERVAL$$ + # label_limit: 63 + # label_name_length_limit: 511 + # label_value_length_limit: 1023 + # follow_redirects: false + # scheme: https + # kubernetes_sd_configs: + # - role: endpoints + # namespaces: + # names: + # - $$POD_NAMESPACE$$ + # tls_config: + # ca_file: /etc/kubernetes/secrets/etcd-client-ca.crt + # cert_file: /etc/kubernetes/secrets/etcd-client.crt + # key_file: /etc/kubernetes/secrets/etcd-client.key + # insecure_skip_verify: true + # relabel_configs: + # - source_labels: [ __meta_kubernetes_service_label_app, __meta_kubernetes_pod_container_port_number ] + # action: keep + # regex: etcd;2379 + # - source_labels: [ __meta_kubernetes_pod_name ] + # regex: (.*) + # target_label: pod + # action: replace \ No newline at end of file diff --git a/otelcollector/configmapparser/default-prom-configs/controlplane_kube_controller_manager.yml b/otelcollector/configmapparser/default-prom-configs/controlplane_kube_controller_manager.yml 
index 800cc8d05..8163dba88 100644 --- a/otelcollector/configmapparser/default-prom-configs/controlplane_kube_controller_manager.yml +++ b/otelcollector/configmapparser/default-prom-configs/controlplane_kube_controller_manager.yml @@ -1,47 +1,47 @@ -# scrape_configs: -# - job_name: prometheus_ref_app_kube_controller_manager -# scheme: http -# scrape_interval: 60s -# kubernetes_sd_configs: -# - role: pod -# relabel_configs: -# - source_labels: [__meta_kubernetes_pod_label_app] -# action: keep -# regex: "prometheus-reference-app" - scrape_configs: - - job_name: controlplane_kube_controller_manager +scrape_configs: + - job_name: prometheus_ref_app_kube_controller_manager + scheme: http scrape_interval: $$SCRAPE_INTERVAL$$ - label_limit: 63 - label_name_length_limit: 511 - label_value_length_limit: 1023 - follow_redirects: false - scheme: https kubernetes_sd_configs: - - role: pod - namespaces: - names: - - $$POD_NAMESPACE$$ - tls_config: - ca_file: /etc/kubernetes/secrets/ca.pem - cert_file: /etc/kubernetes/secrets/client.pem - key_file: /etc/kubernetes/secrets/client-key.pem - insecure_skip_verify: true + - role: pod relabel_configs: - - source_labels: [__meta_kubernetes_pod_label_k8s_app, __meta_kubernetes_pod_container_name] - action: keep - regex: kube-controller-manager;kube-controller-manager - - source_labels: [__meta_kubernetes_pod_annotation_aks_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_pod_annotation_aks_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [ __address__, __meta_kubernetes_pod_annotation_aks_prometheus_io_port ] - action: replace - separator: ":" - target_label: __address__ - - source_labels: [__meta_kubernetes_pod_name] - regex: (.*) - target_label: pod - action: replace \ No newline at end of file + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: "prometheus-reference-app" + # scrape_configs: + # - job_name: 
controlplane_kube_controller_manager + # scrape_interval: $$SCRAPE_INTERVAL$$ + # label_limit: 63 + # label_name_length_limit: 511 + # label_value_length_limit: 1023 + # follow_redirects: false + # scheme: https + # kubernetes_sd_configs: + # - role: pod + # namespaces: + # names: + # - $$POD_NAMESPACE$$ + # tls_config: + # ca_file: /etc/kubernetes/secrets/ca.pem + # cert_file: /etc/kubernetes/secrets/client.pem + # key_file: /etc/kubernetes/secrets/client-key.pem + # insecure_skip_verify: true + # relabel_configs: + # - source_labels: [__meta_kubernetes_pod_label_k8s_app, __meta_kubernetes_pod_container_name] + # action: keep + # regex: kube-controller-manager;kube-controller-manager + # - source_labels: [__meta_kubernetes_pod_annotation_aks_prometheus_io_scrape] + # action: keep + # regex: true + # - source_labels: [__meta_kubernetes_pod_annotation_aks_prometheus_io_path] + # action: replace + # target_label: __metrics_path__ + # regex: (.+) + # - source_labels: [ __address__, __meta_kubernetes_pod_annotation_aks_prometheus_io_port ] + # action: replace + # separator: ":" + # target_label: __address__ + # - source_labels: [__meta_kubernetes_pod_name] + # regex: (.*) + # target_label: pod + # action: replace \ No newline at end of file diff --git a/otelcollector/configmapparser/default-prom-configs/controlplane_kube_scheduler.yml b/otelcollector/configmapparser/default-prom-configs/controlplane_kube_scheduler.yml index e76896e43..a6cc93b04 100644 --- a/otelcollector/configmapparser/default-prom-configs/controlplane_kube_scheduler.yml +++ b/otelcollector/configmapparser/default-prom-configs/controlplane_kube_scheduler.yml @@ -1,47 +1,47 @@ -# scrape_configs: -# - job_name: prometheus_ref_app_kube_scheduler -# scheme: http -# scrape_interval: 60s -# kubernetes_sd_configs: -# - role: pod -# relabel_configs: -# - source_labels: [__meta_kubernetes_pod_label_app] -# action: keep -# regex: "prometheus-reference-app" - scrape_configs: - - job_name: 
controlplane_kube_scheduler +scrape_configs: + - job_name: prometheus_ref_app_kube_scheduler + scheme: http scrape_interval: $$SCRAPE_INTERVAL$$ - label_limit: 63 - label_name_length_limit: 511 - label_value_length_limit: 1023 - follow_redirects: false - scheme: https kubernetes_sd_configs: - - role: pod - namespaces: - names: - - $$POD_NAMESPACE$$ - tls_config: - ca_file: /etc/kubernetes/secrets/ca.pem - cert_file: /etc/kubernetes/secrets/client.pem - key_file: /etc/kubernetes/secrets/client-key.pem - insecure_skip_verify: true + - role: pod relabel_configs: - - source_labels: [__meta_kubernetes_pod_label_k8s_app, __meta_kubernetes_pod_container_name] - action: keep - regex: kube-scheduler;kube-scheduler - - source_labels: [__meta_kubernetes_pod_annotation_aks_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_pod_annotation_aks_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_aks_prometheus_io_port] - action: replace - separator: ":" - target_label: __address__ - - source_labels: [__meta_kubernetes_pod_name] - regex: (.*) - target_label: pod - action: replace \ No newline at end of file + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: "prometheus-reference-app" + # scrape_configs: + # - job_name: controlplane_kube_scheduler + # scrape_interval: $$SCRAPE_INTERVAL$$ + # label_limit: 63 + # label_name_length_limit: 511 + # label_value_length_limit: 1023 + # follow_redirects: false + # scheme: https + # kubernetes_sd_configs: + # - role: pod + # namespaces: + # names: + # - $$POD_NAMESPACE$$ + # tls_config: + # ca_file: /etc/kubernetes/secrets/ca.pem + # cert_file: /etc/kubernetes/secrets/client.pem + # key_file: /etc/kubernetes/secrets/client-key.pem + # insecure_skip_verify: true + # relabel_configs: + # - source_labels: [__meta_kubernetes_pod_label_k8s_app, __meta_kubernetes_pod_container_name] + # 
action: keep + # regex: kube-scheduler;kube-scheduler + # - source_labels: [__meta_kubernetes_pod_annotation_aks_prometheus_io_scrape] + # action: keep + # regex: true + # - source_labels: [__meta_kubernetes_pod_annotation_aks_prometheus_io_path] + # action: replace + # target_label: __metrics_path__ + # regex: (.+) + # - source_labels: [__address__, __meta_kubernetes_pod_annotation_aks_prometheus_io_port] + # action: replace + # separator: ":" + # target_label: __address__ + # - source_labels: [__meta_kubernetes_pod_name] + # regex: (.*) + # target_label: pod + # action: replace \ No newline at end of file diff --git a/otelcollector/configmapparser/prometheus-ccp-config-merger.rb b/otelcollector/configmapparser/prometheus-ccp-config-merger.rb index d24cc6599..5118ce3a6 100644 --- a/otelcollector/configmapparser/prometheus-ccp-config-merger.rb +++ b/otelcollector/configmapparser/prometheus-ccp-config-merger.rb @@ -128,7 +128,7 @@ def populateDefaultPrometheusConfig defaultConfigs.push(@controlplane_etcd_default_file) end - if !ENV["AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED"].downcase == "true" + if !ENV["AZMON_PROMETHEUS_CONTROLPLANE_COLLECTOR_HEALTH_SCRAPING_ENABLED"].nil? 
&& ENV["AZMON_PROMETHEUS_CONTROLPLANE_COLLECTOR_HEALTH_SCRAPING_ENABLED"].downcase == "true"
       defaultConfigs.push(@controlplane_prometheuscollectorhealth_default_file)
     end
 
@@ -171,6 +171,7 @@ def writeDefaultScrapeTargetsFile()
       File.open(@mergedDefaultConfigPath, "w") { |file| file.puts mergedDefaultConfigYaml }
     end
   rescue => errorStr
+    File.open(@mergedDefaultConfigPath, "w") { |file| file.puts "" }
     ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Error while populating default scrape targets and writing them to the default scrape targets file")
   end
 end
diff --git a/otelcollector/configmapparser/tomlparser-ccp-default-scrape-settings.rb b/otelcollector/configmapparser/tomlparser-ccp-default-scrape-settings.rb
index 330f94e17..8b9fce1cf 100644
--- a/otelcollector/configmapparser/tomlparser-ccp-default-scrape-settings.rb
+++ b/otelcollector/configmapparser/tomlparser-ccp-default-scrape-settings.rb
@@ -15,6 +15,7 @@
 @controlplane_apiserver_enabled = true
 @controlplane_cluster_autoscaler_enabled = false
 @controlplane_etcd_enabled = true
+@controlplane_prometheuscollectorhealth_enabled = false
 @noDefaultsEnabled = false
 
 # Use parser to parse the configmap toml file to a ruby structure
@@ -57,6 +58,11 @@ def populateSettingValuesFromConfigMap(parsedConfig)
     @controlplane_etcd_enabled = parsedConfig[:controlplane_etcd]
     puts "config::Using configmap scrape settings for controlplane-etcd: #{@controlplane_etcd_enabled}"
   end
+  if !parsedConfig[:controlplane_prometheuscollectorhealth].nil?
+    @controlplane_prometheuscollectorhealth_enabled = parsedConfig[:controlplane_prometheuscollectorhealth]
+    puts "config::Using configmap scrape settings for controlplane_prometheuscollectorhealth: #{@controlplane_prometheuscollectorhealth_enabled}"
+  end
 
-  if ENV["MODE"].nil? && ENV["MODE"].strip.downcase == "advanced"
+  # MODE may be unset: only call strip/downcase once ENV["MODE"] is known to be non-nil.
+  if !ENV["MODE"].nil? && ENV["MODE"].strip.downcase == "advanced"
     controllerType = ENV["CONTROLLER_TYPE"]
@@ -82,6 +88,7 @@ def populateSettingValuesFromConfigMap(parsedConfig)
   @controlplane_kube_controller_manager_enabled = false
   @controlplane_kube_scheduler_enabled = false
   @controlplane_cluster_autoscaler_enabled = false
+  @controlplane_prometheuscollectorhealth_enabled = false
 end
 if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it
   configMapSettings = parseConfigMap
@@ -103,6 +110,7 @@ def populateSettingValuesFromConfigMap(parsedConfig)
   file.write("AZMON_PROMETHEUS_CONTROLPLANE_APISERVER_ENABLED=#{@controlplane_apiserver_enabled}\n")
   file.write("AZMON_PROMETHEUS_CONTROLPLANE_CLUSTER_AUTOSCALER_ENABLED=#{@controlplane_cluster_autoscaler_enabled}\n")
   file.write("AZMON_PROMETHEUS_CONTROLPLANE_ETCD_ENABLED=#{@controlplane_etcd_enabled}\n")
+  file.write("AZMON_PROMETHEUS_CONTROLPLANE_COLLECTOR_HEALTH_SCRAPING_ENABLED=#{@controlplane_prometheuscollectorhealth_enabled}\n")
   file.write("AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED=#{@noDefaultsEnabled}\n")
   # Close file after writing all metric collection setting environment variables
   file.close
diff --git a/otelcollector/configmapparser/tomlparser-ccp-default-targets-metrics-keep-list.rb b/otelcollector/configmapparser/tomlparser-ccp-default-targets-metrics-keep-list.rb
index 11d4af39a..4b8ef43b0 100644
--- a/otelcollector/configmapparser/tomlparser-ccp-default-targets-metrics-keep-list.rb
+++ b/otelcollector/configmapparser/tomlparser-ccp-default-targets-metrics-keep-list.rb
@@ -27,6 +27,8 @@
 @controlplane_kube_controller_manager_minimal_mac = "rest_client_requests_duration_seconds_bucket|rest_client_requests_total|workqueue_depth|node_collector_evictions_total"
 @controlplane_etcd_minimal_mac =
"etcd_memory_in_bytes|etcd_cpu_in_cores|etcd_db_limit_in_bytes|etcd_db_max_size_in_bytes|etcd_db_fragmentation_rate|etcd_db_total_object_count|etcd_db_top_N_object_counts_by_type|etcd_db_top_N_object_size_by_type|etcd2_enabled" +@minimalIngestionProfile = ENV["MINIMAL_INGESTION_PROFILE"] + # Use parser to parse the configmap toml file to a ruby structure def parseConfigMap begin @@ -119,6 +121,14 @@ def populateSettingValuesFromConfigMap(parsedConfig) rescue => errorStr ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for default targets metrics keep list - #{errorStr}, using defaults, please check config map for errors") end + + + ConfigParseErrorLogger.log(LOGGING_PREFIX, "Reading configmap setting for minimalingestionprofile") + minimalIngestionProfileSetting = parsedConfig[:minimalingestionprofile] + if !minimalIngestionProfileSetting.nil? + @minimalIngestionProfile = minimalIngestionProfileSetting.to_s.downcase + ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap setting for minimalIngestionProfile -> #{@minimalIngestionProfile}") + end end # -------Apply profile for ingestion-------- @@ -126,14 +136,16 @@ def populateSettingValuesFromConfigMap(parsedConfig) # so the theory here is -- # if customer provided regex is valid, our regex validation for that will pass, a OR of customer provided regex with our minimal profile regex would be a valid regex as well, so we dont check again for the wholistic validation of merged regex # if customer provided regex is invalid, our regex validation for customer provided regex will fail, and if minimal ingestion profile is enabled, we will use that and ignore customer provided one -def populateRegexValues +def populateRegexValuesWithMinimalIngestionProfile begin - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Populating regex with customer + default values for minimal ingestion profile") - @controlplane_kube_controller_manager_regex = 
@controlplane_kube_controller_manager_regex + "|" + @controlplane_kube_controller_manager_minimal_mac - @controlplane_kube_scheduler_regex = @controlplane_kube_scheduler_regex + "|" + @controlplane_kube_scheduler_minimal_mac - @controlplane_apiserver_regex = @controlplane_apiserver_regex + "|" + @controlplane_apiserver_minimal_mac - @controlplane_cluster_autoscaler_regex = @controlplane_cluster_autoscaler_regex + "|" + @controlplane_cluster_autoscaler_minimal_mac - @controlplane_etcd_regex = @controlplane_etcd_regex + "|" + @controlplane_etcd_minimal_mac + if @minimalIngestionProfile == "true" + ConfigParseErrorLogger.log(LOGGING_PREFIX, "Populating regex with customer + default values for minimal ingestion profile") + @controlplane_kube_controller_manager_regex = @controlplane_kube_controller_manager_regex + "|" + @controlplane_kube_controller_manager_minimal_mac + @controlplane_kube_scheduler_regex = @controlplane_kube_scheduler_regex + "|" + @controlplane_kube_scheduler_minimal_mac + @controlplane_apiserver_regex = @controlplane_apiserver_regex + "|" + @controlplane_apiserver_minimal_mac + @controlplane_cluster_autoscaler_regex = @controlplane_cluster_autoscaler_regex + "|" + @controlplane_cluster_autoscaler_minimal_mac + @controlplane_etcd_regex = @controlplane_etcd_regex + "|" + @controlplane_etcd_minimal_mac + end rescue => errorStr ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while populating regex values with minimal ingestion profile - #{errorStr}, skipping applying minimal ingestion profile regexes") end @@ -155,7 +167,7 @@ def populateRegexValues end # Populate the regex values after reading the configmap settings -populateRegexValues +populateRegexValuesWithMinimalIngestionProfile # Write the settings to file, so that they can be set as environment variables file = File.open("/opt/microsoft/configmapparser/config_def_targets_metrics_keep_list_hash", "w") diff --git 
a/otelcollector/configmapparser/tomlparser-ccp-prometheus-collector-settings.rb b/otelcollector/configmapparser/tomlparser-ccp-prometheus-collector-settings.rb
new file mode 100644
index 000000000..a5fe0b866
--- /dev/null
+++ b/otelcollector/configmapparser/tomlparser-ccp-prometheus-collector-settings.rb
@@ -0,0 +1,130 @@
+#!/usr/local/bin/ruby
+# frozen_string_literal: true
+
+require "tomlrb"
+require_relative "ConfigParseErrorLogger"
+
+LOGGING_PREFIX = "config"
+
+@configMapMountPath = "/etc/config/settings/prometheus-collector-settings"
+@configVersion = ""
+@configSchemaVersion = ""
+
+# Setting default values which will be used in case they are not set in the configmap or if configmap doesnt exist
+@defaultMetricAccountName = "NONE"
+
+@clusterAlias = "" # user provided alias (thru config map or chart param)
+@clusterLabel = "" # value of the 'cluster' label in every time series scraped
+@isOperatorEnabled = ""
+@isOperatorEnabledChartSetting = ""
+
+# Use parser to parse the configmap toml file to a ruby structure
+# Returns the parsed hash (symbolized keys), or nil when the file is not mounted or cannot be parsed
+def parseConfigMap
+  begin
+    # Check to see if config map is created
+    if (File.file?(@configMapMountPath))
+      parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true)
+      return parsedConfig
+    else
+      ConfigParseErrorLogger.log(LOGGING_PREFIX, "configmapprometheus-collector-configmap for prometheus collector settings not mounted, using defaults")
+      return nil
+    end
+  rescue => errorStr
+    ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing config map for prometheus collector settings: #{errorStr}, using defaults, please check config map for errors")
+    return nil
+  end
+end
+
+# Use the ruby structure created after config parsing to set the right values to be used for otel collector settings
+# Updates @defaultMetricAccountName, @clusterAlias, @isOperatorEnabled and @isOperatorEnabledChartSetting
+def populateSettingValuesFromConfigMap(parsedConfig)
+  # Read the default metric account name, if the configmap provides one
+  begin
+    if !parsedConfig.nil? && !parsedConfig[:default_metric_account_name].nil?
+      @defaultMetricAccountName = parsedConfig[:default_metric_account_name]
+      ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap setting for default metric account name: #{@defaultMetricAccountName}")
+    end
+  rescue => errorStr
+    ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for prometheus collector settings- #{errorStr}, using defaults, please check config map for errors")
+  end
+
+  # Read the user-provided cluster alias and sanitize it; fall back to an empty alias on any error
+  begin
+    if !parsedConfig.nil? && !parsedConfig[:cluster_alias].nil?
+      @clusterAlias = parsedConfig[:cluster_alias].strip
+      ConfigParseErrorLogger.log(LOGGING_PREFIX, "Got configmap setting for cluster_alias:#{@clusterAlias}")
+      @clusterAlias = @clusterAlias.gsub(/[^0-9a-z]/i, "_") #replace all non alpha-numeric characters with "_" -- this is to ensure that all down stream places where this is used (like collector, telegraf config etc are keeping up with sanity)
+      ConfigParseErrorLogger.log(LOGGING_PREFIX, "After g-subing configmap setting for cluster_alias:#{@clusterAlias}")
+    end
+  rescue => errorStr
+    @clusterAlias = ""
+    ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for cluster_alias in prometheus collector settings- #{errorStr}, using defaults, please check config map for errors")
+  end
+
+  # Safeguard to fall back to non operator model, enable to set to true or false only when toggle is enabled
+  if !ENV["AZMON_OPERATOR_ENABLED"].nil? && ENV["AZMON_OPERATOR_ENABLED"].downcase == "true"
+    begin
+      @isOperatorEnabledChartSetting = "true"
+      if !parsedConfig.nil? && !parsedConfig[:operator_enabled].nil?
+        @isOperatorEnabled = parsedConfig[:operator_enabled]
+        ConfigParseErrorLogger.log(LOGGING_PREFIX, "Configmap setting enabling operator: #{@isOperatorEnabled}")
+      end
+    rescue => errorStr
+      ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for prometheus collector settings- #{errorStr}, using defaults, please check config map for errors")
+    end
+  else
+    @isOperatorEnabledChartSetting = "false"
+  end
+end
+
+@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
+ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Start prometheus-collector-settings Processing")
+if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it
+  configMapSettings = parseConfigMap
+  if !configMapSettings.nil?
+    populateSettingValuesFromConfigMap(configMapSettings)
+  end
+else
+  if (File.file?(@configMapMountPath))
+    ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version")
+  end
+end
+
+# get clustername from cluster's full ARM resourceid (to be used for mac mode as 'cluster' label)
+begin
+  if !ENV["MAC"].nil? && !ENV["MAC"].empty? && ENV["MAC"].strip.downcase == "true"
+    resourceArray = ENV["CLUSTER"].strip.split("/")
+    @clusterLabel = resourceArray[resourceArray.length - 1]
+  else
+    @clusterLabel = ENV["CLUSTER"]
+  end
+rescue => errorStr
+  @clusterLabel = ENV["CLUSTER"]
+  ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing to determine cluster label from full cluster resource id in prometheus collector settings- #{errorStr}, using default as full CLUSTER passed-in '#{@clusterLabel}'")
+end
+
+#override cluster label with cluster alias, if alias is specified
+
+if !@clusterAlias.nil? && !@clusterAlias.empty?
+  @clusterLabel = @clusterAlias
+  ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using clusterLabel from cluster_alias:#{@clusterAlias}")
+end
+
+ConfigParseErrorLogger.log(LOGGING_PREFIX, "AZMON_CLUSTER_ALIAS:'#{@clusterAlias}'")
+ConfigParseErrorLogger.log(LOGGING_PREFIX, "AZMON_CLUSTER_LABEL:#{@clusterLabel}")
+
+# Write the settings to file, so that they can be set as environment variables.
+# File.open raises on failure instead of returning nil, so errors are handled with rescue;
+# the block form closes the file even if one of the writes fails.
+begin
+  File.open("/opt/microsoft/configmapparser/config_prometheus_collector_settings_env_var", "w") do |file|
+    file.write("AZMON_DEFAULT_METRIC_ACCOUNT_NAME=#{@defaultMetricAccountName}\n")
+    file.write("AZMON_CLUSTER_LABEL=#{@clusterLabel}\n") #used for cluster label value when scraping
+    file.write("AZMON_CLUSTER_ALIAS=#{@clusterAlias}\n") #used only for telemetry
+    file.write("AZMON_OPERATOR_ENABLED_CHART_SETTING=#{@isOperatorEnabledChartSetting}\n")
+  end
+rescue StandardError
+  ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while opening file for writing prometheus-collector config environment variables")
+end
+ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "End prometheus-collector-settings Processing")
diff --git a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/Chart.yaml b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/Chart.yaml
index aa7403b76..6f448cd6f 100644
--- a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/Chart.yaml
+++ b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/Chart.yaml
@@ -15,13 +15,13 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 6.8.0-ccp-shell-removal-branch-10-26-2023-3703fa73 +version: 6.8.0-ccp-shell-removal-branch-10-26-2023-1ac81793 # This is the version number of the application being deployed (basically, imagetag for the image built/compatible with this chart semver above). This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "6.8.0-ccp-shell-removal-branch-10-26-2023-3703fa73" +appVersion: "6.8.0-ccp-shell-removal-branch-10-26-2023-1ac81793" # dependencies: # - name: prometheus-node-exporter # version: "4.21.0" diff --git a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml index c90161448..5bc4db135 100644 --- a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml +++ b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml @@ -1,468 +1,468 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: ama-metrics-node - namespace: kube-system - labels: - component: ama-metrics -spec: - selector: - matchLabels: - dsName: ama-metrics-node - updateStrategy: - type: RollingUpdate - rollingUpdate: - maxUnavailable: 50% - template: - metadata: - labels: - dsName: ama-metrics-node - kubernetes.azure.com/managedby: aks - annotations: - agentVersion: "0.0.0.1" - schema-versions: "v1" - spec: - priorityClassName: system-node-critical - serviceAccountName: ama-metrics-serviceaccount - containers: - - name: prometheus-collector - image: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.ImageRepository }}:{{ .Values.AzureMonitorMetrics.ImageTag }}" - imagePullPolicy: 
IfNotPresent - resources: - limits: - cpu: 200m - memory: 1Gi - requests: - cpu: 50m - memory: 150Mi - {{- if and (eq .Values.AzureMonitorMetrics.ArcExtension true) (.Values.Azure.proxySettings.isProxyEnabled) }} - envFrom: - - secretRef: - name: ama-metrics-proxy-config - {{- end }} - env: - - name: CLUSTER - {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} - value: "{{ .Values.Azure.Cluster.ResourceId }}" - {{- else }} - value: "{{ .Values.global.commonGlobals.Customer.AzureResourceID }}" - {{- end }} - - name: AKSREGION - {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} - value: "{{ .Values.Azure.Cluster.Region }}" - {{- else }} - value: "{{ .Values.global.commonGlobals.Region }}" - {{- end }} - - name: MAC - value: "true" - - name: AZMON_COLLECT_ENV - value: "false" - - name: customEnvironment - {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} - value: "{{ lower .Values.Azure.Cluster.Cloud }}" - {{- else if .Values.AzureMonitorMetrics.isArcACluster }} - value: "arcautonomous" - {{- else }} - value: "{{ lower .Values.global.commonGlobals.CloudEnvironment }}" - {{- end }} - - name: OMS_TLD - value: "opinsights.azure.com" - {{- if eq .Values.AzureMonitorMetrics.isArcACluster true }} - - name: customRegionalEndpoint - value: {{ required "customRegionalEndpoint is required in Arc Autonomous" .Values.AzureMonitorMetrics.arcAutonomousSettings.customRegionalEndpoint | toString | trim | quote }} - - name: customGlobalEndpoint - value: {{ required "customGlobalEndpoint is required in Arc Autonomous" .Values.AzureMonitorMetrics.arcAutonomousSettings.customGlobalEndpoint | toString | trim | quote }} - - name: customResourceEndpoint - value: {{ required "customResourceEndpoint is required in Arc Autonomous" .Values.AzureMonitorMetrics.arcAutonomousSettings.customResourceEndpoint | toString | trim | quote }} - {{- end }} - - name: CONTROLLER_TYPE - value: "DaemonSet" - - name: NODE_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: 
NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: CONTAINER_CPU_LIMIT - valueFrom: - resourceFieldRef: - containerName: prometheus-collector - resource: limits.cpu - divisor: 1m - - name: CONTAINER_MEMORY_LIMIT - valueFrom: - resourceFieldRef: - containerName: prometheus-collector - resource: limits.memory - divisor: 1Mi - - name: KUBE_STATE_NAME - value: ama-metrics-ksm - - name: NODE_EXPORTER_NAME - value: "" # Replace this with the node exporter shipped out of box with AKS - - name: NODE_EXPORTER_TARGETPORT - {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} - value: "{{ index .Values "prometheus-node-exporter" "service" "targetPort" }}" - {{- else }} - value: "19100" - {{- end }} - {{- if .Values.AzureMonitorMetrics }} - {{- if .Values.AzureMonitorMetrics.KubeStateMetrics }} - - name: KUBE_STATE_VERSION - value: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.KubeStateMetrics.ImageRepository }}:{{ .Values.AzureMonitorMetrics.KubeStateMetrics.ImageTag }}" - {{- end }} - {{- end }} - - name: NODE_EXPORTER_VERSION - value: "v1.6.0" # Replace this with the version shipped by default - - name: AGENT_VERSION - value: {{ .Values.AzureMonitorMetrics.ImageTag }} - - name: MODE - value: "advanced" # only supported mode is 'advanced', any other value will be the default/non-advance mode - - name: WINMODE - value: "advanced" # WINDOWS: only supported mode is 'advanced', any other value will be the default/non-advance mode - - name: MINIMAL_INGESTION_PROFILE - value: "true" # only supported value is the string "true" - securityContext: - privileged: false - volumeMounts: - - mountPath: /etc/config/settings - name: settings-vol-config - readOnly: true - - mountPath: /etc/config/settings/prometheus - name: prometheus-config-vol - readOnly: true - - name: host-log-containers - readOnly: true - 
mountPath: /var/log/containers - - name: host-log-pods - readOnly: true - mountPath: /var/log/pods - - mountPath: /anchors/mariner - name: anchors-mariner - readOnly: true - {{- if or (ne .Values.AzureMonitorMetrics.ArcExtension true) (and (not (hasPrefix "aks_edge" .Values.ClusterDistribution )) (and (ne .Values.Azure.Cluster.Distribution "aks_edge_k3s") (ne .Values.Azure.Cluster.Distribution "aks_edge_k8s"))) }} - - mountPath: /anchors/ubuntu - name: anchors-ubuntu - readOnly: true - {{- end }} - {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} - - mountPath: /anchors/proxy - name: ama-metrics-proxy-cert - readOnly: true - {{- end }} - livenessProbe: - exec: - command: - - /bin/bash - - -c - - /opt/microsoft/liveness/livenessprobe.sh - initialDelaySeconds: 60 - periodSeconds: 15 - timeoutSeconds: 5 - failureThreshold: 3 - {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} - - name: arc-msi-adapter - imagePullPolicy: IfNotPresent - env: - - name: TOKEN_NAMESPACE - value: "azure-arc" - - name: LIVENESS_PROBE_PORT - value: "9999" - {{- .Values.Azure.Identity.MSIAdapterYaml | nindent 10 }} - {{- else }} - - name: addon-token-adapter - command: - - /addon-token-adapter - args: - - --secret-namespace=kube-system - - --secret-name=aad-msi-auth-token - - --token-server-listening-port=7777 - - --health-server-listening-port=9999 - image: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.AddonTokenAdapter.ImageRepository }}:{{ .Values.AzureMonitorMetrics.AddonTokenAdapter.ImageTag }}" - imagePullPolicy: IfNotPresent - env: - - name: AZMON_COLLECT_ENV - value: "false" - livenessProbe: - httpGet: - path: /healthz - port: 9999 - initialDelaySeconds: 10 - periodSeconds: 60 - resources: - limits: - cpu: 500m - memory: 500Mi - requests: - cpu: 20m - memory: 30Mi - securityContext: - capabilities: - drop: - - ALL - add: - - NET_ADMIN - - NET_RAW - {{- end }} - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - 
matchExpressions: - - key: kubernetes.io/os - operator: In - values: - - linux - - key: type - operator: NotIn - values: - - virtual-kubelet - {{- if not .Values.AzureMonitorMetrics.ArcExtension }} - - key: kubernetes.azure.com/cluster - operator: Exists - {{- end }} - tolerations: - - key: CriticalAddonsOnly - operator: Exists - - operator: "Exists" - effect: NoExecute - - operator: "Exists" - effect: NoSchedule - - key: node-role.kubernetes.io/control-plane - operator: Exists - effect: NoSchedule - - key: node-role.kubernetes.io/master - operator: Exists - effect: NoSchedule - volumes: - - name: settings-vol-config - configMap: - name: ama-metrics-settings-configmap - optional: true - - name: prometheus-config-vol - configMap: - name: ama-metrics-prometheus-config-node - optional: true - - name: host-log-containers - hostPath: - path: /var/log/containers - - name: host-log-pods - hostPath: - path: /var/log/pods - - name: anchors-mariner - hostPath: - path: /etc/pki/ca-trust/anchors/ - type: DirectoryOrCreate - {{- if or (ne .Values.AzureMonitorMetrics.ArcExtension true) (and (not (hasPrefix "aks_edge" .Values.ClusterDistribution )) (and (ne .Values.Azure.Cluster.Distribution "aks_edge_k3s") (ne .Values.Azure.Cluster.Distribution "aks_edge_k8s"))) }} - - name: anchors-ubuntu - hostPath: - path: /usr/local/share/ca-certificates/ - type: DirectoryOrCreate - {{- end }} - {{- if .Values.AzureMonitorMetrics.ArcExtension }} - - name: ama-metrics-proxy-cert - secret: - secretName: ama-metrics-proxy-cert - optional: true - {{- end }} -{{- if not .Values.AzureMonitorMetrics.ArcExtension }} ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: ama-metrics-win-node - namespace: kube-system - labels: - component: ama-metrics -spec: - selector: - matchLabels: - dsName: ama-metrics-win-node - updateStrategy: - type: RollingUpdate - rollingUpdate: - maxUnavailable: 50% - template: - metadata: - labels: - dsName: ama-metrics-win-node - kubernetes.azure.com/managedby: aks 
- annotations: - agentVersion: "0.0.0.1" - schema-versions: "v1" - spec: - priorityClassName: system-node-critical - serviceAccountName: ama-metrics-serviceaccount - containers: - - name: prometheus-collector - image: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.ImageRepository }}:{{ .Values.AzureMonitorMetrics.ImageTagWin }}" - imagePullPolicy: IfNotPresent - resources: - limits: - cpu: 500m - memory: 1Gi - requests: - cpu: 150m - memory: 500Mi - env: - - name: CLUSTER - value: "{{ .Values.global.commonGlobals.Customer.AzureResourceID }}" - - name: AKSREGION - value: "{{ .Values.global.commonGlobals.Region }}" - - name: MAC - value: "true" - - name: AZMON_COLLECT_ENV - value: "false" - - name: customEnvironment - value: "{{ .Values.global.commonGlobals.CloudEnvironment }}" - - name: OMS_TLD - value: "opinsights.azure.com" - - name: CONTROLLER_TYPE - value: "DaemonSet" - - name: NODE_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: CONTAINER_CPU_LIMIT - valueFrom: - resourceFieldRef: - containerName: prometheus-collector - resource: limits.cpu - divisor: 1m - - name: CONTAINER_MEMORY_LIMIT - valueFrom: - resourceFieldRef: - containerName: prometheus-collector - resource: limits.memory - divisor: 1Mi - - name: KUBE_STATE_NAME - value: ama-metrics-ksm - - name: NODE_EXPORTER_NAME - value: "" # Replace this with the node exporter shipped out of box with AKS - - name: NODE_EXPORTER_TARGETPORT - value: "19100" - {{- if .Values.AzureMonitorMetrics }} - {{- if .Values.AzureMonitorMetrics.KubeStateMetrics }} - - name: KUBE_STATE_VERSION - value: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.KubeStateMetrics.ImageRepository }}:{{ .Values.AzureMonitorMetrics.KubeStateMetrics.ImageTagWin }}" - {{- end }} - {{- end }} - - name: 
NODE_EXPORTER_VERSION - value: "v1.6.0" # Replace this with the version shipped by default - - name: AGENT_VERSION - value: {{ .Values.AzureMonitorMetrics.ImageTag }} - - name: MODE - value: "advanced" # only supported mode is 'advanced', any other value will be the default/non-advance mode - - name: WINMODE - value: "advanced" # WINDOWS: only supported mode is 'advanced', any other value will be the default/non-advance mode - - name: MINIMAL_INGESTION_PROFILE - value: "true" # only supported value is the string "true" - securityContext: - privileged: false - volumeMounts: - - mountPath: /etc/config/settings - name: settings-vol-config - readOnly: true - - mountPath: /etc/config/settings/prometheus - name: prometheus-config-vol - readOnly: true - - name: host-log-containers - readOnly: true - mountPath: /var/log/containers - - name: host-log-pods - readOnly: true - mountPath: /var/log/pods - livenessProbe: - exec: - command: - - cmd - - /c - - C:\opt\microsoft\scripts\livenessprobe.cmd - periodSeconds: 15 - initialDelaySeconds: 300 - timeoutSeconds: 15 - failureThreshold: 3 - - name: addon-token-adapter-win - command: - - addon-token-adapter-win - args: - - --secret-namespace=kube-system - - --secret-name=aad-msi-auth-token - - --token-server-listening-port=7777 - - --health-server-listening-port=9999 - image: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.AddonTokenAdapter.ImageRepositoryWin }}:{{ .Values.AzureMonitorMetrics.AddonTokenAdapter.ImageTagWin }}" - imagePullPolicy: Always - livenessProbe: - httpGet: - path: /healthz - port: 9999 - initialDelaySeconds: 10 - periodSeconds: 60 - resources: - limits: - memory: 500Mi - requests: - cpu: 100m - memory: 100Mi - securityContext: - capabilities: - add: - - NET_ADMIN - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - labelSelector: - matchExpressions: - - key: kubernetes.io/os - operator: In - values: - - windows - - key: type - operator: NotIn - values: - - 
virtual-kubelet - tolerations: - - key: CriticalAddonsOnly - operator: Exists - - operator: "Exists" - effect: NoExecute - - operator: "Exists" - effect: NoSchedule - volumes: - - name: settings-vol-config - configMap: - name: ama-metrics-settings-configmap - optional: true - - name: prometheus-config-vol - configMap: - name: ama-metrics-prometheus-config-node-windows - optional: true - - name: host-log-containers - hostPath: - path: /var/log/containers - - name: host-log-pods - hostPath: - path: /var/log/pods -{{- end }} +# apiVersion: apps/v1 +# kind: DaemonSet +# metadata: +# name: ama-metrics-node +# namespace: kube-system +# labels: +# component: ama-metrics +# spec: +# selector: +# matchLabels: +# dsName: ama-metrics-node +# updateStrategy: +# type: RollingUpdate +# rollingUpdate: +# maxUnavailable: 50% +# template: +# metadata: +# labels: +# dsName: ama-metrics-node +# kubernetes.azure.com/managedby: aks +# annotations: +# agentVersion: "0.0.0.1" +# schema-versions: "v1" +# spec: +# priorityClassName: system-node-critical +# serviceAccountName: ama-metrics-serviceaccount +# containers: +# - name: prometheus-collector +# image: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.ImageRepository }}:{{ .Values.AzureMonitorMetrics.ImageTag }}" +# imagePullPolicy: IfNotPresent +# resources: +# limits: +# cpu: 200m +# memory: 1Gi +# requests: +# cpu: 50m +# memory: 150Mi +# {{- if and (eq .Values.AzureMonitorMetrics.ArcExtension true) (.Values.Azure.proxySettings.isProxyEnabled) }} +# envFrom: +# - secretRef: +# name: ama-metrics-proxy-config +# {{- end }} +# env: +# - name: CLUSTER +# {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} +# value: "{{ .Values.Azure.Cluster.ResourceId }}" +# {{- else }} +# value: "{{ .Values.global.commonGlobals.Customer.AzureResourceID }}" +# {{- end }} +# - name: AKSREGION +# {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} +# value: "{{ .Values.Azure.Cluster.Region }}" +# {{- else }} +# value: "{{ 
.Values.global.commonGlobals.Region }}" +# {{- end }} +# - name: MAC +# value: "true" +# - name: AZMON_COLLECT_ENV +# value: "false" +# - name: customEnvironment +# {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} +# value: "{{ lower .Values.Azure.Cluster.Cloud }}" +# {{- else if .Values.AzureMonitorMetrics.isArcACluster }} +# value: "arcautonomous" +# {{- else }} +# value: "{{ lower .Values.global.commonGlobals.CloudEnvironment }}" +# {{- end }} +# - name: OMS_TLD +# value: "opinsights.azure.com" +# {{- if eq .Values.AzureMonitorMetrics.isArcACluster true }} +# - name: customRegionalEndpoint +# value: {{ required "customRegionalEndpoint is required in Arc Autonomous" .Values.AzureMonitorMetrics.arcAutonomousSettings.customRegionalEndpoint | toString | trim | quote }} +# - name: customGlobalEndpoint +# value: {{ required "customGlobalEndpoint is required in Arc Autonomous" .Values.AzureMonitorMetrics.arcAutonomousSettings.customGlobalEndpoint | toString | trim | quote }} +# - name: customResourceEndpoint +# value: {{ required "customResourceEndpoint is required in Arc Autonomous" .Values.AzureMonitorMetrics.arcAutonomousSettings.customResourceEndpoint | toString | trim | quote }} +# {{- end }} +# - name: CONTROLLER_TYPE +# value: "DaemonSet" +# - name: NODE_IP +# valueFrom: +# fieldRef: +# fieldPath: status.hostIP +# - name: NODE_NAME +# valueFrom: +# fieldRef: +# fieldPath: spec.nodeName +# - name: POD_NAME +# valueFrom: +# fieldRef: +# fieldPath: metadata.name +# - name: POD_NAMESPACE +# valueFrom: +# fieldRef: +# fieldPath: metadata.namespace +# - name: CONTAINER_CPU_LIMIT +# valueFrom: +# resourceFieldRef: +# containerName: prometheus-collector +# resource: limits.cpu +# divisor: 1m +# - name: CONTAINER_MEMORY_LIMIT +# valueFrom: +# resourceFieldRef: +# containerName: prometheus-collector +# resource: limits.memory +# divisor: 1Mi +# - name: KUBE_STATE_NAME +# value: ama-metrics-ksm +# - name: NODE_EXPORTER_NAME +# value: "" # Replace this with the 
node exporter shipped out of box with AKS +# - name: NODE_EXPORTER_TARGETPORT +# {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} +# value: "{{ index .Values "prometheus-node-exporter" "service" "targetPort" }}" +# {{- else }} +# value: "19100" +# {{- end }} +# {{- if .Values.AzureMonitorMetrics }} +# {{- if .Values.AzureMonitorMetrics.KubeStateMetrics }} +# - name: KUBE_STATE_VERSION +# value: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.KubeStateMetrics.ImageRepository }}:{{ .Values.AzureMonitorMetrics.KubeStateMetrics.ImageTag }}" +# {{- end }} +# {{- end }} +# - name: NODE_EXPORTER_VERSION +# value: "v1.6.0" # Replace this with the version shipped by default +# - name: AGENT_VERSION +# value: {{ .Values.AzureMonitorMetrics.ImageTag }} +# - name: MODE +# value: "advanced" # only supported mode is 'advanced', any other value will be the default/non-advance mode +# - name: WINMODE +# value: "advanced" # WINDOWS: only supported mode is 'advanced', any other value will be the default/non-advance mode +# - name: MINIMAL_INGESTION_PROFILE +# value: "true" # only supported value is the string "true" +# securityContext: +# privileged: false +# volumeMounts: +# - mountPath: /etc/config/settings +# name: settings-vol-config +# readOnly: true +# - mountPath: /etc/config/settings/prometheus +# name: prometheus-config-vol +# readOnly: true +# - name: host-log-containers +# readOnly: true +# mountPath: /var/log/containers +# - name: host-log-pods +# readOnly: true +# mountPath: /var/log/pods +# - mountPath: /anchors/mariner +# name: anchors-mariner +# readOnly: true +# {{- if or (ne .Values.AzureMonitorMetrics.ArcExtension true) (and (not (hasPrefix "aks_edge" .Values.ClusterDistribution )) (and (ne .Values.Azure.Cluster.Distribution "aks_edge_k3s") (ne .Values.Azure.Cluster.Distribution "aks_edge_k8s"))) }} +# - mountPath: /anchors/ubuntu +# name: anchors-ubuntu +# readOnly: true +# {{- end }} +# {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} +# - 
mountPath: /anchors/proxy +# name: ama-metrics-proxy-cert +# readOnly: true +# {{- end }} +# livenessProbe: +# exec: +# command: +# - /bin/bash +# - -c +# - /opt/microsoft/liveness/livenessprobe.sh +# initialDelaySeconds: 60 +# periodSeconds: 15 +# timeoutSeconds: 5 +# failureThreshold: 3 +# {{- if eq .Values.AzureMonitorMetrics.ArcExtension true }} +# - name: arc-msi-adapter +# imagePullPolicy: IfNotPresent +# env: +# - name: TOKEN_NAMESPACE +# value: "azure-arc" +# - name: LIVENESS_PROBE_PORT +# value: "9999" +# {{- .Values.Azure.Identity.MSIAdapterYaml | nindent 10 }} +# {{- else }} +# - name: addon-token-adapter +# command: +# - /addon-token-adapter +# args: +# - --secret-namespace=kube-system +# - --secret-name=aad-msi-auth-token +# - --token-server-listening-port=7777 +# - --health-server-listening-port=9999 +# image: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.AddonTokenAdapter.ImageRepository }}:{{ .Values.AzureMonitorMetrics.AddonTokenAdapter.ImageTag }}" +# imagePullPolicy: IfNotPresent +# env: +# - name: AZMON_COLLECT_ENV +# value: "false" +# livenessProbe: +# httpGet: +# path: /healthz +# port: 9999 +# initialDelaySeconds: 10 +# periodSeconds: 60 +# resources: +# limits: +# cpu: 500m +# memory: 500Mi +# requests: +# cpu: 20m +# memory: 30Mi +# securityContext: +# capabilities: +# drop: +# - ALL +# add: +# - NET_ADMIN +# - NET_RAW +# {{- end }} +# affinity: +# nodeAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# nodeSelectorTerms: +# - matchExpressions: +# - key: kubernetes.io/os +# operator: In +# values: +# - linux +# - key: type +# operator: NotIn +# values: +# - virtual-kubelet +# {{- if not .Values.AzureMonitorMetrics.ArcExtension }} +# - key: kubernetes.azure.com/cluster +# operator: Exists +# {{- end }} +# tolerations: +# - key: CriticalAddonsOnly +# operator: Exists +# - operator: "Exists" +# effect: NoExecute +# - operator: "Exists" +# effect: NoSchedule +# - key: node-role.kubernetes.io/control-plane +# operator: Exists +# 
effect: NoSchedule +# - key: node-role.kubernetes.io/master +# operator: Exists +# effect: NoSchedule +# volumes: +# - name: settings-vol-config +# configMap: +# name: ama-metrics-settings-configmap +# optional: true +# - name: prometheus-config-vol +# configMap: +# name: ama-metrics-prometheus-config-node +# optional: true +# - name: host-log-containers +# hostPath: +# path: /var/log/containers +# - name: host-log-pods +# hostPath: +# path: /var/log/pods +# - name: anchors-mariner +# hostPath: +# path: /etc/pki/ca-trust/anchors/ +# type: DirectoryOrCreate +# {{- if or (ne .Values.AzureMonitorMetrics.ArcExtension true) (and (not (hasPrefix "aks_edge" .Values.ClusterDistribution )) (and (ne .Values.Azure.Cluster.Distribution "aks_edge_k3s") (ne .Values.Azure.Cluster.Distribution "aks_edge_k8s"))) }} +# - name: anchors-ubuntu +# hostPath: +# path: /usr/local/share/ca-certificates/ +# type: DirectoryOrCreate +# {{- end }} +# {{- if .Values.AzureMonitorMetrics.ArcExtension }} +# - name: ama-metrics-proxy-cert +# secret: +# secretName: ama-metrics-proxy-cert +# optional: true +# {{- end }} +# {{- if not .Values.AzureMonitorMetrics.ArcExtension }} +# --- +# apiVersion: apps/v1 +# kind: DaemonSet +# metadata: +# name: ama-metrics-win-node +# namespace: kube-system +# labels: +# component: ama-metrics +# spec: +# selector: +# matchLabels: +# dsName: ama-metrics-win-node +# updateStrategy: +# type: RollingUpdate +# rollingUpdate: +# maxUnavailable: 50% +# template: +# metadata: +# labels: +# dsName: ama-metrics-win-node +# kubernetes.azure.com/managedby: aks +# annotations: +# agentVersion: "0.0.0.1" +# schema-versions: "v1" +# spec: +# priorityClassName: system-node-critical +# serviceAccountName: ama-metrics-serviceaccount +# containers: +# - name: prometheus-collector +# image: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.ImageRepository }}:{{ .Values.AzureMonitorMetrics.ImageTagWin }}" +# imagePullPolicy: IfNotPresent +# resources: +# limits: +# cpu: 500m +# memory: 
1Gi +# requests: +# cpu: 150m +# memory: 500Mi +# env: +# - name: CLUSTER +# value: "{{ .Values.global.commonGlobals.Customer.AzureResourceID }}" +# - name: AKSREGION +# value: "{{ .Values.global.commonGlobals.Region }}" +# - name: MAC +# value: "true" +# - name: AZMON_COLLECT_ENV +# value: "false" +# - name: customEnvironment +# value: "{{ .Values.global.commonGlobals.CloudEnvironment }}" +# - name: OMS_TLD +# value: "opinsights.azure.com" +# - name: CONTROLLER_TYPE +# value: "DaemonSet" +# - name: NODE_IP +# valueFrom: +# fieldRef: +# fieldPath: status.hostIP +# - name: NODE_NAME +# valueFrom: +# fieldRef: +# fieldPath: spec.nodeName +# - name: POD_NAME +# valueFrom: +# fieldRef: +# fieldPath: metadata.name +# - name: POD_NAMESPACE +# valueFrom: +# fieldRef: +# fieldPath: metadata.namespace +# - name: CONTAINER_CPU_LIMIT +# valueFrom: +# resourceFieldRef: +# containerName: prometheus-collector +# resource: limits.cpu +# divisor: 1m +# - name: CONTAINER_MEMORY_LIMIT +# valueFrom: +# resourceFieldRef: +# containerName: prometheus-collector +# resource: limits.memory +# divisor: 1Mi +# - name: KUBE_STATE_NAME +# value: ama-metrics-ksm +# - name: NODE_EXPORTER_NAME +# value: "" # Replace this with the node exporter shipped out of box with AKS +# - name: NODE_EXPORTER_TARGETPORT +# value: "19100" +# {{- if .Values.AzureMonitorMetrics }} +# {{- if .Values.AzureMonitorMetrics.KubeStateMetrics }} +# - name: KUBE_STATE_VERSION +# value: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.KubeStateMetrics.ImageRepository }}:{{ .Values.AzureMonitorMetrics.KubeStateMetrics.ImageTagWin }}" +# {{- end }} +# {{- end }} +# - name: NODE_EXPORTER_VERSION +# value: "v1.6.0" # Replace this with the version shipped by default +# - name: AGENT_VERSION +# value: {{ .Values.AzureMonitorMetrics.ImageTag }} +# - name: MODE +# value: "advanced" # only supported mode is 'advanced', any other value will be the default/non-advance mode +# - name: WINMODE +# value: "advanced" # WINDOWS: only 
supported mode is 'advanced', any other value will be the default/non-advance mode +# - name: MINIMAL_INGESTION_PROFILE +# value: "true" # only supported value is the string "true" +# securityContext: +# privileged: false +# volumeMounts: +# - mountPath: /etc/config/settings +# name: settings-vol-config +# readOnly: true +# - mountPath: /etc/config/settings/prometheus +# name: prometheus-config-vol +# readOnly: true +# - name: host-log-containers +# readOnly: true +# mountPath: /var/log/containers +# - name: host-log-pods +# readOnly: true +# mountPath: /var/log/pods +# livenessProbe: +# exec: +# command: +# - cmd +# - /c +# - C:\opt\microsoft\scripts\livenessprobe.cmd +# periodSeconds: 15 +# initialDelaySeconds: 300 +# timeoutSeconds: 15 +# failureThreshold: 3 +# - name: addon-token-adapter-win +# command: +# - addon-token-adapter-win +# args: +# - --secret-namespace=kube-system +# - --secret-name=aad-msi-auth-token +# - --token-server-listening-port=7777 +# - --health-server-listening-port=9999 +# image: "mcr.microsoft.com{{ .Values.AzureMonitorMetrics.AddonTokenAdapter.ImageRepositoryWin }}:{{ .Values.AzureMonitorMetrics.AddonTokenAdapter.ImageTagWin }}" +# imagePullPolicy: Always +# livenessProbe: +# httpGet: +# path: /healthz +# port: 9999 +# initialDelaySeconds: 10 +# periodSeconds: 60 +# resources: +# limits: +# memory: 500Mi +# requests: +# cpu: 100m +# memory: 100Mi +# securityContext: +# capabilities: +# add: +# - NET_ADMIN +# affinity: +# nodeAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# nodeSelectorTerms: +# - labelSelector: +# matchExpressions: +# - key: kubernetes.io/os +# operator: In +# values: +# - windows +# - key: type +# operator: NotIn +# values: +# - virtual-kubelet +# tolerations: +# - key: CriticalAddonsOnly +# operator: Exists +# - operator: "Exists" +# effect: NoExecute +# - operator: "Exists" +# effect: NoSchedule +# volumes: +# - name: settings-vol-config +# configMap: +# name: ama-metrics-settings-configmap +# 
optional: true +# - name: prometheus-config-vol +# configMap: +# name: ama-metrics-prometheus-config-node-windows +# optional: true +# - name: host-log-containers +# hostPath: +# path: /var/log/containers +# - name: host-log-pods +# hostPath: +# path: /var/log/pods +# {{- end }} diff --git a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/values.yaml b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/values.yaml index 4cf8c56ab..d9121c48f 100644 --- a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/values.yaml +++ b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/values.yaml @@ -36,10 +36,10 @@ AzureMonitorMetrics: - validatingwebhookconfigurations - volumeattachments ImageRepository: "/azuremonitor/containerinsights/cidev/prometheus-collector/images" - ImageTag: "6.8.0-ccp-shell-removal-branch-10-26-2023-3703fa73" - ImageTagWin: "6.8.0-ccp-shell-removal-branch-10-26-2023-3703fa73-win" - ImageTagTargetAllocator: "6.8.0-ccp-shell-removal-branch-10-26-2023-3703fa73-targetallocator" - ImageTagCfgReader: "6.8.0-ccp-shell-removal-branch-10-26-2023-3703fa73-cfg" + ImageTag: "6.8.0-ccp-shell-removal-branch-10-26-2023-1ac81793" + ImageTagWin: "6.8.0-ccp-shell-removal-branch-10-26-2023-1ac81793-win" + ImageTagTargetAllocator: "6.8.0-ccp-shell-removal-branch-10-26-2023-1ac81793-targetallocator" + ImageTagCfgReader: "6.8.0-ccp-shell-removal-branch-10-26-2023-1ac81793-cfg" TargetAllocatorEnabled: false DeploymentReplicas: 1 # The below 2 settings are not Azure Monitor Metrics adapter chart. They are substituted in a different manner. diff --git a/otelcollector/prometheuscollector/design.md b/otelcollector/prometheuscollector/design.md new file mode 100644 index 000000000..464f4a675 --- /dev/null +++ b/otelcollector/prometheuscollector/design.md @@ -0,0 +1,132 @@ +# Consolidated Go Code Design Document + +## 1. 
Introduction + +This document describes a consolidated Go code file containing functions and utilities for a larger application. These functions handle environment variables, external command execution, file operations, configuration management, and health checking. + +## 2. Functions + +### 2.1. `readEnvVarsFromEnvMdsdFile(envMdsdFile string) ([]string, error)` + +- **Purpose**: Reads environment variables from a file and returns them as a string slice. +- **Input**: `envMdsdFile` - Path to the file containing environment variables. +- **Output**: A string slice of environment variable strings, or an error if the file cannot be read. + +### 2.2. `startCommand(command string, args ...string)` + +- **Purpose**: Starts an external command with specified arguments. +- **Input**: `command` - Command to execute, `args` - Variable number of command arguments. +- **Output**: Executes the command asynchronously. + +### 2.3. `startCommandAndWait(command string, args ...string)` + +- **Purpose**: Starts an external command with specified arguments and waits for completion. +- **Input**: `command` - Command to execute, `args` - Variable number of command arguments. +- **Output**: Waits for the command to finish. + +### 2.4. `printMdsdVersion()` + +- **Purpose**: Prints the version of the MDSD application. +- **Input**: None. +- **Output**: Version information is printed. + +### 2.5. `readMeConfigFileAsString(meConfigFile string) string` + +- **Purpose**: Reads a file's content and returns it as a string. +- **Input**: `meConfigFile` - Path to the file to be read. +- **Output**: Content of the file as a string. + +### 2.6. `startMetricsExtensionWithConfigOverrides(configOverrides string)` + +- **Purpose**: Starts MetricsExtension with specified configurations and captures its output. +- **Input**: `configOverrides` - Configurations for MetricsExtension. +- **Output**: Captures and prints standard output and standard error. + +### 2.7. 
`readVersionFile(filePath string) (string, error)` + +- **Purpose**: Reads a file's content and returns it as a string. +- **Input**: `filePath` - Path to the file to be read. +- **Output**: Content of the file as a string, or an error if the file cannot be read. + +### 2.8. `fmtVar(name, value string)` + +- **Purpose**: Formats and prints environment variables with their values. +- **Input**: `name` - Name of the environment variable, `value` - Value of the environment variable. +- **Output**: Formatted environment variable strings are printed. + +### 2.9. `existsAndNotEmpty(filename string) bool` + +- **Purpose**: Checks if a file exists and is not empty. +- **Input**: `filename` - Path to the file to check. +- **Output**: `true` if the file exists and is not empty, `false` otherwise. + +### 2.10. `readAndTrim(filename string) (string, error)` + +- **Purpose**: Reads a file's content, trims leading/trailing spaces, and returns it as a string. +- **Input**: `filename` - Path to the file to be read. +- **Output**: Trimmed content of the file as a string, or an error if the file cannot be read. + +### 2.11. `exists(path string) bool` + +- **Purpose**: Checks if a file or directory exists. +- **Input**: `path` - Path to the file or directory to check. +- **Output**: `true` if it exists, `false` otherwise. + +### 2.12. `copyFile(sourcePath, destinationPath string) error` + +- **Purpose**: Copies a file from `sourcePath` to `destinationPath`. +- **Input**: `sourcePath` - Source file path, `destinationPath` - Destination file path. +- **Output**: Error if any occurs during copying. + +### 2.13. `setEnvVarsFromFile(filename string) error` + +- **Purpose**: Reads key-value pairs from a file and sets corresponding environment variables. +- **Input**: `filename` - Path to the file with key-value pairs. +- **Output**: Error if any issues arise while setting environment variables. + +### 2.14. 
`configmapparser()` + +- **Purpose**: Parses configuration settings, sets environment variables, and manages configuration files. +- **Input**: None. +- **Output**: Sets environment variables and may print error messages. + +### 2.15. `confgimapparserforccp()` + +- **Purpose**: Similar to `configmapparser()`, this function parses configuration settings for the CCP metrics scenario, using the `tomlparser-ccp-prometheus-collector-settings.rb` parser. +- **Input**: None. +- **Output**: Sets environment variables and may print error messages. + +### 2.16. `hasConfigChanged(filePath string) bool` + +- **Purpose**: Checks if a configuration file has changed by comparing its size. +- **Input**: `filePath` - Path to the file to check. +- **Output**: `true` if the file has changed, `false` otherwise. + +### 2.17. `healthHandler(w http.ResponseWriter, r *http.Request)` + +- **Purpose**: Handles health checks and returns status messages based on various conditions. +- **Input**: `w` - HTTP response writer, `r` - HTTP request. +- **Output**: Writes a response to the HTTP writer. + +### 2.18. `monitorInotify(outputFile string) error` + +- **Purpose**: Monitors changes in the configuration directory using the `inotifywait` command. +- **Input**: `outputFile` - Path to the output file for event logging. +- **Output**: Error if issues occur during the monitoring process. + +## 3. Function Interactions + +- `configmapparser()` and `confgimapparserforccp()` execute Ruby scripts to set environment variables based on configuration files. +- `copyFile()` is used for copying configuration files. +- `setEnvVarsFromFile()` reads key-value pairs and sets environment variables. +- `hasConfigChanged()` checks for changes in configuration files. +- `healthHandler()` provides health status based on various checks. +- `monitorInotify()` monitors changes in the configuration directory. + +## 4. Error Handling + +These functions implement error handling by returning errors for encountered issues.
Errors can arise from file operations, process start failures, and other issues encountered while these functions run. + +## 5. Conclusion + +This consolidated Go code file contains a collection of functions and utilities that enhance the functionality and reliability of the larger application. These functions handle environment variables, execute external commands, read/write files, manage configuration, and perform health checks. diff --git a/otelcollector/prometheuscollector/main.go b/otelcollector/prometheuscollector/main.go index 3aab6ef9a..0ae39c08b 100644 --- a/otelcollector/prometheuscollector/main.go +++ b/otelcollector/prometheuscollector/main.go @@ -12,7 +12,6 @@ import ( "io" "bufio" "strconv" - "path/filepath" ) func main(){ @@ -728,7 +727,7 @@ func confgimapparserforccp() { } // Parse the configmap to set the right environment variables for prometheus collector settings - startCommandAndWait("ruby", "/opt/microsoft/configmapparser/tomlparser-prometheus-collector-settings.rb") + startCommandAndWait("ruby", "/opt/microsoft/configmapparser/tomlparser-ccp-prometheus-collector-settings.rb") // sets env : AZMON_DEFAULT_METRIC_ACCOUNT_NAME, AZMON_CLUSTER_LABEL, AZMON_CLUSTER_ALIAS, AZMON_OPERATOR_ENABLED_CHART_SETTING in /opt/microsoft/configmapparser/config_prometheus_collector_settings_env_var filename := "/opt/microsoft/configmapparser/config_prometheus_collector_settings_env_var" err := setEnvVarsFromFile(filename) @@ -874,53 +873,3 @@ func monitorInotify(outputFile string) error { return nil } -func waitForFileCreation(directory, targetFile string) (string, error) { - for { - dir, err := os.Open(directory) - if err != nil { - return "", err - } - defer dir.Close() - - files, err := dir.Readdir(0) - if err != nil { - return "", err - } - - for _, file := range files { - if file.Name() == targetFile { - return file.Name(), nil - } - } - - time.Sleep(time.Second) // Sleep for a second before checking again - } -} - -func waitForConfigmapSyncContainer() { -
settingsChangedFile := "/etc/config/settings/inotifysettingscreated" - ccpMetricsEnabled := os.Getenv("CCP_METRICS_ENABLED") - if ccpMetricsEnabled == "true" { - _, err := os.Stat(settingsChangedFile) - if os.IsNotExist(err) { - // Disable appinsights telemetry for ccp metrics - os.Setenv("DISABLE_TELEMETRY", "true") - - _, err := os.Stat(settingsChangedFile) - if os.IsNotExist(err) { - fmt.Println("Waiting for ama-metrics-config-sync container to finish initialization...") - - for { - event, err := waitForFileCreation(filepath.Dir(settingsChangedFile), filepath.Base(settingsChangedFile)) - if err != nil { - fmt.Println(err) - break - } - if event == filepath.Base(settingsChangedFile) { - break - } - } - } - } - } -}