From 226984b2b28171371f45d84b5d543dd4cd5b1ccf Mon Sep 17 00:00:00 2001
From: bragi92
Date: Mon, 4 Nov 2024 08:30:30 -0800
Subject: [PATCH] [feat] windows golang update (#969)

[comment]: # (Note that your PR title should follow the conventional commit format: https://conventionalcommits.org/en/v1.0.0/#summary)

# PR Description

[comment]: # (The below checklist is for PRs adding new features. If a box is not checked, add a reason why it's not needed.)

# New Feature Checklist

- [ ] List telemetry added about the feature.
- [ ] Link to the one-pager about the feature.
- [ ] List any tasks necessary for release (3P docs, AKS RP chart changes, etc.) after merging the PR.
- [ ] Attach results of scale and perf testing.

[comment]: # (The below checklist is for code changes. Not all boxes necessarily need to be checked. Build, doc, and template changes do not need to fill out the checklist.)

# Tests Checklist

- [ ] Have end-to-end Ginkgo tests been run on your cluster and passed? To bootstrap your cluster to run the tests, follow [these instructions](/otelcollector/test/README.md#bootstrap-a-dev-cluster-to-run-ginkgo-tests).
  - Labels used when running the tests on your cluster:
    - [ ] `operator`
    - [ ] `windows`
    - [ ] `arm64`
    - [ ] `arc-extension`
    - [ ] `fips`
- [ ] Have new tests been added? For features, have tests been added for this feature? For fixes, is there a test that could have caught this issue and could validate that the fix works?
- [ ] Is a new scrape job needed?
  - [ ] The scrape job was added to the folder [test-cluster-yamls](/otelcollector/test/test-cluster-yamls/) in the correct configmap or as a CR.
- [ ] Was a new test label added?
  - [ ] A string constant for the label was added to [constants.go](/otelcollector/test/utils/constants.go).
  - [ ] The label and description were added to the [test README](/otelcollector/test/README.md).
  - [ ] The label was added to this [PR checklist](/.github/pull_request_template).
  - [ ] The label was added as needed to [testkube-test-crs.yaml](/otelcollector/test/testkube/testkube-test-crs.yaml).
- [ ] Are additional API server permissions needed for the new tests?
  - [ ] These permissions have been added to [api-server-permissions.yaml](/otelcollector/test/testkube/api-server-permissions.yaml).
- [ ] Was a new test suite (a new folder under `/tests`) added?
  - [ ] The new test suite is included in [testkube-test-crs.yaml](/otelcollector/test/testkube/testkube-test-crs.yaml).
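Note for reviewers: this change moves the Windows container's startup flow out of `main.ps1` and the Ruby `tomlparser-*`/config-merger scripts and into the Go binary (`main.exe`, built from `otelcollector/main/main.go`) that the Dockerfile now runs as its entrypoint. The OS-specific process helpers are split into `process_utilities_linux.go` and `process_utilities_windows.go`, which relies on Go's filename-based build constraints. Below is a minimal sketch of that pattern, assuming a hypothetical helper name (`StartProcessSketch` is illustrative only, not the repo's actual API):

```go
// process_utilities_windows.go (sketch)
// The _windows.go filename suffix is an implicit build constraint:
// Go compiles this file only when GOOS=windows, and its _linux.go
// twin only when GOOS=linux, so shared callers need no runtime OS checks.
package shared

import "os/exec"

// StartProcessSketch launches a child process asynchronously; a
// hypothetical stand-in for the Windows process utilities this PR adds.
func StartProcessSketch(path string, args ...string) error {
	cmd := exec.Command(path, args...)
	return cmd.Start() // returns once the process has started, similar to Start-Process
}
```

Go already excludes the `_linux.go` file from Windows builds (and vice versa) based on the suffix alone; the per-OS `.dockerignore` entries added in this diff additionally keep the other platform's source out of each image's build context.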
---
 .pipelines/azure-pipeline-build.yml | 1 +
 .../FullAzureMonitorMetricsProfile.json | 2 +-
 internal/grafana_uami/action.ps1 | 10 +
 internal/grafana_uami/patch-add-umi.json | 8 +
 otelcollector/build/linux/.dockerignore | 1 +
 otelcollector/build/windows/.dockerignore | 1 +
 otelcollector/build/windows/Dockerfile | 39 +-
 otelcollector/build/windows/scripts/main.ps1 | 491 -------------
 otelcollector/build/windows/scripts/setup.ps1 | 9 -
 .../configmapparser/ConfigParseErrorLogger.rb | 45 --
 .../prometheus-config-merger-with-operator.rb | 643 ------------------
 .../prometheus-config-merger.rb | 621 -----------------
 .../configmapparser/tomlparser-debug-mode.rb | 89 ---
 .../tomlparser-default-scrape-settings.rb | 195 ------
 ...arser-default-targets-metrics-keep-list.rb | 392 -----------
 ...omlparser-pod-annotation-based-scraping.rb | 73 --
 ...omlparser-prometheus-collector-settings.rb | 137 ----
 .../tomlparser-scrape-interval.rb | 337 ---------
 .../configmapparser/tomlparser-utils.rb | 39 --
 .../Chart-template.yaml | 2 +-
 .../local_testing_aks.ps1 | 6 +-
 .../templates/ama-metrics-daemonset.yaml | 10 +-
 otelcollector/go.sum | 20 +
 otelcollector/main/main.go | 164 +++--
 .../makefile_windows.ps1 | 37 +-
 .../configmap/ccp/configmapparserforccp.go | 10 +-
 .../shared/configmap/mp/configmapparser.go | 28 +-
 otelcollector/shared/file_utilities.go | 2 +-
 otelcollector/shared/helpers.go | 215 +++-
 ...tilities.go => process_utilities_linux.go} | 9 +-
 .../shared/process_utilities_windows.go | 574 ++++++++++++++++
 otelcollector/shared/proxy_settings.go | 16 +-
 otelcollector/shared/telemetry.go | 4 +-
 33 files changed, 997 insertions(+), 3233 deletions(-)
 create mode 100644 internal/grafana_uami/action.ps1
 create mode 100644 internal/grafana_uami/patch-add-umi.json
 create mode 100644 otelcollector/build/linux/.dockerignore
 create mode 100644 otelcollector/build/windows/.dockerignore
 delete mode 100644 otelcollector/build/windows/scripts/main.ps1
 delete mode 100644 otelcollector/configmapparser/ConfigParseErrorLogger.rb
 delete mode 100644 otelcollector/configmapparser/prometheus-config-merger-with-operator.rb
 delete mode 100644 otelcollector/configmapparser/prometheus-config-merger.rb
 delete mode 100644 otelcollector/configmapparser/tomlparser-debug-mode.rb
 delete mode 100644 otelcollector/configmapparser/tomlparser-default-scrape-settings.rb
 delete mode 100644 otelcollector/configmapparser/tomlparser-default-targets-metrics-keep-list.rb
 delete mode 100644 otelcollector/configmapparser/tomlparser-pod-annotation-based-scraping.rb
 delete mode 100644 otelcollector/configmapparser/tomlparser-prometheus-collector-settings.rb
 delete mode 100644 otelcollector/configmapparser/tomlparser-scrape-interval.rb
 delete mode 100644 otelcollector/configmapparser/tomlparser-utils.rb
 rename otelcollector/shared/{process_utilities.go => process_utilities_linux.go} (97%)
 create mode 100644 otelcollector/shared/process_utilities_windows.go

diff --git a/.pipelines/azure-pipeline-build.yml b/.pipelines/azure-pipeline-build.yml
index 06b49033a..204078480 100644
--- a/.pipelines/azure-pipeline-build.yml
+++ b/.pipelines/azure-pipeline-build.yml
@@ -2,6 +2,7 @@ trigger:
   branches:
     include:
     - main
+
 pr:
   autoCancel: true
   branches:
diff --git a/AddonArmTemplate/FullAzureMonitorMetricsProfile.json b/AddonArmTemplate/FullAzureMonitorMetricsProfile.json
index 825834eb9..dd911e352 100644
--- a/AddonArmTemplate/FullAzureMonitorMetricsProfile.json
+++ b/AddonArmTemplate/FullAzureMonitorMetricsProfile.json
@@ -553,4 +553,4 @@
     }
   }
 ]
-}
+}
\ No newline at end of file
diff --git a/internal/grafana_uami/action.ps1 b/internal/grafana_uami/action.ps1
new file mode 100644
index 000000000..3c79414d6
--- /dev/null
+++ b/internal/grafana_uami/action.ps1
@@ -0,0 +1,10 @@
+# ARMClient doc: https://github.com/projectkudu/ARMClient
+# ARMClient login
+
+$grafanaResourceId="/subscriptions/{sub_id}/resourceGroups/{rg_name}/providers/Microsoft.Dashboard/grafana/{name}"
+$grafanaApiVersion="2023-10-01-preview"
+
+armclient get "$($grafanaResourceId)?api-version=$($grafanaApiVersion)"
+
+Write-Output "Add user-assigned managed identity to Grafana"
+armclient patch "$($grafanaResourceId)?api-version=$($grafanaApiVersion)" patch-add-umi.json -verbose
diff --git a/internal/grafana_uami/patch-add-umi.json b/internal/grafana_uami/patch-add-umi.json
new file mode 100644
index 000000000..2e7452ca9
--- /dev/null
+++ b/internal/grafana_uami/patch-add-umi.json
@@ -0,0 +1,8 @@
+{
+  "identity": {
+    "type": "UserAssigned",
+    "userAssignedIdentities": {
+      "/subscriptions/{sub_id}/resourceGroups/{rg_name}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{name}": {}
+    }
+  }
+}
\ No newline at end of file
diff --git a/otelcollector/build/linux/.dockerignore b/otelcollector/build/linux/.dockerignore
new file mode 100644
index 000000000..5c0721e83
--- /dev/null
+++ b/otelcollector/build/linux/.dockerignore
@@ -0,0 +1 @@
+shared/process_utilities_windows.go
diff --git a/otelcollector/build/windows/.dockerignore b/otelcollector/build/windows/.dockerignore
new file mode 100644
index 000000000..094ed9fc2
--- /dev/null
+++ b/otelcollector/build/windows/.dockerignore
@@ -0,0 +1 @@
+shared/process_utilities_linux.go
diff --git a/otelcollector/build/windows/Dockerfile b/otelcollector/build/windows/Dockerfile
index a00613ee8..408e407c6 100644
--- a/otelcollector/build/windows/Dockerfile
+++ b/otelcollector/build/windows/Dockerfile
@@ -23,11 +23,9 @@ ENV APPLICATIONINSIGHTS_AUTH_USNAT ZTliNjRmZmUtZDZlYi0xYjczLThjYWQtNDU2OTFjN2FhN
 ENV TELEMETRY_DISABLED false

 COPY ./build/windows/scripts/setup.ps1 $tmpdir/scripts/
-COPY ./build/windows/scripts/main.ps1 $tmpdir/scripts/
 COPY ./build/windows/scripts/filesystemwatcher.ps1 $tmpdir/scripts/
-COPY ./build/windows/scripts/livenessprobe.cmd $tmpdir/microsoft/scripts/
-
-COPY ./configmapparser/*.rb $tmpdir/microsoft/configmapparser/
+RUN mkdir "C:\\opt\\microsoft\\configmapparser"
+RUN mkdir "C:\\opt\\microsoft\\scripts"
 COPY ./configmapparser/default-prom-configs/*.yml $tmpdir/microsoft/otelcollector/default-prom-configs/
 COPY ./opentelemetry-collector-builder/otelcollector.exe ./opentelemetry-collector-builder/collector-config-default.yml ./opentelemetry-collector-builder/collector-config-template.yml $tmpdir/microsoft/otelcollector/
 COPY ./prom-config-validator-builder/promconfigvalidator.exe $tmpdir/
@@ -36,40 +34,13 @@ COPY ./telegraf/telegraf-prometheus-collector-windows.conf $tmpdir/telegraf/
 COPY ./fluent-bit/fluent-bit-windows.conf $tmpdir/fluent-bit/
 COPY ./fluent-bit/fluent-bit-parsers.conf $tmpdir/fluent-bit/
 COPY ./fluent-bit/src/out_appinsights.so $tmpdir/fluent-bit/bin/
+COPY ./main/main.exe $tmpdir/microsoft

 COPY ./react /Users/ContainerAdministrator/Documents/static/react
 COPY ./LICENSE $tmpdir/microsoft
 COPY ./NOTICE $tmpdir/microsoft

-# Do not split this into multiple RUN!
-# Docker creates a layer for every RUN-Statement
-ENV chocolateyVersion "1.4.0"
-RUN powershell -Command "Set-ExecutionPolicy Bypass -Scope Process -Force; iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
-# Fluentd depends on cool.io whose fat gem is only available for Ruby < 2.5, so need to specify --platform ruby when install Ruby > 2.5 and install msys2 to get dev tools
-RUN choco install -y ruby --version 2.6.5.1 --params "'/InstallDir:C:\ruby26'" \
-&& choco install -y msys2 --version 20211130.0.0 --params "'/NoPath /NoUpdate /InstallDir:C:\ruby26\msys64'" \
-&& choco install -y vim
-
-# gangams - optional MSYS2 update via ridk failing in merged docker file so skipping that since we dont need optional update
-RUN refreshenv \
-&& ridk install 3 \
-&& echo gem: --no-document >> C:\ProgramData\gemrc \
-&& gem install cool.io -v 1.5.4 --platform ruby \
-&& gem install oj -v 3.3.10 \
-&& gem install json -v 2.2.0 \
-&& gem install fluentd -v 1.12.2 \
-&& gem install win32-service -v 1.0.1 \
-&& gem install win32-ipc -v 0.7.0 \
-&& gem install win32-event -v 0.6.3 \
-&& gem install windows-pr -v 1.2.6 \
-&& gem install tomlrb -v 1.3.0 \
-&& gem install deep_merge -v 1.2.1\
-&& gem install colorize\
-&& gem sources --clear-all
-
-# Remove gem cache and chocolatey
-RUN powershell -Command "Remove-Item -Force C:\ruby26\lib\ruby\gems\2.6.0\cache\*.gem; Remove-Item -Recurse -Force 'C:\ProgramData\chocolatey'"
-
 SHELL ["powershell"]
 RUN ./opt/scripts/setup.ps1
-ENTRYPOINT ["powershell", "C:\\opt\\scripts\\main.ps1"]
+# Run the Go executable, entrypoint
+ENTRYPOINT ["powershell", "C:\\opt\\microsoft\\main.exe"]
diff --git a/otelcollector/build/windows/scripts/main.ps1 b/otelcollector/build/windows/scripts/main.ps1
deleted file mode 100644
index 1608c3940..000000000
--- a/otelcollector/build/windows/scripts/main.ps1
+++ /dev/null
@@ -1,491 +0,0 @@
-#setting it to replicaset by default
-$me_config_file = '/opt/metricextension/me_ds_win.config'
-
-function Set-EnvironmentVariablesAndConfigParser {
-    # Set windows 2019 or 2022 version (Microsoft Windows Server 2019 Datacenter or Microsoft Windows Server 2022 Datacenter)
-    $windowsVersion = (Get-WmiObject -class Win32_OperatingSystem).Caption
-    [System.Environment]::SetEnvironmentVariable("windowsVersion", $windowsVersion, "Process")
-    [System.Environment]::SetEnvironmentVariable("windowsVersion", $windowsVersion, "Machine")
-
-    #resourceid override.
-    if ([string]::IsNullOrEmpty($env:MAC)) {
-        if ([string]::IsNullOrEmpty($env:CLUSTER)) {
-            Write-Output "CLUSTER is empty or not set. Using $env:NODE_NAME as CLUSTER"
Using $env:NODE_NAME as CLUSTER" - [System.Environment]::SetEnvironmentVariable("customResourceId", $env:NODE_NAME, "Process") - [System.Environment]::SetEnvironmentVariable("customResourceId", $env:NODE_NAME, "Machine") - Write-Output "customResourceId=$env:customResourceId" - } - else { - [System.Environment]::SetEnvironmentVariable("customResourceId", $env:CLUSTER, "Process") - [System.Environment]::SetEnvironmentVariable("customResourceId", $env:CLUSTER, "Machine") - Write-Output "customResourceId=$env:customResourceId" - } - } - else { - [System.Environment]::SetEnvironmentVariable("customResourceId", $env:CLUSTER, "Process") - [System.Environment]::SetEnvironmentVariable("customResourceId", $env:CLUSTER, "Machine") - - [System.Environment]::SetEnvironmentVariable("customRegion", $env:AKSREGION, "Process") - [System.Environment]::SetEnvironmentVariable("customRegion", $env:AKSREGION, "Machine") - - # Setting these variables for telegraf - [System.Environment]::SetEnvironmentVariable("AKSREGION", $env:AKSREGION, "Process") - [System.Environment]::SetEnvironmentVariable("AKSREGION", $env:AKSREGION, "Machine") - [System.Environment]::SetEnvironmentVariable("CLUSTER", $env:CLUSTER, "Process") - [System.Environment]::SetEnvironmentVariable("CLUSTER", $env:CLUSTER, "Machine") - [System.Environment]::SetEnvironmentVariable("AZMON_CLUSTER_ALIAS", $env:AZMON_CLUSTER_ALIAS, "Process") - [System.Environment]::SetEnvironmentVariable("AZMON_CLUSTER_ALIAS", $env:AZMON_CLUSTER_ALIAS, "Machine") - - Write-Output "customResourceId=$env:customResourceId" - Write-Output "customRegion=$env:customRegion" - } - - ############### Environment variables for MA {Start} ############### - [System.Environment]::SetEnvironmentVariable("MONITORING_ROLE_INSTANCE", "cloudAgentRoleInstanceIdentity", "Process") - [System.Environment]::SetEnvironmentVariable("MA_RoleEnvironment_OsType", "Windows", "Process") - [System.Environment]::SetEnvironmentVariable("MONITORING_VERSION", "2.0", "Process") - [System.Environment]::SetEnvironmentVariable("MONITORING_ROLE", "cloudAgentRoleIdentity", "Process") - [System.Environment]::SetEnvironmentVariable("MONITORING_IDENTITY", "use_ip_address", "Process") - [System.Environment]::SetEnvironmentVariable("MONITORING_ROLE_INSTANCE", "cloudAgentRoleInstanceIdentity", "Machine") - [System.Environment]::SetEnvironmentVariable("MA_RoleEnvironment_OsType", "Windows", "Machine") - [System.Environment]::SetEnvironmentVariable("MONITORING_VERSION", "2.0", "Machine") - [System.Environment]::SetEnvironmentVariable("MONITORING_ROLE", "cloudAgentRoleIdentity", "Machine") - [System.Environment]::SetEnvironmentVariable("MONITORING_IDENTITY", "use_ip_address", "Machine") - [System.Environment]::SetEnvironmentVariable("MONITORING_USE_GENEVA_CONFIG_SERVICE", "false", "Process") - [System.Environment]::SetEnvironmentVariable("MONITORING_USE_GENEVA_CONFIG_SERVICE", "false", "Machine") - [System.Environment]::SetEnvironmentVariable("SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH", "true", "Process") - [System.Environment]::SetEnvironmentVariable("SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH", "true", "Machine") - [System.Environment]::SetEnvironmentVariable("ENABLE_MCS", "true", "Process") - [System.Environment]::SetEnvironmentVariable("ENABLE_MCS", "true", "Machine") - [System.Environment]::SetEnvironmentVariable("MDSD_USE_LOCAL_PERSISTENCY", "false", "Process") - [System.Environment]::SetEnvironmentVariable("MDSD_USE_LOCAL_PERSISTENCY", "false", "Machine") - 
[System.Environment]::SetEnvironmentVariable("MA_RoleEnvironment_Location", $env:AKSREGION, "Process") - [System.Environment]::SetEnvironmentVariable("MA_RoleEnvironment_ResourceId", $env:CLUSTER, "Process") - [System.Environment]::SetEnvironmentVariable("MCS_CUSTOM_RESOURCE_ID", $env:CLUSTER, "Process") - [System.Environment]::SetEnvironmentVariable("customRegion", $env:AKSREGION, "Process") - [System.Environment]::SetEnvironmentVariable("MA_RoleEnvironment_Location", $env:AKSREGION, "Machine") - [System.Environment]::SetEnvironmentVariable("MA_RoleEnvironment_ResourceId", $env:CLUSTER, "Machine") - [System.Environment]::SetEnvironmentVariable("MCS_CUSTOM_RESOURCE_ID", $env:CLUSTER, "Machine") - [System.Environment]::SetEnvironmentVariable("customRegion", $env:AKSREGION, "Machine") - - - $mcs_endpoint = "https://monitor.azure.com/" - $mcs_globalendpoint = "https://global.handler.control.monitor.azure.com" - $customEnvironment = [System.Environment]::GetEnvironmentVariable("customEnvironment", "process").ToLower() - - switch ($customEnvironment) { - "azurepubliccloud" { - if ($env:AKSREGION.ToLower() -eq "eastus2euap" -or $env:AKSREGION.ToLower() -eq "centraluseuap") { - $mcs_globalendpoint = "https://global.handler.canary.control.monitor.azure.com" - $mcs_endpoint = "https://monitor.azure.com/" - } - else { - $mcs_endpoint = "https://monitor.azure.com/" - $mcs_globalendpoint = "https://global.handler.control.monitor.azure.com" - } - } - "azureusgovernmentcloud" { - $mcs_globalendpoint = "https://global.handler.control.monitor.azure.us" - $mcs_endpoint = "https://monitor.azure.us/" - } - "azurechinacloud" { - $mcs_globalendpoint = "https://global.handler.control.monitor.azure.cn" - $mcs_endpoint = "https://monitor.azure.cn/" - } - "usnat" { - $mcs_globalendpoint = "https://global.handler.control.monitor.azure.eaglex.ic.gov" - $mcs_endpoint = "https://monitor.azure.eaglex.ic.gov/" - } - "ussec" { - $mcs_globalendpoint = "https://global.handler.control.monitor.azure.microsoft.scloud" - $mcs_endpoint = "https://monitor.azure.microsoft.scloud/" - } - default { - Write-Host "Unknown customEnvironment: $customEnvironment, setting mcs endpoint to default azurepubliccloud values" - $mcs_endpoint = "https://monitor.azure.com/" - $mcs_globalendpoint = "https://global.handler.control.monitor.azure.com" - } - } - - [System.Environment]::SetEnvironmentVariable("MCS_AZURE_RESOURCE_ENDPOINT", $mcs_endpoint, "Process") - [System.Environment]::SetEnvironmentVariable("MCS_GLOBAL_ENDPOINT", $mcs_globalendpoint, "Process") - [System.Environment]::SetEnvironmentVariable("MCS_AZURE_RESOURCE_ENDPOINT", $mcs_endpoint, "Machine") - [System.Environment]::SetEnvironmentVariable("MCS_GLOBAL_ENDPOINT", $mcs_globalendpoint, "Machine") - - ############### Environment variables for MA {End} ############### - - if ([string]::IsNullOrEmpty($env:MODE)) { - [System.Environment]::SetEnvironmentVariable("MODE", 'simple', "Process") - [System.Environment]::SetEnvironmentVariable("MODE", 'simple', "Machine") - } - - #set agent config schema version - if (Test-Path -Path '/etc/config/settings/schema-version') { - #trim - $config_schema_version = Get-Content -Path /etc/config/settings/schema-version - #remove all spaces - $config_schema_version = $config_schema_version.trim() - #take first 10 characters - if ($config_schema_version.Length -gt 10) { - $config_schema_version = $config_schema_version.SubString(0, 10) - } - [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $config_schema_version, 
"Process") - [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_SCHEMA_VERSION", $config_schema_version, "Machine") - } - - #set agent config file version - if (Test-Path -Path '/etc/config/settings/config-version') { - #trim - $config_file_version = Get-Content -Path /etc/config/settings/config-version - #remove all spaces - $config_file_version = $config_file_version.Trim() - #take first 10 characters - if ($config_file_version.Length -gt 10) { - $config_file_version = $config_file_version.Substring(0, 10) - } - [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_FILE_VERSION", $config_file_version, "Process") - [System.Environment]::SetEnvironmentVariable("AZMON_AGENT_CFG_FILE_VERSION", $config_file_version, "Machine") - } - - switch ($customEnvironment) { - "azurepubliccloud" { - $encodedaikey = [System.Environment]::GetEnvironmentVariable("APPLICATIONINSIGHTS_AUTH_PUBLIC", "process") - $aiendpoint = $null - Write-Host "setting telemetry output to the default azurepubliccloud instance" - } - "azureusgovernmentcloud" { - $encodedaikey = [System.Environment]::GetEnvironmentVariable("APPLICATIONINSIGHTS_AUTH_USGOVERNMENT", "process") - $aiendpoint = "https://dc.applicationinsights.us/v2/track" - Write-Host "setting telemetry output to the azureusgovernmentcloud instance" - } - "azurechinacloud" { - $encodedaikey = [System.Environment]::GetEnvironmentVariable("APPLICATIONINSIGHTS_AUTH_CHINACLOUD", "process") - $aiendpoint = "https://dc.applicationinsights.azure.cn/v2/track" - Write-Host "setting telemetry output to the azurechinacloud instance" - } - "usnat" { - $encodedaikey = [System.Environment]::GetEnvironmentVariable("APPLICATIONINSIGHTS_AUTH_USNAT", "process") - $aiendpoint = "https://dc.applicationinsights.azure.eaglex.ic.gov/v2/track" - Write-Host "setting telemetry output to the usnat instance" - } - "ussec" { - $encodedaikey = [System.Environment]::GetEnvironmentVariable("APPLICATIONINSIGHTS_AUTH_USSEC", "process") - $aiendpoint = "https://dc.applicationinsights.azure.microsoft.scloud/v2/track" - Write-Host "setting telemetry output to the ussec instance" - } - default { - Write-Host "Unknown customEnvironment: $customEnvironment, setting telemetry output to the default azurepubliccloud instance" - $encodedaikey = [System.Environment]::GetEnvironmentVariable("APPLICATIONINSIGHTS_AUTH_PUBLIC", "process") - $aiendpoint = $null - } - } - - [Environment]::SetEnvironmentVariable("APPLICATIONINSIGHTS_AUTH", $encodedaikey, "Process") - [Environment]::SetEnvironmentVariable("APPLICATIONINSIGHTS_AUTH", $encodedaikey, "Machine") - if ($null -ne $aiendpoint) { - [Environment]::SetEnvironmentVariable("APPLICATIONINSIGHTS_ENDPOINT", $aiendpoint, "Process") - [Environment]::SetEnvironmentVariable("APPLICATIONINSIGHTS_ENDPOINT", $aiendpoint, "Machine") - } - - # Delete this when telegraf is removed - $aiKeyDecoded = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String($env:APPLICATIONINSIGHTS_AUTH)) - [System.Environment]::SetEnvironmentVariable("TELEMETRY_APPLICATIONINSIGHTS_KEY", $aiKeyDecoded, "Process") - [System.Environment]::SetEnvironmentVariable("TELEMETRY_APPLICATIONINSIGHTS_KEY", $aiKeyDecoded, "Machine") - - # run config parser - ruby /opt/microsoft/configmapparser/tomlparser-prometheus-collector-settings.rb - - if (Test-Path -Path '/opt/microsoft/configmapparser/config_prometheus_collector_settings_env_var') { - foreach ($line in Get-Content /opt/microsoft/configmapparser/config_prometheus_collector_settings_env_var) { - if 
-                $key = ($line -split '=')[0];
-                $value = ($line -split '=')[1];
-                [System.Environment]::SetEnvironmentVariable($key, $value, "Process")
-                [System.Environment]::SetEnvironmentVariable($key, $value, "Machine")
-            }
-        }
-    }
-
-    # Parse the settings for default scrape configs
-    ruby /opt/microsoft/configmapparser/tomlparser-default-scrape-settings.rb
-    if (Test-Path -Path '/opt/microsoft/configmapparser/config_default_scrape_settings_env_var') {
-        foreach ($line in Get-Content /opt/microsoft/configmapparser/config_default_scrape_settings_env_var) {
-            if ($line.Contains('=')) {
-                $key = ($line -split '=')[0];
-                $value = ($line -split '=')[1];
-                [System.Environment]::SetEnvironmentVariable($key, $value, "Process")
-                [System.Environment]::SetEnvironmentVariable($key, $value, "Machine")
-            }
-        }
-    }
-
-    # Parse the settings for debug mode
-    ruby /opt/microsoft/configmapparser/tomlparser-debug-mode.rb
-    if (Test-Path -Path '/opt/microsoft/configmapparser/config_debug_mode_env_var') {
-        foreach ($line in Get-Content /opt/microsoft/configmapparser/config_debug_mode_env_var) {
-            if ($line.Contains('=')) {
-                $key = ($line -split '=')[0];
-                $value = ($line -split '=')[1];
-                [System.Environment]::SetEnvironmentVariable($key, $value, "Process")
-                [System.Environment]::SetEnvironmentVariable($key, $value, "Machine")
-            }
-        }
-    }
-
-    # Parse the settings for default targets metrics keep list config
-    ruby /opt/microsoft/configmapparser/tomlparser-default-targets-metrics-keep-list.rb
-
-    # Parse the settings for default-targets-scrape-interval-settings config
-    ruby /opt/microsoft/configmapparser/tomlparser-scrape-interval.rb
-
-    # Merge default anf custom prometheus config
-    ruby /opt/microsoft/configmapparser/prometheus-config-merger.rb
-
-    [System.Environment]::SetEnvironmentVariable("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "false", "Process")
-    [System.Environment]::SetEnvironmentVariable("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "false", "Machine")
-
-    [System.Environment]::SetEnvironmentVariable("CONFIG_VALIDATOR_RUNNING_IN_AGENT", "true", "Process")
-    [System.Environment]::SetEnvironmentVariable("CONFIG_VALIDATOR_RUNNING_IN_AGENT", "true", "Machine")
-
-    if (Test-Path -Path '/opt/promMergedConfig.yml') {
-        C:\opt\promconfigvalidator --config "/opt/promMergedConfig.yml" --output "/opt/microsoft/otelcollector/collector-config.yml" --otelTemplate "/opt/microsoft/otelcollector/collector-config-template.yml"
-        if ( (!($?)) -or (!(Test-Path -Path "/opt/microsoft/otelcollector/collector-config.yml" ))) {
-            Write-Output "prom-config-validator::Prometheus custom config validation failed. The custom config will not be used"
-            # This env variable is used to indicate that the prometheus custom config was invalid and we fall back to defaults, used for telemetry
-            [System.Environment]::SetEnvironmentVariable("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "true", "Process")
-            [System.Environment]::SetEnvironmentVariable("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "true", "Machine")
-            if (Test-Path -Path '/opt/defaultsMergedConfig.yml') {
-                Write-Output "prom-config-validator::Running validator on just default scrape configs"
-                C:\opt\promconfigvalidator --config "/opt/defaultsMergedConfig.yml" --output "/opt/collector-config-with-defaults.yml" --otelTemplate "/opt/microsoft/otelcollector/collector-config-template.yml"
-                if ( (!($?)) -or (!(Test-Path -Path "/opt/collector-config-with-defaults.yml" ))) {
-                    Write-Output "prom-config-validator::Prometheus default scrape config validation failed. No scrape configs will be used"
No scrape configs will be used" - } - else { - Copy-Item "/opt/collector-config-with-defaults.yml" "/opt/microsoft/otelcollector/collector-config-default.yml" - } - } - [System.Environment]::SetEnvironmentVariable("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", "Process") - [System.Environment]::SetEnvironmentVariable("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", "Machine") - } - } - elseif (Test-Path -Path '/opt/defaultsMergedConfig.yml') { - Write-Output "prom-config-validator::No custom prometheus config found. Only using default scrape configs" - C:\opt\promconfigvalidator --config "/opt/defaultsMergedConfig.yml" --output "/opt/collector-config-with-defaults.yml" --otelTemplate "/opt/microsoft/otelcollector/collector-config-template.yml" - if ( (!($?)) -or (!(Test-Path -Path "/opt/collector-config-with-defaults.yml" ))) { - Write-Output "prom-config-validator::Prometheus default scrape config validation failed. No scrape configs will be used" - } - else { - Write-Output "prom-config-validator::Prometheus default scrape config validation succeeded, using this as collector config" - Copy-Item "/opt/collector-config-with-defaults.yml" "/opt/microsoft/otelcollector/collector-config-default.yml" - } - [System.Environment]::SetEnvironmentVariable("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", "Process") - [System.Environment]::SetEnvironmentVariable("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", "Machine") - } - else { - # This else block is needed, when there is no custom config mounted as config map or default configs enabled - Write-Output "prom-config-validator::No custom config or default scrape configs enabled. No scrape configs will be used" - [System.Environment]::SetEnvironmentVariable("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", "Process") - [System.Environment]::SetEnvironmentVariable("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", "Machine") - } - - if (Test-Path -Path '/opt/microsoft/prom_config_validator_env_var') { - foreach ($line in Get-Content /opt/microsoft/prom_config_validator_env_var) { - if ($line.Contains('=')) { - $key = ($line -split '=')[0]; - $value = ($line -split '=')[1]; - [System.Environment]::SetEnvironmentVariable($key, $value, "Process") - [System.Environment]::SetEnvironmentVariable($key, $value, "Machine") - } - } - } - - # #start cron daemon for logrotate - # service cron restart - - #start otelcollector - Write-Output "Use default prometheus config: $env:AZMON_USE_DEFAULT_PROMETHEUS_CONFIG" - - #get controller kind in lowercase, trimmed - $controllerType = $env:CONTROLLER_TYPE - $controllerType = $controllerType.Trim() - $cluster_override = $env:CLUSTER_OVERRIDE - if ($controllerType -eq "replicaset") { - if ($cluster_override -eq "true") { - $meConfigFile = "/opt/metricextension/me_internal.config" - } - else { - $meConfigFile = "/opt/metricextension/me.config" - } - } - else { - if ($cluster_override -eq "true") { - $meConfigFile = "/opt/metricextension/me_ds_internal_win.config" - } - else { - $meConfigFile = "/opt/metricextension/me_ds_win.config" - } - } - [System.Environment]::SetEnvironmentVariable("ME_CONFIG_FILE", $meConfigFile, "Process") - [System.Environment]::SetEnvironmentVariable("ME_CONFIG_FILE", $meConfigFile, "Machine") - - - # Set ME Config file - if (![string]::IsNullOrEmpty($env:CONTROLLER_TYPE)) { - [System.Environment]::SetEnvironmentVariable("ME_CONFIG_FILE", $me_config_file, "Process") - [System.Environment]::SetEnvironmentVariable("ME_CONFIG_FILE", $me_config_file, "Machine") - } - - # Set variables for telegraf (runs in 
-    [System.Environment]::SetEnvironmentVariable("AGENT_VERSION", $env:AGENT_VERSION, "Machine")
-    [System.Environment]::SetEnvironmentVariable("customResourceId", $env:customResourceId, "Machine")
-    [System.Environment]::SetEnvironmentVariable("NODE_NAME", $env:NODE_NAME, "Machine")
-    [System.Environment]::SetEnvironmentVariable("NODE_IP", $env:NODE_IP, "Machine")
-    [System.Environment]::SetEnvironmentVariable("MODE", $env:MODE, "Machine")
-    [System.Environment]::SetEnvironmentVariable("CONTROLLER_TYPE", $env:CONTROLLER_TYPE, "Machine")
-    [System.Environment]::SetEnvironmentVariable("POD_NAMESPACE", $env:POD_NAMESPACE, "Machine")
-    [System.Environment]::SetEnvironmentVariable("POD_NAME", $env:POD_NAME, "Machine")
-    [System.Environment]::SetEnvironmentVariable("OS_TYPE", $env:OS_TYPE, "Machine")
-    [System.Environment]::SetEnvironmentVariable("CONTAINER_CPU_LIMIT", $env:CONTAINER_CPU_LIMIT, "Machine")
-    [System.Environment]::SetEnvironmentVariable("CONTAINER_MEMORY_LIMIT", $env:CONTAINER_MEMORY_LIMIT, "Machine")
-
-}
-
-function Start-Fluentbit {
-    # Run fluent-bit service first so that we do not miss any logs being forwarded by the fluentd service and telegraf service.
-    # Run fluent-bit as a background job. Switch this to a windows service once fluent-bit supports natively running as a windows service
-    Write-Host "Starting fluent-bit"
-    Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\fluent-bit\bin\fluent-bit.exe" -ArgumentList @("-c", "C:\opt\fluent-bit\fluent-bit-windows.conf", "-e", "C:\opt\fluent-bit\bin\out_appinsights.so") }
-    # C:\opt\fluent-bit\bin\td-agent-bit.exe -c "C:\opt\fluent-bit\fluent-bit-windows.conf" -e "C:\opt\fluent-bit\bin\out_appinsights.so"
-}
-
-function Start-Telegraf {
-    Write-Host "Installing telegraf service"
-    /opt/telegraf/telegraf.exe --service install --config "/opt/telegraf/telegraf-prometheus-collector-windows.conf" > $null
-
-    # Setting delay auto start for telegraf since there have been known issues with windows server and telegraf -
-    # https://github.com/influxdata/telegraf/issues/4081
-    # https://github.com/influxdata/telegraf/issues/3601
-    try {
-        $serverName = [System.Environment]::GetEnvironmentVariable("POD_NAME", "process")
-        if (![string]::IsNullOrEmpty($serverName)) {
-            sc.exe \\$serverName config telegraf start= delayed-auto
-            Write-Host "Successfully set delayed start for telegraf"
-
-        }
-        else {
-            Write-Host "Failed to get environment variable POD_NAME to set delayed telegraf start"
-        }
-    }
-    catch {
-        $e = $_.Exception
-        Write-Host $e
-        Write-Host "exception occured in delayed telegraf start.. continuing without exiting"
-    }
-    Write-Host "Running telegraf service in test mode"
-    /opt/telegraf/telegraf.exe --config "/opt/telegraf/telegraf-prometheus-collector-windows.conf" --test
-    Write-Host "Starting telegraf service"
-    # C:\opt\telegraf\telegraf.exe --service start
-    /opt/telegraf/telegraf.exe --config "/opt/telegraf/telegraf-prometheus-collector-windows.conf" --service start
-
-    # Trying to start telegraf again if it did not start due to fluent bit not being ready at startup
-    Get-Service telegraf | findstr Running
-    if ($? -eq $false) {
-        Write-Host "trying to start telegraf in again in 30 seconds, since fluentbit might not have been ready..."
-        Start-Sleep -s 30
-        /opt/telegraf/telegraf.exe --service start
-    }
-}
-function Start-OTEL-Collector {
-    if ($env:AZMON_USE_DEFAULT_PROMETHEUS_CONFIG -eq "true") {
-        Write-Output "Starting otelcollector with only default scrape configs enabled"
-        Start-Job -ScriptBlock { Start-Process -RedirectStandardError /opt/microsoft/otelcollector/collector-log.txt -NoNewWindow -FilePath "/opt/microsoft/otelcollector/otelcollector.exe" -ArgumentList @("--config", "/opt/microsoft/otelcollector/collector-config-default.yml") } > $null
-    }
-    else {
-        Write-Output "Starting otelcollector"
-        Start-Job -ScriptBlock { Start-Process -RedirectStandardError /opt/microsoft/otelcollector/collector-log.txt -NoNewWindow -FilePath "/opt/microsoft/otelcollector/otelcollector.exe" -ArgumentList @("--config", "/opt/microsoft/otelcollector/collector-config.yml") } > $null
-    }
-    tasklist /fi "imagename eq otelcollector.exe" /fo "table" | findstr otelcollector
-}
-
-function Set-CertificateForME {
-    # Make a copy of the mounted akv directory to see if it changes
-    mkdir -p /opt/akv-copy > $null
-    Copy-Item -r /etc/config/settings/akv /opt/akv-copy
-
-    Get-ChildItem "C:\etc\config\settings\akv\" | Foreach-Object {
-        # check if child is a file and not a directory
-        $filePath = $_.FullName
-        if (Test-Path $filePath -PathType Leaf) {
-            $filePath = $_.FullName
-            $file = Get-Content $filePath -Encoding Byte
-            if (($null -ne $file)) {
-                Write-Output "Importing PFX cert : $filePath"
-                Import-PfxCertificate -FilePath $filePath -CertStoreLocation Cert:\CurrentUser\My > $null
-            }
-        }
-    }
-}
-
-function Start-FileSystemWatcher {
-    Start-Process powershell -NoNewWindow /opt/scripts/filesystemwatcher.ps1 > $null
-}
-
-#start Windows AMA
-function Start-MA {
-    Write-Output "Starting MA"
-    Start-Job -ScriptBlock { Start-Process -NoNewWindow -FilePath "C:\opt\genevamonitoringagent\genevamonitoringagent\Monitoring\Agent\MonAgentLauncher.exe" -ArgumentList @("-useenv") }
-}
-
-function Start-ME {
-    Write-Output "Starting Metrics Extension"
-    Write-Output "ME_CONFIG_FILE = $env:ME_CONFIG_FILE"
-    Write-Output "AZMON_DEFAULT_METRIC_ACCOUNT_NAME = $env:AZMON_DEFAULT_METRIC_ACCOUNT_NAME"
-    Start-Job -ScriptBlock {
-        $me_config_file = $env:ME_CONFIG_FILE
-        $AZMON_DEFAULT_METRIC_ACCOUNT_NAME = $env:AZMON_DEFAULT_METRIC_ACCOUNT_NAME
-        $ME_ADDITIONAL_FLAGS = $env:ME_ADDITIONAL_FLAGS
-        if ($env:MAC -eq $true) {
-            if (![string]::IsNullOrEmpty($ME_ADDITIONAL_FLAGS)) {
-                Start-Process -NoNewWindow -FilePath "/opt/metricextension/MetricsExtension/MetricsExtension.Native.exe" -ArgumentList @("-Logger", "File", "-LogLevel", "Debug", "-LocalControlChannel", "-TokenSource", "AMCS", "-DataDirectory", "C:\opt\genevamonitoringagent\datadirectory\mcs\metricsextension\", "-Input", "otlp_grpc_prom", "-ConfigOverridesFilePath", $me_config_file, $ME_ADDITIONAL_FLAGS) > $null
-            }
-            else {
-                Start-Process -NoNewWindow -FilePath "/opt/metricextension/MetricsExtension/MetricsExtension.Native.exe" -ArgumentList @("-Logger", "File", "-LogLevel", "Debug", "-LocalControlChannel", "-TokenSource", "AMCS", "-DataDirectory", "C:\opt\genevamonitoringagent\datadirectory\mcs\metricsextension\", "-Input", "otlp_grpc_prom", "-ConfigOverridesFilePath", $me_config_file) > $null
-                # /opt/metricextension/MetricsExtension/MetricsExtension.Native.exe -Logger Console -LogLevel Info -LocalControlChannel -TokenSource AMCS -DataDirectory C:\opt\genevamonitoringagent\datadirectory\mcs\metricsextension\ -Input otlp_grpc_prom -ConfigOverridesFilePath '/opt/metricextension/me_ds_win.config'
-            }
-        }
-        else {
-            if (![string]::IsNullOrEmpty($ME_ADDITIONAL_FLAGS)) {
-                Start-Process -NoNewWindow -FilePath "/opt/metricextension/MetricsExtension/MetricsExtension.Native.exe" -ArgumentList @("-Logger", "File", "-LogLevel", "Info", "-DataDirectory", ".\", "-Input", "otlp_grpc_prom", "-MonitoringAccount", $AZMON_DEFAULT_METRIC_ACCOUNT_NAME, "-ConfigOverridesFilePath", $me_config_file, $ME_ADDITIONAL_FLAGS) > $null
-            }
-            else {
-                Start-Process -NoNewWindow -FilePath "/opt/metricextension/MetricsExtension/MetricsExtension.Native.exe" -ArgumentList @("-Logger", "File", "-LogLevel", "Info", "-DataDirectory", ".\", "-Input", "otlp_grpc_prom", "-MonitoringAccount", $AZMON_DEFAULT_METRIC_ACCOUNT_NAME, "-ConfigOverridesFilePath", $me_config_file) > $null
-            }
-        }
-    }
-    tasklist /fi "imagename eq MetricsExtension.Native.exe" /fo "table" | findstr MetricsExtension
-}
-
-Start-Transcript -Path main.txt
-if ($env:MAC -ne $true) {
-    Set-CertificateForME
-}
-Set-EnvironmentVariablesAndConfigParser
-Start-Fluentbit
-Start-Telegraf
-Start-OTEL-Collector
-if ($env:MAC -eq $true) {
-    Start-MA
-    # "Waiting for 60s for MA to get the config and put them in place for ME"
-    Start-Sleep 60
-}
-Start-ME
-# Waiting 60 more seconds since C:\opt\genevamonitoringagent\datadirectory\mcs\metricsextension needs to be created
-Start-Sleep 60
-Start-FileSystemWatcher
-
-$epochTimeNow = [int](Get-Date).Subtract([datetime]'1970-01-01T00:00:00Z').TotalSeconds
-Set-Content -Path /opt/microsoft/liveness/azmon-container-start-time $epochTimeNow
-
-# Notepad.exe | Out-Null
-Write-Output "Starting ping to keep the container running"
-ping -t 127.0.0.1 | Out-Null
diff --git a/otelcollector/build/windows/scripts/setup.ps1 b/otelcollector/build/windows/scripts/setup.ps1
index 6d8db0783..75299e2e8 100644
--- a/otelcollector/build/windows/scripts/setup.ps1
+++ b/otelcollector/build/windows/scripts/setup.ps1
@@ -72,15 +72,6 @@ catch {
 }
 Write-Host ('Finished downloading Telegraf')
 ############################################################################################
-#Remove gemfile.lock for http_parser gem 0.6.0
-#see - https://github.com/fluent/fluentd/issues/3374 https://github.com/tmm1/http_parser.rb/issues/70
-$gemfile = "\ruby26\lib\ruby\gems\2.6.0\gems\http_parser.rb-0.6.0\Gemfile.lock"
-$gemfileFullPath = $Env:SYSTEMDRIVE + "\" + $gemfile
-If (Test-Path -Path $gemfile ) {
-    Write-Host ("Renaming unused gemfile.lock for http_parser 0.6.0")
-    Rename-Item -Path $gemfileFullPath -NewName "renamed_Gemfile_lock.renamed"
-}
-############################################################################################
 Write-Host ('Installing GenevaMonitoringAgent');
 try {
     $genevamonitoringagentUri = 'https://github.com/Azure/prometheus-collector/releases/download/Promtheus-MA-Windows-4.1.2024/GenevaMonitoringAgent.46.15.4.zip'
diff --git a/otelcollector/configmapparser/ConfigParseErrorLogger.rb b/otelcollector/configmapparser/ConfigParseErrorLogger.rb
deleted file mode 100644
index f7fe5b578..000000000
--- a/otelcollector/configmapparser/ConfigParseErrorLogger.rb
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/usr/local/bin/ruby
-# frozen_string_literal: true
-
-class ConfigParseErrorLogger
-  require "json"
-  require "colorize"
-
-  def initialize
-  end
-
-  class << self
-    def logError(prefix, message)
-      begin
-        errorMessage = "#{prefix}::error::#{message}"
-        STDERR.puts errorMessage.red
-      rescue => errorStr
-        puts "#{prefix}::Error in ConfigParserErrorLogger::logError: #{errorStr}".red
-      end
-    end
-
-    def logWarning(prefix, message)
-      begin
puts "#{prefix}::warning::#{message}".yellow - rescue => errorStr - puts "#{prefix}::Error in ConfigParserErrorLogger::logWarning: #{errorStr}".red - end - end - - def logSection(prefix, message) - begin - puts message.center(86, "*").cyan - rescue => errorStr - puts "#{prefix}::Error in ConfigParserErrorLogger::logSection: #{errorStr}".red - end - end - - def log(prefix, message) - begin - puts "#{prefix}::#{message}" - rescue => errorStr - puts "#{prefix}::Error in ConfigParserErrorLogger::log: #{errorStr}".red - end - end - end -end diff --git a/otelcollector/configmapparser/prometheus-config-merger-with-operator.rb b/otelcollector/configmapparser/prometheus-config-merger-with-operator.rb deleted file mode 100644 index 97b3fa73d..000000000 --- a/otelcollector/configmapparser/prometheus-config-merger-with-operator.rb +++ /dev/null @@ -1,643 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require "tomlrb" -require "deep_merge" -require "yaml" -require_relative "ConfigParseErrorLogger" - -LOGGING_PREFIX = "prometheus-config-merger-with-operator" -@configMapMountPath = "/etc/config/settings/prometheus/prometheus-config" -@promMergedConfigPath = "/opt/promMergedConfig.yml" -@mergedDefaultConfigPath = "/opt/defaultsMergedConfig.yml" -@replicasetControllerType = "replicaset" -@daemonsetControllerType = "daemonset" -@configReaderSidecarContainerType = "configreadersidecar" -@supportedSchemaVersion = true -@defaultPromConfigPathPrefix = "/opt/microsoft/otelcollector/default-prom-configs/" -@regexHashFile = "/opt/microsoft/configmapparser/config_def_targets_metrics_keep_list_hash" -@regexHash = {} -@sendDSUpMetric = false -@intervalHashFile = "/opt/microsoft/configmapparser/config_def_targets_scrape_intervals_hash" -@intervalHash = {} - -@kubeletDefaultFileRsSimple = @defaultPromConfigPathPrefix + "kubeletDefaultRsSimple.yml" -@kubeletDefaultFileRsAdvanced = @defaultPromConfigPathPrefix + "kubeletDefaultRsAdvanced.yml" -@kubeletDefaultFileDs = @defaultPromConfigPathPrefix + "kubeletDefaultDs.yml" -@kubeletDefaultFileRsAdvancedWindowsDaemonset = @defaultPromConfigPathPrefix + "kubeletDefaultRsAdvancedWindowsDaemonset.yml" -@corednsDefaultFile = @defaultPromConfigPathPrefix + "corednsDefault.yml" -@cadvisorDefaultFileRsSimple = @defaultPromConfigPathPrefix + "cadvisorDefaultRsSimple.yml" -@cadvisorDefaultFileRsAdvanced = @defaultPromConfigPathPrefix + "cadvisorDefaultRsAdvanced.yml" -@cadvisorDefaultFileDs = @defaultPromConfigPathPrefix + "cadvisorDefaultDs.yml" -@kubeproxyDefaultFile = @defaultPromConfigPathPrefix + "kubeproxyDefault.yml" -@apiserverDefaultFile = @defaultPromConfigPathPrefix + "apiserverDefault.yml" -@kubestateDefaultFile = @defaultPromConfigPathPrefix + "kubestateDefault.yml" -@nodeexporterDefaultFileRsSimple = @defaultPromConfigPathPrefix + "nodeexporterDefaultRsSimple.yml" -@nodeexporterDefaultFileRsAdvanced = @defaultPromConfigPathPrefix + "nodeexporterDefaultRsAdvanced.yml" -@nodeexporterDefaultFileDs = @defaultPromConfigPathPrefix + "nodeexporterDefaultDs.yml" -@prometheusCollectorHealthDefaultFile = @defaultPromConfigPathPrefix + "prometheusCollectorHealth.yml" -@windowsexporterDefaultRsSimpleFile = @defaultPromConfigPathPrefix + "windowsexporterDefaultRsSimple.yml" -@windowsexporterDefaultDsFile = @defaultPromConfigPathPrefix + "windowsexporterDefaultDs.yml" -@windowskubeproxyDefaultFileRsSimpleFile = @defaultPromConfigPathPrefix + "windowskubeproxyDefaultRsSimple.yml" -@windowskubeproxyDefaultDsFile = @defaultPromConfigPathPrefix + 
"windowskubeproxyDefaultDs.yml" -@podannotationsDefaultFile = @defaultPromConfigPathPrefix + "podannotationsDefault.yml" -@windowskubeproxyDefaultRsAdvancedFile = @defaultPromConfigPathPrefix + "windowskubeproxyDefaultRsAdvanced.yml" -@kappiebasicDefaultFileDs = @defaultPromConfigPathPrefix + "kappieBasicDefaultDs.yml" - -@networkobservabilityRetinaDefaultFileDs = @defaultPromConfigPathPrefix + "networkobservabilityRetinaDefaultDs.yml" -@networkobservabilityHubbleDefaultFileDs = @defaultPromConfigPathPrefix + "networkobservabilityHubbleDefaultDs.yml" -@networkobservabilityCiliumDefaultFileDs = @defaultPromConfigPathPrefix + "networkobservabilityCiliumDefaultDs.yml" - - -def parseConfigMap - begin - # Check to see if config map is created - if (File.file?(@configMapMountPath)) - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Custom prometheus config exists") - config = File.read(@configMapMountPath) - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Successfully parsed configmap for prometheus config") - return config - else - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "Custom prometheus config does not exist, using only default scrape targets if they are enabled") - return "" - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing configmap for prometheus config: #{errorStr}. Custom prometheus config will not be used. Please check configmap for errors") - return "" - end -end - -def loadRegexHash - begin - @regexHash = YAML.load_file(@regexHashFile) - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception in loadRegexHash for prometheus config: #{errorStr}. Keep list regexes will not be used") - end -end - -def loadIntervalHash - begin - @intervalHash = YAML.load_file(@intervalHashFile) - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception in loadIntervalHash for prometheus config: #{errorStr}. Scrape interval will not be used") - end -end - -def isConfigReaderSidecar - if !ENV["CONTAINER_TYPE"].nil? && !ENV["CONTAINER_TYPE"].empty? - currentContainerType = ENV["CONTAINER_TYPE"].strip.downcase - if !currentContainerType.nil? && currentContainerType == @configReaderSidecarContainerType - return true - end - end - return false -end - -def UpdateScrapeIntervalConfig(yamlConfigFile, scrapeIntervalSetting) - begin - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Updating scrape interval config for #{yamlConfigFile}") - config = YAML.load(File.read(yamlConfigFile)) - scrapeIntervalConfig = scrapeIntervalSetting - - # Iterate through each scrape config and update scrape interval config - if !config.nil? - scrapeConfigs = config["scrape_configs"] - if !scrapeConfigs.nil? && !scrapeConfigs.empty? - scrapeConfigs.each { |scfg| - scrapeCfgs = scfg["scrape_interval"] - if !scrapeCfgs.nil? - scfg["scrape_interval"] = scrapeIntervalConfig - end - } - cfgYamlWithScrapeConfig = YAML::dump(config) - File.open(yamlConfigFile, "w") { |file| file.puts cfgYamlWithScrapeConfig } - end - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while updating scrape interval config in default target file - #{yamlConfigFile} : #{errorStr}. 
The Scrape interval will not be used") - end -end - -def AppendMetricRelabelConfig(yamlConfigFile, keepListRegex) - begin - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Adding keep list regex or minimal ingestion regex for #{yamlConfigFile}") - config = YAML.load(File.read(yamlConfigFile)) - keepListMetricRelabelConfig = [{ "source_labels" => ["__name__"], "action" => "keep", "regex" => keepListRegex }] - - # Iterate through each scrape config and append metric relabel config for keep list - if !config.nil? - scrapeConfigs = config["scrape_configs"] - if !scrapeConfigs.nil? && !scrapeConfigs.empty? - scrapeConfigs.each { |scfg| - metricRelabelCfgs = scfg["metric_relabel_configs"] - if metricRelabelCfgs.nil? - scfg["metric_relabel_configs"] = keepListMetricRelabelConfig - else - scfg["metric_relabel_configs"] = metricRelabelCfgs.concat(keepListMetricRelabelConfig) - end - } - cfgYamlWithMetricRelabelConfig = YAML::dump(config) - File.open(yamlConfigFile, "w") { |file| file.puts cfgYamlWithMetricRelabelConfig } - end - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while appending metric relabel config in default target file - #{yamlConfigFile} : #{errorStr}. The keep list regex will not be used") - end -end - -def AppendRelabelConfig(yamlConfigFile, relabelConfig, keepRegex) - begin - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Adding relabel config for #{yamlConfigFile}") - config = YAML.load(File.read(yamlConfigFile)) - - # Iterate through each scrape config and append metric relabel config for keep list - if !config.nil? - scrapeConfigs = config["scrape_configs"] - if !scrapeConfigs.nil? && !scrapeConfigs.empty? - scrapeConfigs.each { |scfg| - relabelCfgs = scfg["relabel_configs"] - if relabelCfgs.nil? - scfg["relabel_configs"] = relabelConfig - else - scfg["relabel_configs"] = relabelCfgs.concat(relabelConfig) - end - } - cfgYamlWithRelabelConfig = YAML::dump(config) - File.open(yamlConfigFile, "w") { |file| file.puts cfgYamlWithRelabelConfig } - end - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while appending relabel config in default target file - #{yamlConfigFile} : #{errorStr}. The keep list regex will not be used") - end -end - -# Get the list of default configs to be included in the otel's prometheus config -def populateDefaultPrometheusConfig - begin - # check if running in daemonset or replicaset - currentControllerType = "" - if !ENV["CONTROLLER_TYPE"].nil? && !ENV["CONTROLLER_TYPE"].empty? - currentControllerType = ENV["CONTROLLER_TYPE"].strip.downcase - end - advancedMode = false #default is false - windowsDaemonset = false #default is false - - # get current mode (advanced or not...) - if !ENV["MODE"].nil? && !ENV["MODE"].empty? - currentMode = ENV["MODE"].strip.downcase - if currentMode == "advanced" - advancedMode = true - end - end - - # get if windowsdaemonset is enabled or not (ie. WINMODE env = advanced or not...) - if !ENV["WINMODE"].nil? && !ENV["WINMODE"].empty? - winMode = ENV["WINMODE"].strip.downcase - if winMode == "advanced" - windowsDaemonset = true - end - end - - defaultConfigs = [] - if !ENV["AZMON_PROMETHEUS_KUBELET_SCRAPING_ENABLED"].nil? 
&& ENV["AZMON_PROMETHEUS_KUBELET_SCRAPING_ENABLED"].downcase == "true" - kubeletMetricsKeepListRegex = @regexHash["KUBELET_METRICS_KEEP_LIST_REGEX"] - kubeletScrapeInterval = @intervalHash["KUBELET_SCRAPE_INTERVAL"] - if (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - if advancedMode == false - UpdateScrapeIntervalConfig(@kubeletDefaultFileRsSimple, kubeletScrapeInterval) - if !kubeletMetricsKeepListRegex.nil? && !kubeletMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@kubeletDefaultFileRsSimple, kubeletMetricsKeepListRegex) - end - defaultConfigs.push(@kubeletDefaultFileRsSimple) - elsif windowsDaemonset == true && @sendDSUpMetric == true - UpdateScrapeIntervalConfig(@kubeletDefaultFileRsAdvancedWindowsDaemonset, kubeletScrapeInterval) - defaultConfigs.push(@kubeletDefaultFileRsAdvancedWindowsDaemonset) - elsif @sendDSUpMetric == true - UpdateScrapeIntervalConfig(@kubeletDefaultFileRsAdvanced, kubeletScrapeInterval) - defaultConfigs.push(@kubeletDefaultFileRsAdvanced) - end - else - if advancedMode == true && currentControllerType == @daemonsetControllerType && (windowsDaemonset == true || ENV["OS_TYPE"].downcase == "linux") - UpdateScrapeIntervalConfig(@kubeletDefaultFileDs, kubeletScrapeInterval) - if !kubeletMetricsKeepListRegex.nil? && !kubeletMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@kubeletDefaultFileDs, kubeletMetricsKeepListRegex) - end - contents = File.read(@kubeletDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - contents = contents.gsub("$$OS_TYPE$$", ENV["OS_TYPE"]) - File.open(@kubeletDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@kubeletDefaultFileDs) - end - end - end - if !ENV["AZMON_PROMETHEUS_COREDNS_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_COREDNS_SCRAPING_ENABLED"].downcase == "true" && (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - corednsMetricsKeepListRegex = @regexHash["COREDNS_METRICS_KEEP_LIST_REGEX"] - corednsScrapeInterval = @intervalHash["COREDNS_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@corednsDefaultFile, corednsScrapeInterval) - if !corednsMetricsKeepListRegex.nil? && !corednsMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@corednsDefaultFile, corednsMetricsKeepListRegex) - end - defaultConfigs.push(@corednsDefaultFile) - end - if !ENV["AZMON_PROMETHEUS_CADVISOR_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_CADVISOR_SCRAPING_ENABLED"].downcase == "true" - cadvisorMetricsKeepListRegex = @regexHash["CADVISOR_METRICS_KEEP_LIST_REGEX"] - cadvisorScrapeInterval = @intervalHash["CADVISOR_SCRAPE_INTERVAL"] - if (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - if advancedMode == false - UpdateScrapeIntervalConfig(@cadvisorDefaultFileRsSimple, cadvisorScrapeInterval) - if !cadvisorMetricsKeepListRegex.nil? && !cadvisorMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@cadvisorDefaultFileRsSimple, cadvisorMetricsKeepListRegex) - end - defaultConfigs.push(@cadvisorDefaultFileRsSimple) - elsif @sendDSUpMetric == true - UpdateScrapeIntervalConfig(@cadvisorDefaultFileRsAdvanced, cadvisorScrapeInterval) - defaultConfigs.push(@cadvisorDefaultFileRsAdvanced) - end - else - if advancedMode == true && ENV["OS_TYPE"].downcase == "linux" && currentControllerType == @daemonsetControllerType - UpdateScrapeIntervalConfig(@cadvisorDefaultFileDs, cadvisorScrapeInterval) - if !cadvisorMetricsKeepListRegex.nil? 
&& !cadvisorMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@cadvisorDefaultFileDs, cadvisorMetricsKeepListRegex) - end - contents = File.read(@cadvisorDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@cadvisorDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@cadvisorDefaultFileDs) - end - end - end - if !ENV["AZMON_PROMETHEUS_KUBEPROXY_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_KUBEPROXY_SCRAPING_ENABLED"].downcase == "true" && (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - kubeproxyMetricsKeepListRegex = @regexHash["KUBEPROXY_METRICS_KEEP_LIST_REGEX"] - kubeproxyScrapeInterval = @intervalHash["KUBEPROXY_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@kubeproxyDefaultFile, kubeproxyScrapeInterval) - if !kubeproxyMetricsKeepListRegex.nil? && !kubeproxyMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@kubeproxyDefaultFile, kubeproxyMetricsKeepListRegex) - end - defaultConfigs.push(@kubeproxyDefaultFile) - end - if !ENV["AZMON_PROMETHEUS_APISERVER_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_APISERVER_SCRAPING_ENABLED"].downcase == "true" && (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - apiserverMetricsKeepListRegex = @regexHash["APISERVER_METRICS_KEEP_LIST_REGEX"] - apiserverScrapeInterval = @intervalHash["APISERVER_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@apiserverDefaultFile, apiserverScrapeInterval) - if !apiserverMetricsKeepListRegex.nil? && !apiserverMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@apiserverDefaultFile, apiserverMetricsKeepListRegex) - end - defaultConfigs.push(@apiserverDefaultFile) - end - if !ENV["AZMON_PROMETHEUS_KUBESTATE_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_KUBESTATE_SCRAPING_ENABLED"].downcase == "true" && (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - kubestateMetricsKeepListRegex = @regexHash["KUBESTATE_METRICS_KEEP_LIST_REGEX"] - kubestateScrapeInterval = @intervalHash["KUBESTATE_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@kubestateDefaultFile, kubestateScrapeInterval) - if !kubestateMetricsKeepListRegex.nil? && !kubestateMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@kubestateDefaultFile, kubestateMetricsKeepListRegex) - end - contents = File.read(@kubestateDefaultFile) - contents = contents.gsub("$$KUBE_STATE_NAME$$", ENV["KUBE_STATE_NAME"]) - contents = contents.gsub("$$POD_NAMESPACE$$", ENV["POD_NAMESPACE"]) - File.open(@kubestateDefaultFile, "w") { |file| file.puts contents } - defaultConfigs.push(@kubestateDefaultFile) - end - if !ENV["AZMON_PROMETHEUS_NODEEXPORTER_SCRAPING_ENABLED"].nil? 
&& ENV["AZMON_PROMETHEUS_NODEEXPORTER_SCRAPING_ENABLED"].downcase == "true" - nodeexporterMetricsKeepListRegex = @regexHash["NODEEXPORTER_METRICS_KEEP_LIST_REGEX"] - nodeexporterScrapeInterval = @intervalHash["NODEEXPORTER_SCRAPE_INTERVAL"] - if (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - if advancedMode == true && @sendDSUpMetric == true - UpdateScrapeIntervalConfig(@nodeexporterDefaultFileRsAdvanced, nodeexporterScrapeInterval) - contents = File.read(@nodeexporterDefaultFileRsAdvanced) - contents = contents.gsub("$$NODE_EXPORTER_NAME$$", ENV["NODE_EXPORTER_NAME"]) - contents = contents.gsub("$$POD_NAMESPACE$$", ENV["POD_NAMESPACE"]) - File.open(@nodeexporterDefaultFileRsAdvanced, "w") { |file| file.puts contents } - defaultConfigs.push(@nodeexporterDefaultFileRsAdvanced) - elsif advancedMode == false - UpdateScrapeIntervalConfig(@nodeexporterDefaultFileRsSimple, nodeexporterScrapeInterval) - if !nodeexporterMetricsKeepListRegex.nil? && !nodeexporterMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@nodeexporterDefaultFileRsSimple, nodeexporterMetricsKeepListRegex) - end - contents = File.read(@nodeexporterDefaultFileRsSimple) - contents = contents.gsub("$$NODE_EXPORTER_NAME$$", ENV["NODE_EXPORTER_NAME"]) - contents = contents.gsub("$$POD_NAMESPACE$$", ENV["POD_NAMESPACE"]) - File.open(@nodeexporterDefaultFileRsSimple, "w") { |file| file.puts contents } - defaultConfigs.push(@nodeexporterDefaultFileRsSimple) - end - else - if advancedMode == true && ENV["OS_TYPE"].downcase == "linux" && currentControllerType == @daemonsetControllerType - UpdateScrapeIntervalConfig(@nodeexporterDefaultFileDs, nodeexporterScrapeInterval) - if !nodeexporterMetricsKeepListRegex.nil? && !nodeexporterMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@nodeexporterDefaultFileDs, nodeexporterMetricsKeepListRegex) - end - contents = File.read(@nodeexporterDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_EXPORTER_TARGETPORT$$", ENV["NODE_EXPORTER_TARGETPORT"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@nodeexporterDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@nodeexporterDefaultFileDs) - end - end - end - - if !ENV["AZMON_PROMETHEUS_KAPPIEBASIC_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_KAPPIEBASIC_SCRAPING_ENABLED"].downcase == "true" - kappiebasicMetricsKeepListRegex = @regexHash["KAPPIEBASIC_METRICS_KEEP_LIST_REGEX"] - kappiebasicScrapeInterval = @intervalHash["KAPPIEBASIC_SCRAPE_INTERVAL"] - if (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - #do nothing -- kappie is not supported to be scrapped automatically outside ds. if needed, customer can disable this ds target, and enable rs scraping thru custom config map - elsif currentControllerType == @daemonsetControllerType #kappie scraping will be turned ON by default only when in MAC/addon mode (for both windows & linux) - if advancedMode == true && !ENV["MAC"].nil? && !ENV["MAC"].empty? && ENV["MAC"].strip.downcase == "true" #&& ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@kappiebasicDefaultFileDs, kappiebasicScrapeInterval) - if !kappiebasicMetricsKeepListRegex.nil? && !kappiebasicMetricsKeepListRegex.empty? 
- if !kappiebasicMetricsKeepListRegex.nil? && !kappiebasicMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@kappiebasicDefaultFileDs, kappiebasicMetricsKeepListRegex) - end - contents = File.read(@kappiebasicDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@kappiebasicDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@kappiebasicDefaultFileDs) - end - end - end - - if !ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYRETINA_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYRETINA_SCRAPING_ENABLED"].downcase == "true" - networkobservabilityRetinaMetricsKeepListRegex = @regexHash["NETWORKOBSERVABILITYRETINA_METRICS_KEEP_LIST_REGEX"] - networkobservabilityRetinaScrapeInterval = @intervalHash["NETWORKOBSERVABILITYRETINA_SCRAPE_INTERVAL"] - if (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - #do nothing -- networkobservabilityRetina is not supported to be scraped automatically outside ds. if needed, customer can disable this ds target, and enable rs scraping through custom config map - else #networkobservabilityRetina scraping will be turned ON by default only when in MAC/addon mode (for both windows & linux) - if advancedMode == true && !ENV['MAC'].nil? && !ENV['MAC'].empty? && ENV['MAC'].strip.downcase == "true" #&& ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@networkobservabilityRetinaDefaultFileDs, networkobservabilityRetinaScrapeInterval) - if !networkobservabilityRetinaMetricsKeepListRegex.nil? && !networkobservabilityRetinaMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@networkobservabilityRetinaDefaultFileDs, networkobservabilityRetinaMetricsKeepListRegex) - end - contents = File.read(@networkobservabilityRetinaDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@networkobservabilityRetinaDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@networkobservabilityRetinaDefaultFileDs) - end - end - end - - if !ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYHUBBLE_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYHUBBLE_SCRAPING_ENABLED"].downcase == "true" - networkobservabilityHubbleMetricsKeepListRegex = @regexHash["NETWORKOBSERVABILITYHUBBLE_METRICS_KEEP_LIST_REGEX"] - networkobservabilityHubbleScrapeInterval = @intervalHash["NETWORKOBSERVABILITYHUBBLE_SCRAPE_INTERVAL"] - if (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - #do nothing -- networkobservabilityHubble is not supported to be scraped automatically outside ds. if needed, customer can disable this ds target, and enable rs scraping through custom config map - else #networkobservabilityHubble scraping will be turned ON by default only when in MAC/addon mode (for both windows & linux) - if advancedMode == true && !ENV['MAC'].nil? && !ENV['MAC'].empty? && ENV['MAC'].strip.downcase == "true" && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@networkobservabilityHubbleDefaultFileDs, networkobservabilityHubbleScrapeInterval) - if !networkobservabilityHubbleMetricsKeepListRegex.nil? && !networkobservabilityHubbleMetricsKeepListRegex.empty?
- AppendMetricRelabelConfig(@networkobservabilityHubbleDefaultFileDs, networkobservabilityHubbleMetricsKeepListRegex) - end - contents = File.read(@networkobservabilityHubbleDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@networkobservabilityHubbleDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@networkobservabilityHubbleDefaultFileDs) - end - end - end - - if !ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYCILIUM_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYCILIUM_SCRAPING_ENABLED"].downcase == "true" - networkobservabilityCiliumMetricsKeepListRegex = @regexHash["NETWORKOBSERVABILITYCILIUM_METRICS_KEEP_LIST_REGEX"] - networkobservabilityCiliumScrapeInterval = @intervalHash["NETWORKOBSERVABILITYCILIUM_SCRAPE_INTERVAL"] - if (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - #do nothing -- networkobservabilityCilium is not supported to be scraped automatically outside ds. if needed, customer can disable this ds target, and enable rs scraping through custom config map - else #networkobservabilityCilium scraping will be turned ON by default only when in MAC/addon mode (for both windows & linux) - if advancedMode == true && !ENV['MAC'].nil? && !ENV['MAC'].empty? && ENV['MAC'].strip.downcase == "true" && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@networkobservabilityCiliumDefaultFileDs, networkobservabilityCiliumScrapeInterval) - if !networkobservabilityCiliumMetricsKeepListRegex.nil? && !networkobservabilityCiliumMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@networkobservabilityCiliumDefaultFileDs, networkobservabilityCiliumMetricsKeepListRegex) - end - contents = File.read(@networkobservabilityCiliumDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@networkobservabilityCiliumDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@networkobservabilityCiliumDefaultFileDs) - end - end - end - - - - # Collector health config should be enabled or disabled for both replicaset and daemonset - if !ENV["AZMON_PROMETHEUS_COLLECTOR_HEALTH_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_COLLECTOR_HEALTH_SCRAPING_ENABLED"].downcase == "true" - prometheusCollectorHealthInterval = @intervalHash["PROMETHEUS_COLLECTOR_HEALTH_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@prometheusCollectorHealthDefaultFile, prometheusCollectorHealthInterval) - defaultConfigs.push(@prometheusCollectorHealthDefaultFile) - end - - if !ENV["AZMON_PROMETHEUS_WINDOWSEXPORTER_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_WINDOWSEXPORTER_SCRAPING_ENABLED"].downcase == "true" - winexporterMetricsKeepListRegex = @regexHash["WINDOWSEXPORTER_METRICS_KEEP_LIST_REGEX"] - windowsexporterScrapeInterval = @intervalHash["WINDOWSEXPORTER_SCRAPE_INTERVAL"] - # Not adding the isConfigReaderSidecar check in place of the replicaset check since this is the legacy 1P chart path and is no longer relevant. - if currentControllerType == @replicasetControllerType && advancedMode == false && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@windowsexporterDefaultRsSimpleFile, windowsexporterScrapeInterval)
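Every AppendMetricRelabelConfig call in these blocks injects the same single relabel rule. A small sketch on an invented job shows the shape of that rule in the emitted YAML; the regex and job name are examples only:

require "yaml"

# The keep-list injection adds one `keep` action on __name__: only series
# whose metric name matches the regex survive ingestion.
keep_list_regex = "up|windows_cpu_time_total"
keep_rule = [{ "source_labels" => ["__name__"], "action" => "keep", "regex" => keep_list_regex }]
scrape_config = { "job_name" => "example-job", "metric_relabel_configs" => keep_rule }
puts YAML.dump({ "scrape_configs" => [scrape_config] })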
- if !winexporterMetricsKeepListRegex.nil? && !winexporterMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@windowsexporterDefaultRsSimpleFile, winexporterMetricsKeepListRegex) - end - contents = File.read(@windowsexporterDefaultRsSimpleFile) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@windowsexporterDefaultRsSimpleFile, "w") { |file| file.puts contents } - defaultConfigs.push(@windowsexporterDefaultRsSimpleFile) - elsif currentControllerType == @daemonsetControllerType && advancedMode == true && windowsDaemonset == true && ENV["OS_TYPE"].downcase == "windows" - UpdateScrapeIntervalConfig(@windowsexporterDefaultDsFile, windowsexporterScrapeInterval) - if !winexporterMetricsKeepListRegex.nil? && !winexporterMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@windowsexporterDefaultDsFile, winexporterMetricsKeepListRegex) - end - contents = File.read(@windowsexporterDefaultDsFile) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@windowsexporterDefaultDsFile, "w") { |file| file.puts contents } - defaultConfigs.push(@windowsexporterDefaultDsFile) - end - end - - if !ENV["AZMON_PROMETHEUS_WINDOWSKUBEPROXY_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_WINDOWSKUBEPROXY_SCRAPING_ENABLED"].downcase == "true" - winkubeproxyMetricsKeepListRegex = @regexHash["WINDOWSKUBEPROXY_METRICS_KEEP_LIST_REGEX"] - windowskubeproxyScrapeInterval = @intervalHash["WINDOWSKUBEPROXY_SCRAPE_INTERVAL"] - # Not adding the isConfigReaderSidecar check in place of the replicaset check since this is the legacy 1P chart path and is no longer relevant. - if currentControllerType == @replicasetControllerType && advancedMode == false && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@windowskubeproxyDefaultFileRsSimpleFile, windowskubeproxyScrapeInterval) - if !winkubeproxyMetricsKeepListRegex.nil? && !winkubeproxyMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@windowskubeproxyDefaultFileRsSimpleFile, winkubeproxyMetricsKeepListRegex) - end - contents = File.read(@windowskubeproxyDefaultFileRsSimpleFile) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@windowskubeproxyDefaultFileRsSimpleFile, "w") { |file| file.puts contents } - defaultConfigs.push(@windowskubeproxyDefaultFileRsSimpleFile) - elsif currentControllerType == @daemonsetControllerType && advancedMode == true && windowsDaemonset == true && ENV["OS_TYPE"].downcase == "windows" - UpdateScrapeIntervalConfig(@windowskubeproxyDefaultDsFile, windowskubeproxyScrapeInterval) - if !winkubeproxyMetricsKeepListRegex.nil? && !winkubeproxyMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@windowskubeproxyDefaultDsFile, winkubeproxyMetricsKeepListRegex) - end - contents = File.read(@windowskubeproxyDefaultDsFile) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@windowskubeproxyDefaultDsFile, "w") { |file| file.puts contents } - defaultConfigs.push(@windowskubeproxyDefaultDsFile) - end - end - - if !ENV["AZMON_PROMETHEUS_POD_ANNOTATION_SCRAPING_ENABLED"].nil?
&& ENV["AZMON_PROMETHEUS_POD_ANNOTATION_SCRAPING_ENABLED"].downcase == "true" && (isConfigReaderSidecar || currentControllerType == @replicasetControllerType) - podannotationNamespacesRegex = ENV["AZMON_PROMETHEUS_POD_ANNOTATION_NAMESPACES_REGEX"] - podannotationMetricsKeepListRegex = @regexHash["POD_ANNOTATION_METRICS_KEEP_LIST_REGEX"] - podannotationScrapeInterval = @intervalHash["POD_ANNOTATION_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@podannotationsDefaultFile, podannotationScrapeInterval) - if !podannotationMetricsKeepListRegex.nil? && !podannotationMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@podannotationsDefaultFile, podannotationMetricsKeepListRegex) - end - if !podannotationNamespacesRegex.nil? && !podannotationNamespacesRegex.empty? - relabelConfig = [{ "source_labels" => ["__meta_kubernetes_namespace"], "action" => "keep", "regex" => podannotationNamespacesRegex }] - AppendRelabelConfig(@podannotationsDefaultFile, relabelConfig, podannotationNamespacesRegex) - end - defaultConfigs.push(@podannotationsDefaultFile) - end - - @mergedDefaultConfigs = mergeDefaultScrapeConfigs(defaultConfigs) - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while merging default scrape targets - #{errorStr}. No default scrape targets will be included") - @mergedDefaultConfigs = "" - end -end - -def mergeDefaultScrapeConfigs(defaultScrapeConfigs) - mergedDefaultConfigs = "" - begin - if defaultScrapeConfigs.length > 0 - mergedDefaultConfigs = YAML.load("scrape_configs:") - # Load each of the default scrape configs and merge them - defaultScrapeConfigs.each { |defaultScrapeConfig| - # Load yaml from default config - defaultConfigYaml = YAML.load(File.read(defaultScrapeConfig)) - mergedDefaultConfigs = mergedDefaultConfigs.deep_merge!(defaultConfigYaml) - } - end - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Done merging #{defaultScrapeConfigs.length} default prometheus config(s)") - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while adding default scrape config- #{errorStr}. No default scrape targets will be included") - mergedDefaultConfigs = "" - end - return mergedDefaultConfigs -end - -def mergeDefaultAndCustomScrapeConfigs(customPromConfig) - mergedConfigYaml = "" - begin - if !@mergedDefaultConfigs.nil? && !@mergedDefaultConfigs.empty? - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Merging default and custom scrape configs") - customPrometheusConfig = YAML.load(customPromConfig) - mergedConfigs = @mergedDefaultConfigs.deep_merge!(customPrometheusConfig) - mergedConfigYaml = YAML::dump(mergedConfigs) - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Done merging default scrape config(s) with custom prometheus config, writing them to file") - else - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "The merged default scrape config is nil or empty, using only custom scrape config") - mergedConfigYaml = customPromConfig - end - File.open(@promMergedConfigPath, "w") { |file| file.puts mergedConfigYaml } - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while merging default and custom scrape configs- #{errorStr}") - end -end - -#this will enforce num labels, label name length & label value length for every scrape job to be with-in azure monitor supported limits -# by injecting these into every custom scrape job's config. For default scrape jobs, this is already included in them. We do this here, so the config validation can happen after we inject these into the custom scrape jobs . 
-def setLabelLimitsPerScrape(prometheusConfigString) - customConfig = prometheusConfigString - ConfigParseErrorLogger.log(LOGGING_PREFIX, "setLabelLimitsPerScrape()") - begin - if !customConfig.nil? && !customConfig.empty? - limitedCustomConfig = YAML.load(customConfig) - limitedCustomscrapes = limitedCustomConfig["scrape_configs"] - if !limitedCustomscrapes.nil? && !limitedCustomscrapes.empty? - limitedCustomscrapes.each { |scrape| - scrape["label_limit"] = 63 - scrape["label_name_length_limit"] = 511 - scrape["label_value_length_limit"] = 1023 - ConfigParseErrorLogger.log(LOGGING_PREFIX, " Successfully set label limits in custom scrape config for job #{scrape["job_name"]}") - } - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Done setting label limits for custom scrape config ...") - return YAML::dump(limitedCustomConfig) - else - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "No Jobs found to set label limits while processing custom scrape config") - return prometheusConfigString - end - else - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "Nothing to set for label limits while processing custom scrape config") - return prometheusConfigString - end - rescue => errStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception when setting label limits while processing custom scrape config - #{errStr}") - return prometheusConfigString - end -end - -# Populate default scrape config(s) if AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED is set to false -# and write them as a collector config file, in case the custom config validation fails, -# and we need to fall back to defaults -def writeDefaultScrapeTargetsFile() - ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Start Merging Default and Custom Prometheus Config") - if !ENV["AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED"].downcase == "false" - begin - loadRegexHash - loadIntervalHash - populateDefaultPrometheusConfig - if !@mergedDefaultConfigs.nil? && !@mergedDefaultConfigs.empty? 
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "Starting to merge default prometheus config values in collector template as backup") - mergedDefaultConfigYaml = YAML::dump(@mergedDefaultConfigs) - File.open(@mergedDefaultConfigPath, "w") { |file| file.puts mergedDefaultConfigYaml } - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Error while populating default scrape targets and writing them to the default scrape targets file") - end - end -end - -def setDefaultFileScrapeInterval(scrapeInterval) - defaultFilesArray = [ - @kubeletDefaultFileRsSimple, @kubeletDefaultFileRsAdvanced, @kubeletDefaultFileDs, @kubeletDefaultFileRsAdvancedWindowsDaemonset, - @corednsDefaultFile, @cadvisorDefaultFileRsSimple, @cadvisorDefaultFileRsAdvanced, @cadvisorDefaultFileDs, @kubeproxyDefaultFile, - @apiserverDefaultFile, @kubestateDefaultFile, @nodeexporterDefaultFileRsSimple, @nodeexporterDefaultFileRsAdvanced, @nodeexporterDefaultFileDs, - @prometheusCollectorHealthDefaultFile, @windowsexporterDefaultRsSimpleFile, @windowsexporterDefaultDsFile, - @windowskubeproxyDefaultFileRsSimpleFile, @windowskubeproxyDefaultDsFile, @podannotationsDefaultFile, - ] - - defaultFilesArray.each { |currentFile| - contents = File.read(currentFile) - contents = contents.gsub("$$SCRAPE_INTERVAL$$", scrapeInterval) - File.open(currentFile, "w") { |file| file.puts contents } - } -end - -def setGlobalScrapeConfigInDefaultFilesIfExists(configString) - customConfig = YAML.load(configString) - # set scrape interval to 30s for updating the default merged config - scrapeInterval = "30s" - if customConfig.has_key?("global") && customConfig["global"].has_key?("scrape_interval") - scrapeInterval = customConfig["global"]["scrape_interval"] - # Checking to see if the duration matches the pattern specified in the prometheus config - # Link to documenation with regex pattern -> https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file - matched = /^((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)$/.match(scrapeInterval) - if !matched - # set default global scrape interval to 1m if its not in the proper format - customConfig["global"]["scrape_interval"] = "1m" - scrapeInterval = "30s" - end - end - setDefaultFileScrapeInterval(scrapeInterval) - return YAML::dump(customConfig) -end - -prometheusConfigString = parseConfigMap -if !prometheusConfigString.nil? && !prometheusConfigString.empty? 
- modifiedPrometheusConfigString = setGlobalScrapeConfigInDefaultFilesIfExists(prometheusConfigString) - writeDefaultScrapeTargetsFile() - #set label limits for every custom scrape job, before merging the default & custom config - labellimitedconfigString = setLabelLimitsPerScrape(modifiedPrometheusConfigString) - mergeDefaultAndCustomScrapeConfigs(labellimitedconfigString) -else - setDefaultFileScrapeInterval("30s") - writeDefaultScrapeTargetsFile() -end -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Done Merging Default and Custom Prometheus Config") diff --git a/otelcollector/configmapparser/prometheus-config-merger.rb b/otelcollector/configmapparser/prometheus-config-merger.rb deleted file mode 100644 index 8153ed024..000000000 --- a/otelcollector/configmapparser/prometheus-config-merger.rb +++ /dev/null @@ -1,621 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require "tomlrb" -require "deep_merge" -require "yaml" -require_relative "ConfigParseErrorLogger" - -LOGGING_PREFIX = "prometheus-config-merger" -@configMapMountPath = "/etc/config/settings/prometheus/prometheus-config" -@promMergedConfigPath = "/opt/promMergedConfig.yml" -@mergedDefaultConfigPath = "/opt/defaultsMergedConfig.yml" -@replicasetControllerType = "replicaset" -@daemonsetControllerType = "daemonset" -@supportedSchemaVersion = true -@defaultPromConfigPathPrefix = "/opt/microsoft/otelcollector/default-prom-configs/" -@regexHashFile = "/opt/microsoft/configmapparser/config_def_targets_metrics_keep_list_hash" -@regexHash = {} -@sendDSUpMetric = false -@intervalHashFile = "/opt/microsoft/configmapparser/config_def_targets_scrape_intervals_hash" -@intervalHash = {} - -@kubeletDefaultFileRsSimple = @defaultPromConfigPathPrefix + "kubeletDefaultRsSimple.yml" -@kubeletDefaultFileRsAdvanced = @defaultPromConfigPathPrefix + "kubeletDefaultRsAdvanced.yml" -@kubeletDefaultFileDs = @defaultPromConfigPathPrefix + "kubeletDefaultDs.yml" -@kubeletDefaultFileRsAdvancedWindowsDaemonset = @defaultPromConfigPathPrefix + "kubeletDefaultRsAdvancedWindowsDaemonset.yml" -@corednsDefaultFile = @defaultPromConfigPathPrefix + "corednsDefault.yml" -@cadvisorDefaultFileRsSimple = @defaultPromConfigPathPrefix + "cadvisorDefaultRsSimple.yml" -@cadvisorDefaultFileRsAdvanced = @defaultPromConfigPathPrefix + "cadvisorDefaultRsAdvanced.yml" -@cadvisorDefaultFileDs = @defaultPromConfigPathPrefix + "cadvisorDefaultDs.yml" -@kubeproxyDefaultFile = @defaultPromConfigPathPrefix + "kubeproxyDefault.yml" -@apiserverDefaultFile = @defaultPromConfigPathPrefix + "apiserverDefault.yml" -@kubestateDefaultFile = @defaultPromConfigPathPrefix + "kubestateDefault.yml" -@nodeexporterDefaultFileRsSimple = @defaultPromConfigPathPrefix + "nodeexporterDefaultRsSimple.yml" -@nodeexporterDefaultFileRsAdvanced = @defaultPromConfigPathPrefix + "nodeexporterDefaultRsAdvanced.yml" -@nodeexporterDefaultFileDs = @defaultPromConfigPathPrefix + "nodeexporterDefaultDs.yml" -@prometheusCollectorHealthDefaultFile = @defaultPromConfigPathPrefix + "prometheusCollectorHealth.yml" -@windowsexporterDefaultRsSimpleFile = @defaultPromConfigPathPrefix + "windowsexporterDefaultRsSimple.yml" -@windowsexporterDefaultDsFile = @defaultPromConfigPathPrefix + "windowsexporterDefaultDs.yml" -@windowskubeproxyDefaultFileRsSimpleFile = @defaultPromConfigPathPrefix + "windowskubeproxyDefaultRsSimple.yml" -@windowskubeproxyDefaultDsFile = @defaultPromConfigPathPrefix + "windowskubeproxyDefaultDs.yml" -@podannotationsDefaultFile = @defaultPromConfigPathPrefix + 
"podannotationsDefault.yml" -@windowskubeproxyDefaultRsAdvancedFile = @defaultPromConfigPathPrefix + "windowskubeproxyDefaultRsAdvanced.yml" -@kappiebasicDefaultFileDs = @defaultPromConfigPathPrefix + "kappieBasicDefaultDs.yml" -@networkobservabilityRetinaDefaultFileDs = @defaultPromConfigPathPrefix + "networkobservabilityRetinaDefaultDs.yml" -@networkobservabilityHubbleDefaultFileDs = @defaultPromConfigPathPrefix + "networkobservabilityHubbleDefaultDs.yml" -@networkobservabilityCiliumDefaultFileDs = @defaultPromConfigPathPrefix + "networkobservabilityCiliumDefaultDs.yml" - -def parseConfigMap - begin - # Check to see if config map is created - if (File.file?(@configMapMountPath)) - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Custom prometheus config exists") - config = File.read(@configMapMountPath) - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Successfully parsed configmap for prometheus config") - return config - else - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "Custom prometheus config does not exist, using only default scrape targets if they are enabled") - return "" - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing configmap for prometheus config: #{errorStr}. Custom prometheus config will not be used. Please check configmap for errors") - return "" - end -end - -def loadRegexHash - begin - @regexHash = YAML.load_file(@regexHashFile) - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception in loadRegexHash for prometheus config: #{errorStr}. Keep list regexes will not be used") - end -end - -def loadIntervalHash - begin - @intervalHash = YAML.load_file(@intervalHashFile) - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception in loadIntervalHash for prometheus config: #{errorStr}. Scrape interval will not be used") - end -end - -def UpdateScrapeIntervalConfig(yamlConfigFile, scrapeIntervalSetting) - begin - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Updating scrape interval config for #{yamlConfigFile}") - config = YAML.load(File.read(yamlConfigFile)) - scrapeIntervalConfig = scrapeIntervalSetting - - # Iterate through each scrape config and update scrape interval config - if !config.nil? - scrapeConfigs = config["scrape_configs"] - if !scrapeConfigs.nil? && !scrapeConfigs.empty? - scrapeConfigs.each { |scfg| - scrapeCfgs = scfg["scrape_interval"] - if !scrapeCfgs.nil? - scfg["scrape_interval"] = scrapeIntervalConfig - end - } - cfgYamlWithScrapeConfig = YAML::dump(config) - File.open(yamlConfigFile, "w") { |file| file.puts cfgYamlWithScrapeConfig } - end - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while updating scrape interval config in default target file - #{yamlConfigFile} : #{errorStr}. The Scrape interval will not be used") - end -end - -def AppendMetricRelabelConfig(yamlConfigFile, keepListRegex) - begin - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Adding keep list regex or minimal ingestion regex for #{yamlConfigFile}") - config = YAML.load(File.read(yamlConfigFile)) - keepListMetricRelabelConfig = [{ "source_labels" => ["__name__"], "action" => "keep", "regex" => keepListRegex }] - - # Iterate through each scrape config and append metric relabel config for keep list - if !config.nil? - scrapeConfigs = config["scrape_configs"] - if !scrapeConfigs.nil? && !scrapeConfigs.empty? - scrapeConfigs.each { |scfg| - metricRelabelCfgs = scfg["metric_relabel_configs"] - if metricRelabelCfgs.nil? 
- scfg["metric_relabel_configs"] = keepListMetricRelabelConfig - else - scfg["metric_relabel_configs"] = metricRelabelCfgs.concat(keepListMetricRelabelConfig) - end - } - cfgYamlWithMetricRelabelConfig = YAML::dump(config) - File.open(yamlConfigFile, "w") { |file| file.puts cfgYamlWithMetricRelabelConfig } - end - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while appending metric relabel config in default target file - #{yamlConfigFile} : #{errorStr}. The keep list regex will not be used") - end -end - -def AppendRelabelConfig(yamlConfigFile, relabelConfig, keepRegex) - begin - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Adding relabel config for #{yamlConfigFile}") - config = YAML.load(File.read(yamlConfigFile)) - - # Iterate through each scrape config and append metric relabel config for keep list - if !config.nil? - scrapeConfigs = config["scrape_configs"] - if !scrapeConfigs.nil? && !scrapeConfigs.empty? - scrapeConfigs.each { |scfg| - relabelCfgs = scfg["relabel_configs"] - if relabelCfgs.nil? - scfg["relabel_configs"] = relabelConfig - else - scfg["relabel_configs"] = relabelCfgs.concat(relabelConfig) - end - } - cfgYamlWithRelabelConfig = YAML::dump(config) - File.open(yamlConfigFile, "w") { |file| file.puts cfgYamlWithRelabelConfig } - end - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while appending relabel config in default target file - #{yamlConfigFile} : #{errorStr}. The keep list regex will not be used") - end -end - -# Get the list of default configs to be included in the otel's prometheus config -def populateDefaultPrometheusConfig - begin - # check if running in daemonset or replicaset - currentControllerType = ENV["CONTROLLER_TYPE"].strip.downcase - - advancedMode = false #default is false - windowsDaemonset = false #default is false - - # get current mode (advanced or not...) - currentMode = ENV["MODE"].strip.downcase - if currentMode == "advanced" - advancedMode = true - end - - # get if windowsdaemonset is enabled or not (ie. WINMODE env = advanced or not...) - winMode = ENV["WINMODE"].strip.downcase - if winMode == "advanced" - windowsDaemonset = true - end - - defaultConfigs = [] - if !ENV["AZMON_PROMETHEUS_KUBELET_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_KUBELET_SCRAPING_ENABLED"].downcase == "true" - kubeletMetricsKeepListRegex = @regexHash["KUBELET_METRICS_KEEP_LIST_REGEX"] - kubeletScrapeInterval = @intervalHash["KUBELET_SCRAPE_INTERVAL"] - if currentControllerType == @replicasetControllerType - if advancedMode == false - UpdateScrapeIntervalConfig(@kubeletDefaultFileRsSimple, kubeletScrapeInterval) - if !kubeletMetricsKeepListRegex.nil? && !kubeletMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@kubeletDefaultFileRsSimple, kubeletMetricsKeepListRegex) - end - defaultConfigs.push(@kubeletDefaultFileRsSimple) - elsif windowsDaemonset == true && @sendDSUpMetric == true - UpdateScrapeIntervalConfig(@kubeletDefaultFileRsAdvancedWindowsDaemonset, kubeletScrapeInterval) - defaultConfigs.push(@kubeletDefaultFileRsAdvancedWindowsDaemonset) - elsif @sendDSUpMetric == true - UpdateScrapeIntervalConfig(@kubeletDefaultFileRsAdvanced, kubeletScrapeInterval) - defaultConfigs.push(@kubeletDefaultFileRsAdvanced) - end - else - if advancedMode == true && (windowsDaemonset == true || ENV["OS_TYPE"].downcase == "linux") - UpdateScrapeIntervalConfig(@kubeletDefaultFileDs, kubeletScrapeInterval) - if !kubeletMetricsKeepListRegex.nil? && !kubeletMetricsKeepListRegex.empty? 
- AppendMetricRelabelConfig(@kubeletDefaultFileDs, kubeletMetricsKeepListRegex) - end - contents = File.read(@kubeletDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - contents = contents.gsub("$$OS_TYPE$$", ENV["OS_TYPE"]) - File.open(@kubeletDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@kubeletDefaultFileDs) - end - end - end - if !ENV["AZMON_PROMETHEUS_COREDNS_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_COREDNS_SCRAPING_ENABLED"].downcase == "true" && currentControllerType == @replicasetControllerType - corednsMetricsKeepListRegex = @regexHash["COREDNS_METRICS_KEEP_LIST_REGEX"] - corednsScrapeInterval = @intervalHash["COREDNS_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@corednsDefaultFile, corednsScrapeInterval) - if !corednsMetricsKeepListRegex.nil? && !corednsMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@corednsDefaultFile, corednsMetricsKeepListRegex) - end - defaultConfigs.push(@corednsDefaultFile) - end - if !ENV["AZMON_PROMETHEUS_CADVISOR_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_CADVISOR_SCRAPING_ENABLED"].downcase == "true" - cadvisorMetricsKeepListRegex = @regexHash["CADVISOR_METRICS_KEEP_LIST_REGEX"] - cadvisorScrapeInterval = @intervalHash["CADVISOR_SCRAPE_INTERVAL"] - if currentControllerType == @replicasetControllerType - if advancedMode == false - UpdateScrapeIntervalConfig(@cadvisorDefaultFileRsSimple, cadvisorScrapeInterval) - if !cadvisorMetricsKeepListRegex.nil? && !cadvisorMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@cadvisorDefaultFileRsSimple, cadvisorMetricsKeepListRegex) - end - defaultConfigs.push(@cadvisorDefaultFileRsSimple) - elsif @sendDSUpMetric == true - UpdateScrapeIntervalConfig(@cadvisorDefaultFileRsAdvanced, cadvisorScrapeInterval) - defaultConfigs.push(@cadvisorDefaultFileRsAdvanced) - end - else - if advancedMode == true && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@cadvisorDefaultFileDs, cadvisorScrapeInterval) - if !cadvisorMetricsKeepListRegex.nil? && !cadvisorMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@cadvisorDefaultFileDs, cadvisorMetricsKeepListRegex) - end - contents = File.read(@cadvisorDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@cadvisorDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@cadvisorDefaultFileDs) - end - end - end - if !ENV["AZMON_PROMETHEUS_KUBEPROXY_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_KUBEPROXY_SCRAPING_ENABLED"].downcase == "true" && currentControllerType == @replicasetControllerType - kubeproxyMetricsKeepListRegex = @regexHash["KUBEPROXY_METRICS_KEEP_LIST_REGEX"] - kubeproxyScrapeInterval = @intervalHash["KUBEPROXY_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@kubeproxyDefaultFile, kubeproxyScrapeInterval) - if !kubeproxyMetricsKeepListRegex.nil? && !kubeproxyMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@kubeproxyDefaultFile, kubeproxyMetricsKeepListRegex) - end - defaultConfigs.push(@kubeproxyDefaultFile) - end - if !ENV["AZMON_PROMETHEUS_APISERVER_SCRAPING_ENABLED"].nil? 
&& ENV["AZMON_PROMETHEUS_APISERVER_SCRAPING_ENABLED"].downcase == "true" && currentControllerType == @replicasetControllerType - apiserverMetricsKeepListRegex = @regexHash["APISERVER_METRICS_KEEP_LIST_REGEX"] - apiserverScrapeInterval = @intervalHash["APISERVER_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@apiserverDefaultFile, apiserverScrapeInterval) - if !apiserverMetricsKeepListRegex.nil? && !apiserverMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@apiserverDefaultFile, apiserverMetricsKeepListRegex) - end - defaultConfigs.push(@apiserverDefaultFile) - end - if !ENV["AZMON_PROMETHEUS_KUBESTATE_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_KUBESTATE_SCRAPING_ENABLED"].downcase == "true" && currentControllerType == @replicasetControllerType - kubestateMetricsKeepListRegex = @regexHash["KUBESTATE_METRICS_KEEP_LIST_REGEX"] - kubestateScrapeInterval = @intervalHash["KUBESTATE_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@kubestateDefaultFile, kubestateScrapeInterval) - if !kubestateMetricsKeepListRegex.nil? && !kubestateMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@kubestateDefaultFile, kubestateMetricsKeepListRegex) - end - contents = File.read(@kubestateDefaultFile) - contents = contents.gsub("$$KUBE_STATE_NAME$$", ENV["KUBE_STATE_NAME"]) - contents = contents.gsub("$$POD_NAMESPACE$$", ENV["POD_NAMESPACE"]) - File.open(@kubestateDefaultFile, "w") { |file| file.puts contents } - defaultConfigs.push(@kubestateDefaultFile) - end - if !ENV["AZMON_PROMETHEUS_NODEEXPORTER_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_NODEEXPORTER_SCRAPING_ENABLED"].downcase == "true" - nodeexporterMetricsKeepListRegex = @regexHash["NODEEXPORTER_METRICS_KEEP_LIST_REGEX"] - nodeexporterScrapeInterval = @intervalHash["NODEEXPORTER_SCRAPE_INTERVAL"] - if currentControllerType == @replicasetControllerType - if advancedMode == true && @sendDSUpMetric == true - UpdateScrapeIntervalConfig(@nodeexporterDefaultFileRsAdvanced, nodeexporterScrapeInterval) - contents = File.read(@nodeexporterDefaultFileRsAdvanced) - contents = contents.gsub("$$NODE_EXPORTER_NAME$$", ENV["NODE_EXPORTER_NAME"]) - contents = contents.gsub("$$POD_NAMESPACE$$", ENV["POD_NAMESPACE"]) - File.open(@nodeexporterDefaultFileRsAdvanced, "w") { |file| file.puts contents } - defaultConfigs.push(@nodeexporterDefaultFileRsAdvanced) - elsif advancedMode == false - UpdateScrapeIntervalConfig(@nodeexporterDefaultFileRsSimple, nodeexporterScrapeInterval) - if !nodeexporterMetricsKeepListRegex.nil? && !nodeexporterMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@nodeexporterDefaultFileRsSimple, nodeexporterMetricsKeepListRegex) - end - contents = File.read(@nodeexporterDefaultFileRsSimple) - contents = contents.gsub("$$NODE_EXPORTER_NAME$$", ENV["NODE_EXPORTER_NAME"]) - contents = contents.gsub("$$POD_NAMESPACE$$", ENV["POD_NAMESPACE"]) - File.open(@nodeexporterDefaultFileRsSimple, "w") { |file| file.puts contents } - defaultConfigs.push(@nodeexporterDefaultFileRsSimple) - end - else - if advancedMode == true && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@nodeexporterDefaultFileDs, nodeexporterScrapeInterval) - if !nodeexporterMetricsKeepListRegex.nil? && !nodeexporterMetricsKeepListRegex.empty? 
- AppendMetricRelabelConfig(@nodeexporterDefaultFileDs, nodeexporterMetricsKeepListRegex) - end - contents = File.read(@nodeexporterDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_EXPORTER_TARGETPORT$$", ENV["NODE_EXPORTER_TARGETPORT"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@nodeexporterDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@nodeexporterDefaultFileDs) - end - end - end - - if !ENV["AZMON_PROMETHEUS_KAPPIEBASIC_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_KAPPIEBASIC_SCRAPING_ENABLED"].downcase == "true" - kappiebasicMetricsKeepListRegex = @regexHash["KAPPIEBASIC_METRICS_KEEP_LIST_REGEX"] - kappiebasicScrapeInterval = @intervalHash["KAPPIEBASIC_SCRAPE_INTERVAL"] - if currentControllerType == @replicasetControllerType - #do nothing -- kappie is not supported to be scraped automatically outside ds. if needed, customer can disable this ds target, and enable rs scraping through custom config map - else #kappie scraping will be turned ON by default only when in MAC/addon mode (for both windows & linux) - if advancedMode == true && !ENV['MAC'].nil? && !ENV['MAC'].empty? && ENV['MAC'].strip.downcase == "true" #&& ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@kappiebasicDefaultFileDs, kappiebasicScrapeInterval) - if !kappiebasicMetricsKeepListRegex.nil? && !kappiebasicMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@kappiebasicDefaultFileDs, kappiebasicMetricsKeepListRegex) - end - contents = File.read(@kappiebasicDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@kappiebasicDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@kappiebasicDefaultFileDs) - end - end - end - - if !ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYRETINA_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYRETINA_SCRAPING_ENABLED"].downcase == "true" - networkobservabilityRetinaMetricsKeepListRegex = @regexHash["NETWORKOBSERVABILITYRETINA_METRICS_KEEP_LIST_REGEX"] - networkobservabilityRetinaScrapeInterval = @intervalHash["NETWORKOBSERVABILITYRETINA_SCRAPE_INTERVAL"] - if currentControllerType == @replicasetControllerType - #do nothing -- networkobservabilityRetina is not supported to be scraped automatically outside ds. if needed, customer can disable this ds target, and enable rs scraping through custom config map - else #networkobservabilityRetina scraping will be turned ON by default only when in MAC/addon mode (for both windows & linux) - if advancedMode == true && !ENV['MAC'].nil? && !ENV['MAC'].empty? && ENV['MAC'].strip.downcase == "true" #&& ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@networkobservabilityRetinaDefaultFileDs, networkobservabilityRetinaScrapeInterval) - if !networkobservabilityRetinaMetricsKeepListRegex.nil? && !networkobservabilityRetinaMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@networkobservabilityRetinaDefaultFileDs, networkobservabilityRetinaMetricsKeepListRegex) - end - contents = File.read(@networkobservabilityRetinaDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@networkobservabilityRetinaDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@networkobservabilityRetinaDefaultFileDs) - end - end - end - - if !ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYHUBBLE_SCRAPING_ENABLED"].nil?
&& ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYHUBBLE_SCRAPING_ENABLED"].downcase == "true" - networkobservabilityHubbleMetricsKeepListRegex = @regexHash["NETWORKOBSERVABILITYHUBBLE_METRICS_KEEP_LIST_REGEX"] - networkobservabilityHubbleScrapeInterval = @intervalHash["NETWORKOBSERVABILITYHUBBLE_SCRAPE_INTERVAL"] - if currentControllerType == @replicasetControllerType - #do nothing -- kappie is not supported to be scrapped automatically outside ds. if needed, customer can disable this ds target, and enable rs scraping thru custom config map - else #networkobservabilityHubble scraping will be turned ON by default only when in MAC/addon mode (for both windows & linux) - if advancedMode == true && !ENV['MAC'].nil? && !ENV['MAC'].empty? && ENV['MAC'].strip.downcase == "true" && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@networkobservabilityHubbleDefaultFileDs, networkobservabilityHubbleScrapeInterval) - if !networkobservabilityHubbleMetricsKeepListRegex.nil? && !networkobservabilityHubbleMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@networkobservabilityHubbleDefaultFileDs, networkobservabilityHubbleMetricsKeepListRegex) - end - contents = File.read(@networkobservabilityHubbleDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@networkobservabilityHubbleDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@networkobservabilityHubbleDefaultFileDs) - end - end - end - - if !ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYCILIUM_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_NETWORKOBSERVABILITYCILIUM_SCRAPING_ENABLED"].downcase == "true" - networkobservabilityCiliumMetricsKeepListRegex = @regexHash["NETWORKOBSERVABILITYCILIUM_METRICS_KEEP_LIST_REGEX"] - networkobservabilityCiliumScrapeInterval = @intervalHash["NETWORKOBSERVABILITYCILIUM_SCRAPE_INTERVAL"] - if currentControllerType == @replicasetControllerType - #do nothing -- kappie is not supported to be scrapped automatically outside ds. if needed, customer can disable this ds target, and enable rs scraping thru custom config map - else #networkobservabilityCilium scraping will be turned ON by default only when in MAC/addon mode (for both windows & linux) - if advancedMode == true && !ENV['MAC'].nil? && !ENV['MAC'].empty? && ENV['MAC'].strip.downcase == "true" && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@networkobservabilityCiliumDefaultFileDs, networkobservabilityCiliumScrapeInterval) - if !networkobservabilityCiliumMetricsKeepListRegex.nil? && !networkobservabilityCiliumMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@networkobservabilityCiliumDefaultFileDs, networkobservabilityCiliumMetricsKeepListRegex) - end - contents = File.read(@networkobservabilityCiliumDefaultFileDs) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@networkobservabilityCiliumDefaultFileDs, "w") { |file| file.puts contents } - defaultConfigs.push(@networkobservabilityCiliumDefaultFileDs) - end - end - end - - - # Collector health config should be enabled or disabled for both replicaset and daemonset - if !ENV["AZMON_PROMETHEUS_COLLECTOR_HEALTH_SCRAPING_ENABLED"].nil? 
&& ENV["AZMON_PROMETHEUS_COLLECTOR_HEALTH_SCRAPING_ENABLED"].downcase == "true" - prometheusCollectorHealthInterval = @intervalHash["PROMETHEUS_COLLECTOR_HEALTH_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@prometheusCollectorHealthDefaultFile, prometheusCollectorHealthInterval) - defaultConfigs.push(@prometheusCollectorHealthDefaultFile) - end - - if !ENV["AZMON_PROMETHEUS_WINDOWSEXPORTER_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_WINDOWSEXPORTER_SCRAPING_ENABLED"].downcase == "true" - winexporterMetricsKeepListRegex = @regexHash["WINDOWSEXPORTER_METRICS_KEEP_LIST_REGEX"] - windowsexporterScrapeInterval = @intervalHash["WINDOWSEXPORTER_SCRAPE_INTERVAL"] - if currentControllerType == @replicasetControllerType && advancedMode == false && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@windowsexporterDefaultRsSimpleFile, windowsexporterScrapeInterval) - if !winexporterMetricsKeepListRegex.nil? && !winexporterMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@windowsexporterDefaultRsSimpleFile, winexporterMetricsKeepListRegex) - end - contents = File.read(@windowsexporterDefaultRsSimpleFile) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@windowsexporterDefaultRsSimpleFile, "w") { |file| file.puts contents } - defaultConfigs.push(@windowsexporterDefaultRsSimpleFile) - elsif currentControllerType == @daemonsetControllerType && advancedMode == true && windowsDaemonset == true && ENV["OS_TYPE"].downcase == "windows" - UpdateScrapeIntervalConfig(@windowsexporterDefaultDsFile, windowsexporterScrapeInterval) - if !winexporterMetricsKeepListRegex.nil? && !winexporterMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@windowsexporterDefaultDsFile, winexporterMetricsKeepListRegex) - end - contents = File.read(@windowsexporterDefaultDsFile) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@windowsexporterDefaultDsFile, "w") { |file| file.puts contents } - defaultConfigs.push(@windowsexporterDefaultDsFile) - end - end - - if !ENV["AZMON_PROMETHEUS_WINDOWSKUBEPROXY_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_WINDOWSKUBEPROXY_SCRAPING_ENABLED"].downcase == "true" - winkubeproxyMetricsKeepListRegex = @regexHash["WINDOWSKUBEPROXY_METRICS_KEEP_LIST_REGEX"] - windowskubeproxyScrapeInterval = @intervalHash["WINDOWSKUBEPROXY_SCRAPE_INTERVAL"] - if currentControllerType == @replicasetControllerType && advancedMode == false && ENV["OS_TYPE"].downcase == "linux" - UpdateScrapeIntervalConfig(@windowskubeproxyDefaultFileRsSimpleFile, windowskubeproxyScrapeInterval) - if !winkubeproxyMetricsKeepListRegex.nil? && !winkubeproxyMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@windowskubeproxyDefaultFileRsSimpleFile, winkubeproxyMetricsKeepListRegex) - end - contents = File.read(@windowskubeproxyDefaultFileRsSimpleFile) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@windowskubeproxyDefaultFileRsSimpleFile, "w") { |file| file.puts contents } - defaultConfigs.push(@windowskubeproxyDefaultFileRsSimpleFile) - elsif currentControllerType == @daemonsetControllerType && advancedMode == true && windowsDaemonset == true && ENV["OS_TYPE"].downcase == "windows" - UpdateScrapeIntervalConfig(@windowskubeproxyDefaultDsFile, windowskubeproxyScrapeInterval) - if !winkubeproxyMetricsKeepListRegex.nil? 
&& !winkubeproxyMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@windowskubeproxyDefaultDsFile, winkubeproxyMetricsKeepListRegex) - end - contents = File.read(@windowskubeproxyDefaultDsFile) - contents = contents.gsub("$$NODE_IP$$", ENV["NODE_IP"]) - contents = contents.gsub("$$NODE_NAME$$", ENV["NODE_NAME"]) - File.open(@windowskubeproxyDefaultDsFile, "w") { |file| file.puts contents } - defaultConfigs.push(@windowskubeproxyDefaultDsFile) - end - end - - if !ENV["AZMON_PROMETHEUS_POD_ANNOTATION_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_POD_ANNOTATION_SCRAPING_ENABLED"].downcase == "true" && currentControllerType == @replicasetControllerType - podannotationNamespacesRegex = ENV["AZMON_PROMETHEUS_POD_ANNOTATION_NAMESPACES_REGEX"] - podannotationMetricsKeepListRegex = @regexHash["POD_ANNOTATION_METRICS_KEEP_LIST_REGEX"] - podannotationScrapeInterval = @intervalHash["POD_ANNOTATION_SCRAPE_INTERVAL"] - UpdateScrapeIntervalConfig(@podannotationsDefaultFile, podannotationScrapeInterval) - if !podannotationMetricsKeepListRegex.nil? && !podannotationMetricsKeepListRegex.empty? - AppendMetricRelabelConfig(@podannotationsDefaultFile, podannotationMetricsKeepListRegex) - end - if !podannotationNamespacesRegex.nil? && !podannotationNamespacesRegex.empty? - relabelConfig = [{ "source_labels" => ["__meta_kubernetes_namespace"], "action" => "keep", "regex" => podannotationNamespacesRegex }] - AppendRelabelConfig(@podannotationsDefaultFile, relabelConfig, podannotationNamespacesRegex) - end - defaultConfigs.push(@podannotationsDefaultFile) - end - - @mergedDefaultConfigs = mergeDefaultScrapeConfigs(defaultConfigs) - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while merging default scrape targets - #{errorStr}. No default scrape targets will be included") - @mergedDefaultConfigs = "" - end -end - -def mergeDefaultScrapeConfigs(defaultScrapeConfigs) - mergedDefaultConfigs = "" - begin - if defaultScrapeConfigs.length > 0 - mergedDefaultConfigs = YAML.load("scrape_configs:") - # Load each of the default scrape configs and merge them - defaultScrapeConfigs.each { |defaultScrapeConfig| - # Load yaml from default config - defaultConfigYaml = YAML.load(File.read(defaultScrapeConfig)) - mergedDefaultConfigs = mergedDefaultConfigs.deep_merge!(defaultConfigYaml) - } - end - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Done merging #{defaultScrapeConfigs.length} default prometheus config(s)") - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while adding default scrape config- #{errorStr}. No default scrape targets will be included") - mergedDefaultConfigs = "" - end - return mergedDefaultConfigs -end - -def mergeDefaultAndCustomScrapeConfigs(customPromConfig) - mergedConfigYaml = "" - begin - if !@mergedDefaultConfigs.nil? && !@mergedDefaultConfigs.empty? 
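mergeDefaultScrapeConfigs above seeds an empty scrape_configs document and folds each default file in with deep_merge!. The same call sequence on two inline documents (contents invented) shows the result:

require "yaml"
require "deep_merge"   # same gem the merger requires at the top of the file

merged = YAML.load("scrape_configs:")
["scrape_configs:\n- job_name: a\n", "scrape_configs:\n- job_name: b\n"].each do |doc|
  merged = merged.deep_merge!(YAML.load(doc))
end
puts YAML.dump(merged)   # scrape_configs now lists both jobs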
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "Merging default and custom scrape configs") - customPrometheusConfig = YAML.load(customPromConfig) - mergedConfigs = @mergedDefaultConfigs.deep_merge!(customPrometheusConfig) - mergedConfigYaml = YAML::dump(mergedConfigs) - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Done merging default scrape config(s) with custom prometheus config, writing them to file") - else - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "The merged default scrape config is nil or empty, using only custom scrape config") - mergedConfigYaml = customPromConfig - end - File.open(@promMergedConfigPath, "w") { |file| file.puts mergedConfigYaml } - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while merging default and custom scrape configs- #{errorStr}") - end -end - -#this will enforce num labels, label name length & label value length for every scrape job to be with-in azure monitor supported limits -# by injecting these into every custom scrape job's config. For default scrape jobs, this is already included in them. We do this here, so the config validation can happen after we inject these into the custom scrape jobs . -def setLabelLimitsPerScrape(prometheusConfigString) - customConfig = prometheusConfigString - ConfigParseErrorLogger.log(LOGGING_PREFIX, "setLabelLimitsPerScrape()") - begin - if !customConfig.nil? && !customConfig.empty? - limitedCustomConfig = YAML.load(customConfig) - limitedCustomscrapes = limitedCustomConfig["scrape_configs"] - if !limitedCustomscrapes.nil? && !limitedCustomscrapes.empty? - limitedCustomscrapes.each { |scrape| - scrape["label_limit"] = 63 - scrape["label_name_length_limit"] = 511 - scrape["label_value_length_limit"] = 1023 - ConfigParseErrorLogger.log(LOGGING_PREFIX, " Successfully set label limits in custom scrape config for job #{scrape["job_name"]}") - } - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Done setting label limits for custom scrape config ...") - return YAML::dump(limitedCustomConfig) - else - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "No Jobs found to set label limits while processing custom scrape config") - return prometheusConfigString - end - else - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "Nothing to set for label limits while processing custom scrape config") - return prometheusConfigString - end - rescue => errStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception when setting label limits while processing custom scrape config - #{errStr}") - return prometheusConfigString - end -end - -# Populate default scrape config(s) if AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED is set to false -# and write them as a collector config file, in case the custom config validation fails, -# and we need to fall back to defaults -def writeDefaultScrapeTargetsFile() - ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Start Merging Default and Custom Prometheus Config") - if !ENV["AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED"].nil? && ENV["AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED"].downcase == "false" - begin - loadRegexHash - loadIntervalHash - populateDefaultPrometheusConfig - if !@mergedDefaultConfigs.nil? && !@mergedDefaultConfigs.empty? 
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "Starting to merge default prometheus config values in collector template as backup") - mergedDefaultConfigYaml = YAML::dump(@mergedDefaultConfigs) - File.open(@mergedDefaultConfigPath, "w") { |file| file.puts mergedDefaultConfigYaml } - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Error while populating default scrape targets and writing them to the default scrape targets file") - end - end -end - -def setDefaultFileScrapeInterval(scrapeInterval) - defaultFilesArray = [ - @kubeletDefaultFileRsSimple, @kubeletDefaultFileRsAdvanced, @kubeletDefaultFileDs, @kubeletDefaultFileRsAdvancedWindowsDaemonset, - @corednsDefaultFile, @cadvisorDefaultFileRsSimple, @cadvisorDefaultFileRsAdvanced, @cadvisorDefaultFileDs, @kubeproxyDefaultFile, - @apiserverDefaultFile, @kubestateDefaultFile, @nodeexporterDefaultFileRsSimple, @nodeexporterDefaultFileRsAdvanced, @nodeexporterDefaultFileDs, - @prometheusCollectorHealthDefaultFile, @windowsexporterDefaultRsSimpleFile, @windowsexporterDefaultDsFile, - @windowskubeproxyDefaultFileRsSimpleFile, @windowskubeproxyDefaultDsFile, @podannotationsDefaultFile - ] - - defaultFilesArray.each { |currentFile| - contents = File.read(currentFile) - contents = contents.gsub("$$SCRAPE_INTERVAL$$", scrapeInterval) - File.open(currentFile, "w") { |file| file.puts contents } - } -end - -def setGlobalScrapeConfigInDefaultFilesIfExists(configString) - customConfig = YAML.load(configString) - # set scrape interval to 30s for updating the default merged config - scrapeInterval = "30s" - if customConfig.has_key?("global") && customConfig["global"].has_key?("scrape_interval") - scrapeInterval = customConfig["global"]["scrape_interval"] - # Checking to see if the duration matches the pattern specified in the prometheus config - # Link to documenation with regex pattern -> https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file - matched = /^((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)$/.match(scrapeInterval) - if !matched - # set default global scrape interval to 1m if its not in the proper format - customConfig["global"]["scrape_interval"] = "1m" - scrapeInterval = "30s" - end - end - setDefaultFileScrapeInterval(scrapeInterval) - return YAML::dump(customConfig) -end - -prometheusConfigString = parseConfigMap -if !prometheusConfigString.nil? && !prometheusConfigString.empty? 
- modifiedPrometheusConfigString = setGlobalScrapeConfigInDefaultFilesIfExists(prometheusConfigString) - writeDefaultScrapeTargetsFile() - #set label limits for every custom scrape job, before merging the default & custom config - labellimitedconfigString = setLabelLimitsPerScrape(modifiedPrometheusConfigString) - mergeDefaultAndCustomScrapeConfigs(labellimitedconfigString) -else - setDefaultFileScrapeInterval("30s") - writeDefaultScrapeTargetsFile() -end -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Done Merging Default and Custom Prometheus Config") diff --git a/otelcollector/configmapparser/tomlparser-debug-mode.rb b/otelcollector/configmapparser/tomlparser-debug-mode.rb deleted file mode 100644 index 58d25bd71..000000000 --- a/otelcollector/configmapparser/tomlparser-debug-mode.rb +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require "tomlrb" -require "yaml" -require_relative "ConfigParseErrorLogger" - -LOGGING_PREFIX = "debug-mode-config" -@configMapMountPath = "/etc/config/settings/debug-mode" -@configVersion = "" -@configSchemaVersion = "" -@replicasetCollectorConfig = "/opt/microsoft/otelcollector/collector-config-replicaset.yml" - -# Setting default values which will be used in case they are not set in the configmap or if the configmap doesn't exist -@defaultEnabled = false - -def parseConfigMap - begin - if (File.file?(@configMapMountPath)) - parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) - return parsedConfig - else - return nil - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing config map for debug mode: #{errorStr}, using defaults, please check config map for errors") - return nil - end -end - -def populateSettingValuesFromConfigMap(parsedConfig) - begin - if !parsedConfig.nil? && !parsedConfig[:enabled].nil? - @defaultEnabled = parsedConfig[:enabled] - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap setting for debug mode: #{@defaultEnabled}") - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for debug mode- #{errorStr}, using defaults, please check config map for errors") - end -end - -@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Start debug-mode Settings Processing") -if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it - configMapSettings = parseConfigMap - if !configMapSettings.nil? - populateSettingValuesFromConfigMap(configMapSettings) - end -else - if (File.file?(@configMapMountPath)) - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Unsupported/missing config schema version - '#{@configSchemaVersion}', using defaults, please use supported schema version") - end -end - -# Write the settings to file, so that they can be set as environment variables -file = File.open("/opt/microsoft/configmapparser/config_debug_mode_env_var", "w") - -if !file.nil?
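The debug-mode settings above arrive as TOML rather than YAML, read via Tomlrb.load_file with symbolized keys. A one-line sketch using Tomlrb.parse on an inline string (the inline string is an assumption standing in for the mounted file) shows the same symbolize_keys behavior the parser relies on:

require "tomlrb"

# Inline TOML stands in for the mounted configmap file.
parsed = Tomlrb.parse("enabled = true", symbolize_keys: true)
puts parsed[:enabled]   # => true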
&& ENV["OS_TYPE"].downcase == "linux" - file.write("export DEBUG_MODE_ENABLED=#{@defaultEnabled}\n") - else - file.write("DEBUG_MODE_ENABLED=#{@defaultEnabled}\n") - end - - file.close -else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while opening file for writing prometheus-collector config environment variables") -end -# Adding logic to set otlp in service pipeline metrics when debug mode is enabled. This is done in promconfigvalidator for daemonset. -# We need to do this here for the replicaset since we don't run the promconfigvalidator for rs config. -if @defaultEnabled == true - begin - controllerType = ENV["CONTROLLER_TYPE"] - if !controllerType.nil? && !controllerType.empty? && controllerType == "ReplicaSet" - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Setting otlp in the exporter metrics for service pipeline since debug mode is enabled ...") - config = YAML.load(File.read(@replicasetCollectorConfig)) - if !config.nil? - config["service"]["pipelines"]["metrics"]["exporters"] = ["otlp", "prometheus"] - cfgYamlWithDebugModeSettings = YAML::dump(config) - File.open(@replicasetCollectorConfig, "w") { |file| file.puts cfgYamlWithDebugModeSettings } - end - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Done setting otlp in the exporter metrics for service pipeline.") - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while setting otlp in the exporter metrics for service pipeline when debug mode is enabled - #{errorStr}") - end -end - -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "End debug-mode Settings Processing") diff --git a/otelcollector/configmapparser/tomlparser-default-scrape-settings.rb b/otelcollector/configmapparser/tomlparser-default-scrape-settings.rb deleted file mode 100644 index 84441723f..000000000 --- a/otelcollector/configmapparser/tomlparser-default-scrape-settings.rb +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require "tomlrb" -require_relative "ConfigParseErrorLogger" - -LOGGING_PREFIX = "default-scrape-settings" - -@configMapMountPath = "/etc/config/settings/default-scrape-settings-enabled" -@configVersion = "" -@configSchemaVersion = "" - -@kubeletEnabled = true -@corednsEnabled = true -@cadvisorEnabled = true -@kubeproxyEnabled = true -@apiserverEnabled = true -@kubestateEnabled = true -@nodeexporterEnabled = true -@prometheusCollectorHealthEnabled = true -@podannotationEnabled = false -@windowsexporterEnabled = false -@windowskubeproxyEnabled = false -@kappiebasicEnabled = true -@networkobservabilityRetinaEnabled = true -@networkobservabilityHubbleEnabled = true -@networkobservabilityCiliumEnabled = true -@noDefaultsEnabled = false -@sendDSUpMetric = false - -# Use parser to parse the configmap toml file to a ruby structure -def parseConfigMap - begin - # Check to see if config map is created - if (File.file?(@configMapMountPath)) - parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) - return parsedConfig - else - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "configmapprometheus-collector-configmap for scrape targets not mounted, using defaults") - return nil - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing config map for default scrape settings: #{errorStr}, using defaults, please check config map for errors") - return nil - end -end - -# Use the ruby structure created after config parsing to set the right values to be used for otel collector settings -def 
populateSettingValuesFromConfigMap(parsedConfig)
- begin
- if !parsedConfig[:kubelet].nil?
- @kubeletEnabled = parsedConfig[:kubelet]
- puts "config::Using configmap scrape settings for kubelet: #{@kubeletEnabled}"
- end
- if !parsedConfig[:coredns].nil?
- @corednsEnabled = parsedConfig[:coredns]
- puts "config::Using configmap scrape settings for coredns: #{@corednsEnabled}"
- end
- if !parsedConfig[:cadvisor].nil?
- @cadvisorEnabled = parsedConfig[:cadvisor]
- puts "config::Using configmap scrape settings for cadvisor: #{@cadvisorEnabled}"
- end
- if !parsedConfig[:kubeproxy].nil?
- @kubeproxyEnabled = parsedConfig[:kubeproxy]
- puts "config::Using configmap scrape settings for kubeproxy: #{@kubeproxyEnabled}"
- end
- if !parsedConfig[:apiserver].nil?
- @apiserverEnabled = parsedConfig[:apiserver]
- puts "config::Using configmap scrape settings for apiserver: #{@apiserverEnabled}"
- end
- if !parsedConfig[:kubestate].nil?
- @kubestateEnabled = parsedConfig[:kubestate]
- puts "config::Using configmap scrape settings for kubestate: #{@kubestateEnabled}"
- end
- if !parsedConfig[:nodeexporter].nil?
- @nodeexporterEnabled = parsedConfig[:nodeexporter]
- puts "config::Using configmap scrape settings for nodeexporter: #{@nodeexporterEnabled}"
- end
- if !parsedConfig[:prometheuscollectorhealth].nil?
- @prometheusCollectorHealthEnabled = parsedConfig[:prometheuscollectorhealth]
- puts "config::Using configmap scrape settings for prometheuscollectorhealth: #{@prometheusCollectorHealthEnabled}"
- end
- if !parsedConfig[:windowsexporter].nil?
- @windowsexporterEnabled = parsedConfig[:windowsexporter]
- puts "config::Using configmap scrape settings for windowsexporter: #{@windowsexporterEnabled}"
- end
- if !parsedConfig[:windowskubeproxy].nil?
- @windowskubeproxyEnabled = parsedConfig[:windowskubeproxy]
- puts "config::Using configmap scrape settings for windowskubeproxy: #{@windowskubeproxyEnabled}"
- end
- if !ENV['AZMON_PROMETHEUS_POD_ANNOTATION_NAMESPACES_REGEX'].nil? && !ENV['AZMON_PROMETHEUS_POD_ANNOTATION_NAMESPACES_REGEX'].empty?
- @podannotationEnabled = "true"
- puts "config::Enabling podannotations scraping since AZMON_PROMETHEUS_POD_ANNOTATION_NAMESPACES_REGEX is set: #{@podannotationEnabled}"
- end
- if !parsedConfig[:kappiebasic].nil?
- @kappiebasicEnabled = parsedConfig[:kappiebasic]
- puts "config::Using configmap scrape settings for kappiebasic: #{@kappiebasicEnabled}"
- end
- if !parsedConfig[:networkobservabilityRetina].nil?
- @networkobservabilityRetinaEnabled = parsedConfig[:networkobservabilityRetina]
- puts "config::Using configmap scrape settings for networkobservabilityRetina: #{@networkobservabilityRetinaEnabled}"
- end
- if !parsedConfig[:networkobservabilityHubble].nil?
- @networkobservabilityHubbleEnabled = parsedConfig[:networkobservabilityHubble]
- puts "config::Using configmap scrape settings for networkobservabilityHubble: #{@networkobservabilityHubbleEnabled}"
- end
- if !parsedConfig[:networkobservabilityCilium].nil?
- @networkobservabilityCiliumEnabled = parsedConfig[:networkobservabilityCilium]
- puts "config::Using configmap scrape settings for networkobservabilityCilium: #{@networkobservabilityCiliumEnabled}"
- end
-
- windowsDaemonset = false
- if !ENV["WINMODE"].nil? && ENV["WINMODE"].strip.downcase == "advanced"
- windowsDaemonset = true
- end
-
- if !ENV["MODE"].nil? 
&& ENV["MODE"].strip.downcase == "advanced" - controllerType = ENV["CONTROLLER_TYPE"] - if controllerType == "DaemonSet" && ENV["OS_TYPE"].downcase == "windows" && !@windowsexporterEnabled && !@windowskubeproxyEnabled && !@kubeletEnabled && !@prometheusCollectorHealthEnabled && !@kappiebasicEnabled - @noDefaultsEnabled = true - elsif controllerType == "DaemonSet" && ENV["OS_TYPE"].downcase == "linux" && !@kubeletEnabled && !@cadvisorEnabled && !@nodeexporterEnabled && !@prometheusCollectorHealthEnabled && !kappiebasicEnabled - @noDefaultsEnabled = true - elsif controllerType == "ReplicaSet" && @sendDsUpMetric && !@kubeletEnabled && !@cadvisorEnabled && !@nodeexporterEnabled && !@corednsEnabled && !@kubeproxyEnabled && !@apiserverEnabled && !@kubestateEnabled && !@windowsexporterEnabled && !@windowskubeproxyEnabled && !@prometheusCollectorHealthEnabled && !@podannotationEnabled - @noDefaultsEnabled = true - elsif controllerType == "ReplicaSet" && !@sendDsUpMetric && windowsDaemonset && !@corednsEnabled && !@kubeproxyEnabled && !@apiserverEnabled && !@kubestateEnabled && !@prometheusCollectorHealthEnabled && !@podannotationEnabled - @noDefaultsEnabled = true - # Windows daemonset is not enabled so Windows kube-proxy and node-exporter are scraped from replica - elsif controllerType == "ReplicaSet" && !@sendDsUpMetric && !windowsDaemonset && !@corednsEnabled && !@kubeproxyEnabled && !@apiserverEnabled && !@kubestateEnabled && !@windowsexporterEnabled && !@windowskubeproxyEnabled && !@prometheusCollectorHealthEnabled && !@podannotationEnabled - @noDefaultsEnabled = true - end - elsif !@kubeletEnabled && !@corednsEnabled && !@cadvisorEnabled && !@kubeproxyEnabled && !@apiserverEnabled && !@kubestateEnabled && !@nodeexporterEnabled && !@windowsexporterEnabled && !@windowskubeproxyEnabled && !@prometheusCollectorHealthEnabled && !@podannotationEnabled - @noDefaultsEnabled = true - end - if @noDefaultsEnabled - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "No default scrape configs enabled") - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for default scrape settings - #{errorStr}, using defaults, please check config map for errors") - end -end - -@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Start default-scrape-settings Processing") -# set default targets for MAC mode -if !ENV['MAC'].nil? && !ENV['MAC'].empty? && ENV['MAC'].strip.downcase == "true" - ConfigParseErrorLogger.logWarning(LOGGING_PREFIX, "MAC mode is enabled. Only enabling targets kubestate,cadvisor,kubelet,kappiebasic,networkobservabilityRetina,networkobservabilityHubble,networkobservabilityCilium & nodeexporter for linux before config map processing....") - - @corednsEnabled = false - @kubeproxyEnabled = false - @apiserverEnabled = false - @prometheusCollectorHealthEnabled = false - -end -if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it - configMapSettings = parseConfigMap - if !configMapSettings.nil? 
- populateSettingValuesFromConfigMap(configMapSettings) - end -else - if (File.file?(@configMapMountPath)) - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version") - end -end - -# Write the settings to file, so that they can be set as environment variables -file = File.open("/opt/microsoft/configmapparser/config_default_scrape_settings_env_var", "w") - -$export = "export " -if !ENV['OS_TYPE'].nil? && ENV['OS_TYPE'].downcase == "windows" - $export = ""; -end - -if !file.nil? - file.write($export + "AZMON_PROMETHEUS_KUBELET_SCRAPING_ENABLED=#{@kubeletEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_COREDNS_SCRAPING_ENABLED=#{@corednsEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_CADVISOR_SCRAPING_ENABLED=#{@cadvisorEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_KUBEPROXY_SCRAPING_ENABLED=#{@kubeproxyEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_APISERVER_SCRAPING_ENABLED=#{@apiserverEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_KUBESTATE_SCRAPING_ENABLED=#{@kubestateEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_NODEEXPORTER_SCRAPING_ENABLED=#{@nodeexporterEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_NO_DEFAULT_SCRAPING_ENABLED=#{@noDefaultsEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_COLLECTOR_HEALTH_SCRAPING_ENABLED=#{@prometheusCollectorHealthEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_WINDOWSEXPORTER_SCRAPING_ENABLED=#{@windowsexporterEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_WINDOWSKUBEPROXY_SCRAPING_ENABLED=#{@windowskubeproxyEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_KAPPIEBASIC_SCRAPING_ENABLED=#{@kappiebasicEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_NETWORKOBSERVABILITYRETINA_SCRAPING_ENABLED=#{@networkobservabilityRetinaEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_NETWORKOBSERVABILITYHUBBLE_SCRAPING_ENABLED=#{@networkobservabilityHubbleEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_NETWORKOBSERVABILITYCILIUM_SCRAPING_ENABLED=#{@networkobservabilityCiliumEnabled}\n") - file.write($export + "AZMON_PROMETHEUS_POD_ANNOTATION_SCRAPING_ENABLED=#{@podannotationEnabled}\n") - # Close file after writing all metric collection setting environment variables - file.close -else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while opening file for writing default-scrape-settings config environment variables") -end -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "End default-scrape-settings Processing") diff --git a/otelcollector/configmapparser/tomlparser-default-targets-metrics-keep-list.rb b/otelcollector/configmapparser/tomlparser-default-targets-metrics-keep-list.rb deleted file mode 100644 index 4de66f9cf..000000000 --- a/otelcollector/configmapparser/tomlparser-default-targets-metrics-keep-list.rb +++ /dev/null @@ -1,392 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require "tomlrb" -if (!ENV["OS_TYPE"].nil? 
&& ENV["OS_TYPE"].downcase == "linux") - require "re2" -end -require "yaml" -require_relative "ConfigParseErrorLogger" -require_relative "tomlparser-utils" - -LOGGING_PREFIX = "default-scrape-keep-lists" - -@configMapMountPath = "/etc/config/settings/default-targets-metrics-keep-list" -@configVersion = "" -@configSchemaVersion = "" - -@kubeletRegex = "" -@corednsRegex = "" -@cadvisorRegex = "" -@kubeproxyRegex = "" -@apiserverRegex = "" -@kubestateRegex = "" -@nodeexporterRegex = "" -@windowsexporterRegex = "" -@windowskubeproxyRegex = "" -@podannotationRegex = "" -@kappiebasicRegex = "" -@networkobservabilityRetinaRegex = "" -@networkobservabilityHubbleRegex = "" -@networkobservabilityCiliumRegex = "" - -#This will always be string "true" as we set the string value in the chart for both MAC and non MAC modes -@minimalIngestionProfile = ENV["MINIMAL_INGESTION_PROFILE"] - -@isMacMode = false -if !ENV["MAC"].nil? && !ENV["MAC"].empty? && ENV["MAC"].strip.downcase == "true" - @isMacMode = true -end - -# minimal profile -- list of metrics to white-list for each target for 1p mode (non MAC). This list includes metrics used by default dashboards + alerts. -@kubeletRegex_minimal = "kubelet_volume_stats_used_bytes|kubelet_node_name|kubelet_running_pods|kubelet_running_pod_count|kubelet_running_containers|kubelet_running_container_count|volume_manager_total_volumes|kubelet_node_config_error|kubelet_runtime_operations_total|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_duration_seconds|kubelet_runtime_operations_duration_seconds_bucket|kubelet_runtime_operations_duration_seconds_sum|kubelet_runtime_operations_duration_seconds_count|kubelet_pod_start_duration_seconds|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_sum|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_sum|kubelet_pod_worker_duration_seconds_count|storage_operation_duration_seconds|storage_operation_duration_seconds_bucket|storage_operation_duration_seconds_sum|storage_operation_duration_seconds_count|storage_operation_errors_total|kubelet_cgroup_manager_duration_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_sum|kubelet_cgroup_manager_duration_seconds_count|kubelet_pleg_relist_duration_seconds|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_sum|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pleg_relist_interval_seconds_sum|kubelet_pleg_relist_interval_seconds_count|rest_client_requests_total|rest_client_request_duration_seconds|rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_available_bytes|kubelet_volume_stats_inodes_used|kubelet_volume_stats_inodes|kubernetes_build_info|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_server_expiration_renew_errors|kubelet_certificate_manager_server_ttl_seconds|kubelet_volume_stats_inodes_free" -@corednsRegex_minimal = 
"coredns_build_info|coredns_panics_total|coredns_dns_responses_total|coredns_forward_responses_total|coredns_dns_request_duration_seconds|coredns_dns_request_duration_seconds_bucket|coredns_dns_request_duration_seconds_sum|coredns_dns_request_duration_seconds_count|coredns_forward_request_duration_seconds|coredns_forward_request_duration_seconds_bucket|coredns_forward_request_duration_seconds_sum|coredns_forward_request_duration_seconds_count|coredns_dns_requests_total|coredns_forward_requests_total|coredns_cache_hits_total|coredns_cache_misses_total|coredns_cache_entries|coredns_plugin_enabled|coredns_dns_request_size_bytes|coredns_dns_request_size_bytes_bucket|coredns_dns_request_size_bytes_sum|coredns_dns_request_size_bytes_count|coredns_dns_response_size_bytes|coredns_dns_response_size_bytes_bucket|coredns_dns_response_size_bytes_sum|coredns_dns_response_size_bytes_count|coredns_dns_response_size_bytes_bucket|coredns_dns_response_size_bytes_sum|coredns_dns_response_size_bytes_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info" -@cadvisorRegex_minimal = "container_spec_cpu_period|container_spec_cpu_quota|container_cpu_usage_seconds_total|container_memory_rss|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_network_receive_packets_total|container_network_transmit_packets_total|container_network_receive_packets_dropped_total|container_network_transmit_packets_dropped_total|container_fs_reads_total|container_fs_writes_total|container_fs_reads_bytes_total|container_fs_writes_bytes_total|container_memory_working_set_bytes|container_memory_cache|container_memory_swap|container_cpu_cfs_throttled_periods_total|container_cpu_cfs_periods_total|container_memory_usage_bytes|kubernetes_build_info" -@kubeproxyRegex_minimal = "kubeproxy_sync_proxy_rules_duration_seconds|kubeproxy_sync_proxy_rules_duration_seconds_bucket|kubeproxy_sync_proxy_rules_duration_seconds_sum|kubeproxy_sync_proxy_rules_duration_seconds_count|kubeproxy_network_programming_duration_seconds|kubeproxy_network_programming_duration_seconds_bucket|kubeproxy_network_programming_duration_seconds_sum|kubeproxy_network_programming_duration_seconds_count|rest_client_requests_total|rest_client_request_duration_seconds|rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info" -@apiserverRegex_minimal = "apiserver_request_duration_seconds|apiserver_request_duration_seconds_bucket|apiserver_request_duration_seconds_sum|apiserver_request_duration_seconds_count|apiserver_request_total|workqueue_adds_total|workqueue_depth|workqueue_queue_duration_seconds|workqueue_queue_duration_seconds_bucket|workqueue_queue_duration_seconds_sum|workqueue_queue_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info|apiserver_request_slo_duration_seconds_bucket|apiserver_request_slo_duration_seconds_sum|apiserver_request_slo_duration_seconds_count" -@kubestateRegex_minimal = 
"kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_status_desired_replicas|kube_job_status_active|kube_node_status_capacity|kube_job_status_succeeded|kube_job_spec_completions|kube_daemonset_status_number_misscheduled|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_current_number_scheduled|kube_daemonset_status_number_ready|kube_deployment_spec_replicas|kube_deployment_status_replicas_ready|kube_pod_container_status_last_terminated_reason|kube_node_status_condition|kube_pod_container_status_restarts_total|kube_pod_container_resource_requests|kube_pod_status_phase|kube_pod_container_resource_limits|kube_node_status_allocatable|kube_pod_info|kube_pod_owner|kube_resourcequota|kube_statefulset_replicas|kube_statefulset_status_replicas|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas_current|kube_statefulset_status_replicas_updated|kube_namespace_status_phase|kube_node_info|kube_statefulset_metadata_generation|kube_pod_labels|kube_pod_annotations|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_spec_max_replicas|kube_node_spec_taint|kube_pod_container_status_waiting_reason|kube_job_failed|kube_job_status_start_time|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_replicaset_owner|kubernetes_build_info|kube_pod_container_info|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_labels|kube_persistentvolume_status_phase" -@nodeexporterRegex_minimal = "node_filesystem_readonly|node_cpu_seconds_total|node_memory_MemAvailable_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemFree_bytes|node_memory_Slab_bytes|node_memory_MemTotal_bytes|node_netstat_Tcp_RetransSegs|node_netstat_Tcp_OutSegs|node_netstat_TcpExt_TCPSynRetrans|node_load1|node_load5|node_load15|node_disk_read_bytes_total|node_disk_written_bytes_total|node_disk_io_time_seconds_total|node_filesystem_size_bytes|node_filesystem_avail_bytes|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_vmstat_pgmajfault|node_network_receive_drop_total|node_network_transmit_drop_total|node_disk_io_time_weighted_seconds_total|node_exporter_build_info|node_time_seconds|node_uname_info|kubernetes_build_info" -@kappiebasicRegex_minimal = "kappie.*" -@networkobservabilityRetinaRegex_minimal = "networkobservability.*" -@networkobservabilityHubbleRegex_minimal = "hubble_dns_queries_total|hubble_dns_responses_total|hubble_drop_total|hubble_tcp_flags_total" -@networkobservabilityCiliumRegex_minimal = "cilium_drop.*|cilium_forward.*" -@windowsexporterRegex_minimal = "windows_system_system_up_time|windows_cpu_time_total|windows_memory_available_bytes|windows_os_visible_memory_bytes|windows_memory_cache_bytes|windows_memory_modified_page_list_bytes|windows_memory_standby_cache_core_bytes|windows_memory_standby_cache_normal_priority_bytes|windows_memory_standby_cache_reserve_bytes|windows_memory_swap_page_operations_total|windows_logical_disk_read_seconds_total|windows_logical_disk_write_seconds_total|windows_logical_disk_size_bytes|windows_logical_disk_free_bytes|windows_net_bytes_total|windows_net_packets_received_discarded_total|windows_net_packets_outbound_discarded_total|windows_container_available|windows_container_cpu_usage_seconds_total|windows_container_memory_usage_commit_bytes|windows_container_memory_usage_private_working_set_bytes|windows_container_network_receive_bytes_total|windows_container_network_transmit_bytes_total" -@windowskubeproxyRegex_minimal = 
"kubeproxy_sync_proxy_rules_duration_seconds|kubeproxy_sync_proxy_rules_duration_seconds_bucket|kubeproxy_sync_proxy_rules_duration_seconds_sum|kubeproxy_sync_proxy_rules_duration_seconds_count|rest_client_requests_total|rest_client_request_duration_seconds|rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines" - -# minimal profile when MAC mode is enabled. This list includes metrics used by default dashboards + rec rules + alerts, when MAC mode is enabled. -@kubeletRegex_minimal_mac = "kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_used_bytes|kubelet_node_name|kubelet_running_pods|kubelet_running_pod_count|kubelet_running_sum_containers|kubelet_running_containers|kubelet_running_container_count|volume_manager_total_volumes|kubelet_node_config_error|kubelet_runtime_operations_total|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_duration_seconds_bucket|kubelet_runtime_operations_duration_seconds_sum|kubelet_runtime_operations_duration_seconds_count|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_sum|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_sum|kubelet_pod_worker_duration_seconds_count|storage_operation_duration_seconds_bucket|storage_operation_duration_seconds_sum|storage_operation_duration_seconds_count|storage_operation_errors_total|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_sum|kubelet_cgroup_manager_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pleg_relist_interval_seconds_count|kubelet_pleg_relist_interval_seconds_sum|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_duration_seconds_sum|rest_client_requests_total|rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_server_expiration_renew_errors|kubelet_certificate_manager_server_ttl_seconds|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes_free|kubelet_volume_stats_inodes_used|kubelet_volume_stats_inodes" -@corednsRegex_minimal_mac = 
"coredns_build_info|coredns_panics_total|coredns_dns_responses_total|coredns_forward_responses_total|coredns_dns_request_duration_seconds|coredns_dns_request_duration_seconds_bucket|coredns_dns_request_duration_seconds_sum|coredns_dns_request_duration_seconds_count|coredns_forward_request_duration_seconds|coredns_forward_request_duration_seconds_bucket|coredns_forward_request_duration_seconds_sum|coredns_forward_request_duration_seconds_count|coredns_dns_requests_total|coredns_forward_requests_total|coredns_cache_hits_total|coredns_cache_misses_total|coredns_cache_entries|coredns_plugin_enabled|coredns_dns_request_size_bytes|coredns_dns_request_size_bytes_bucket|coredns_dns_request_size_bytes_sum|coredns_dns_request_size_bytes_count|coredns_dns_response_size_bytes|coredns_dns_response_size_bytes_bucket|coredns_dns_response_size_bytes_sum|coredns_dns_response_size_bytes_count|coredns_dns_response_size_bytes_bucket|coredns_dns_response_size_bytes_sum|coredns_dns_response_size_bytes_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info" -@cadvisorRegex_minimal_mac = "container_spec_cpu_quota|container_spec_cpu_period|container_memory_rss|container_network_receive_bytes_total|container_network_transmit_bytes_total|container_network_receive_packets_total|container_network_transmit_packets_total|container_network_receive_packets_dropped_total|container_network_transmit_packets_dropped_total|container_fs_reads_total|container_fs_writes_total|container_fs_reads_bytes_total|container_fs_writes_bytes_total|container_cpu_usage_seconds_total|container_memory_working_set_bytes|container_memory_cache|container_memory_swap|container_cpu_cfs_throttled_periods_total|container_cpu_cfs_periods_total|container_memory_rss|kubernetes_build_info|container_start_time_seconds" -@kubeproxyRegex_minimal_mac = "kubeproxy_sync_proxy_rules_duration_seconds|kubeproxy_sync_proxy_rules_duration_seconds_bucket|kubeproxy_sync_proxy_rules_duration_seconds_sum|kubeproxy_sync_proxy_rules_duration_seconds_count|kubeproxy_network_programming_duration_seconds|kubeproxy_network_programming_duration_seconds_bucket|kubeproxy_network_programming_duration_seconds_sum|kubeproxy_network_programming_duration_seconds_count|rest_client_requests_total|rest_client_request_duration_seconds|rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info" -@apiserverRegex_minimal_mac = "apiserver_request_duration_seconds|apiserver_request_duration_seconds_bucket|apiserver_request_duration_seconds_sum|apiserver_request_duration_seconds_count|apiserver_request_total|workqueue_adds_total|workqueue_depth|workqueue_queue_duration_seconds|workqueue_queue_duration_seconds_bucket|workqueue_queue_duration_seconds_sum|workqueue_queue_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines|kubernetes_build_info|apiserver_request_slo_duration_seconds_bucket|apiserver_request_slo_duration_seconds_sum|apiserver_request_slo_duration_seconds_count" -@kubestateRegex_minimal_mac = 
"kube_job_status_succeeded|kube_job_spec_completions|kube_daemonset_status_desired_number_scheduled|kube_daemonset_status_current_number_scheduled|kube_daemonset_status_number_misscheduled|kube_daemonset_status_number_ready|kube_deployment_status_replicas_ready|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_waiting_reason|kube_pod_container_status_restarts_total|kube_node_status_allocatable|kube_pod_owner|kube_pod_container_resource_requests|kube_pod_status_phase|kube_pod_container_resource_limits|kube_replicaset_owner|kube_resourcequota|kube_namespace_status_phase|kube_node_status_capacity|kube_node_info|kube_pod_info|kube_deployment_spec_replicas|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_statefulset_status_replicas_ready|kube_statefulset_status_replicas|kube_statefulset_status_replicas_updated|kube_job_status_start_time|kube_job_status_active|kube_job_failed|kube_horizontalpodautoscaler_status_desired_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_spec_max_replicas|kubernetes_build_info|kube_node_status_condition|kube_node_spec_taint|kube_pod_container_info|kube_.*_labels|kube_.*_annotations|kube_service_info|kube_pod_container_status_running|kube_pod_container_status_waiting|kube_pod_container_status_terminated|kube_pod_container_state_started|kube_pod_created|kube_pod_start_time|kube_pod_init_container_info|kube_pod_init_container_status_terminated|kube_pod_init_container_status_terminated_reason|kube_pod_init_container_status_ready|kube_pod_init_container_resource_limits|kube_pod_init_container_status_running|kube_pod_init_container_status_waiting|kube_pod_init_container_status_restarts_total|kube_pod_container_status_ready|kube_pod_init_container_*|kube_pod_deletion_timestamp|kube_pod_status_reason|kube_pod_init_container_resource_requests|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_labels|kube_persistentvolume_status_phase" -@nodeexporterRegex_minimal_mac = "node_filesystem_readonly|node_memory_MemTotal_bytes|node_cpu_seconds_total|node_memory_MemAvailable_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_MemFree_bytes|node_memory_Slab_bytes|node_filesystem_avail_bytes|node_filesystem_size_bytes|node_time_seconds|node_exporter_build_info|node_load1|node_vmstat_pgmajfault|node_network_receive_bytes_total|node_network_transmit_bytes_total|node_network_receive_drop_total|node_network_transmit_drop_total|node_disk_io_time_seconds_total|node_disk_io_time_weighted_seconds_total|node_load5|node_load15|node_disk_read_bytes_total|node_disk_written_bytes_total|node_uname_info|kubernetes_build_info|node_boot_time_seconds" -@kappiebasicRegex_minimal_mac = "kappie.*" -@networkobservabilityRetinaRegex_minimal_mac = "networkobservability.*" -@networkobservabilityHubbleRegex_minimal_mac = "hubble_dns_queries_total|hubble_dns_responses_total|hubble_drop_total|hubble_tcp_flags_total" -@networkobservabilityCiliumRegex_minimal_mac = "cilium_drop.*|cilium_forward.*" -@windowsexporterRegex_minimal_mac = 
"windows_system_system_up_time|windows_cpu_time_total|windows_memory_available_bytes|windows_os_visible_memory_bytes|windows_memory_cache_bytes|windows_memory_modified_page_list_bytes|windows_memory_standby_cache_core_bytes|windows_memory_standby_cache_normal_priority_bytes|windows_memory_standby_cache_reserve_bytes|windows_memory_swap_page_operations_total|windows_logical_disk_read_seconds_total|windows_logical_disk_write_seconds_total|windows_logical_disk_size_bytes|windows_logical_disk_free_bytes|windows_net_bytes_total|windows_net_packets_received_discarded_total|windows_net_packets_outbound_discarded_total|windows_container_available|windows_container_cpu_usage_seconds_total|windows_container_memory_usage_commit_bytes|windows_container_memory_usage_private_working_set_bytes|windows_container_network_receive_bytes_total|windows_container_network_transmit_bytes_total" -@windowskubeproxyRegex_minimal_mac = "kubeproxy_sync_proxy_rules_duration_seconds|kubeproxy_sync_proxy_rules_duration_seconds_bucket|kubeproxy_sync_proxy_rules_duration_seconds_sum|kubeproxy_sync_proxy_rules_duration_seconds_count|rest_client_requests_total|rest_client_request_duration_seconds|rest_client_request_duration_seconds_bucket|rest_client_request_duration_seconds_sum|rest_client_request_duration_seconds_count|process_resident_memory_bytes|process_cpu_seconds_total|go_goroutines" - -# Use parser to parse the configmap toml file to a ruby structure -def parseConfigMap - begin - # Check to see if config map is created - if (File.file?(@configMapMountPath)) - parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true) - return parsedConfig - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "configmap prometheus-collector-configmap for default-targets-metrics-keep-list not mounted, using defaults") - return nil - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing config map for default-targets-metrics-keep-list: #{errorStr}, using defaults, please check config map for errors") - return nil - end -end - -# Use the ruby structure created after config parsing to set the right values to be used for otel collector settings -def populateSettingValuesFromConfigMap(parsedConfig) - begin - kubeletRegex = parsedConfig[:kubelet] - if !kubeletRegex.nil? && kubeletRegex.kind_of?(String) - if !kubeletRegex.empty? - if isValidRegex(kubeletRegex) == true - @kubeletRegex = kubeletRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for kubelet") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for kubelet") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "kubeletRegex either not specified or not of type string") - end - - corednsRegex = parsedConfig[:coredns] - if !corednsRegex.nil? && corednsRegex.kind_of?(String) - if !corednsRegex.empty? - if isValidRegex(corednsRegex) == true - @corednsRegex = corednsRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for coredns") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for coredns") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "corednsRegex either not specified or not of type string") - end - - cadvisorRegex = parsedConfig[:cadvisor] - if !cadvisorRegex.nil? && cadvisorRegex.kind_of?(String) - if !cadvisorRegex.empty? 
- if isValidRegex(cadvisorRegex) == true - @cadvisorRegex = cadvisorRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for cadvisor") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for cadvisor") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "cadvisorRegex either not specified or not of type string") - end - - kubeproxyRegex = parsedConfig[:kubeproxy] - if !kubeproxyRegex.nil? && kubeproxyRegex.kind_of?(String) - if !kubeproxyRegex.empty? - if isValidRegex(kubeproxyRegex) == true - @kubeproxyRegex = kubeproxyRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for kubeproxy") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for kubeproxy") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "kubeproxyRegex either not specified or not of type string") - end - - apiserverRegex = parsedConfig[:apiserver] - if !apiserverRegex.nil? && apiserverRegex.kind_of?(String) - if !apiserverRegex.empty? - if isValidRegex(apiserverRegex) == true - @apiserverRegex = apiserverRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for apiserver") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for apiserver") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "apiserverRegex either not specified or not of type string") - end - - kubestateRegex = parsedConfig[:kubestate] - if !kubestateRegex.nil? && kubestateRegex.kind_of?(String) - if !kubestateRegex.empty? - if isValidRegex(kubestateRegex) == true - @kubestateRegex = kubestateRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for kubestate") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for kubestate") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "kubestateRegex either not specified or not of type string") - end - - nodeexporterRegex = parsedConfig[:nodeexporter] - if !nodeexporterRegex.nil? && nodeexporterRegex.kind_of?(String) - if !nodeexporterRegex.empty? - if isValidRegex(nodeexporterRegex) == true - @nodeexporterRegex = nodeexporterRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for nodeexporter") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for nodeexporter") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "nodeexporterRegex either not specified or not of type string") - end - - kappiebasicRegex = parsedConfig[:kappiebasic] - if !kappiebasicRegex.nil? && kappiebasicRegex.kind_of?(String) - if !kappiebasicRegex.empty? - if isValidRegex(kappiebasicRegex) == true - @kappiebasicRegex = kappiebasicRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for kappiebasic") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for kappiebasic") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "kappiebasicRegex either not specified or not of type string") - end - - networkobservabilityRetinaRegex = parsedConfig[:networkobservabilityRetina] - if !networkobservabilityRetinaRegex.nil? && networkobservabilityRetinaRegex.kind_of?(String) - if !networkobservabilityRetinaRegex.empty? 
- if isValidRegex(networkobservabilityRetinaRegex) == true - @networkobservabilityRetinaRegex = networkobservabilityRetinaRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for networkobservabilityRetina") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for networkobservabilityRetina") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "networkobservabilityRetinaRegex either not specified or not of type string") - end - - networkobservabilityHubbleRegex = parsedConfig[:networkobservabilityHubble] - if !networkobservabilityHubbleRegex.nil? && networkobservabilityHubbleRegex.kind_of?(String) - if !networkobservabilityHubbleRegex.empty? - if isValidRegex(networkobservabilityHubbleRegex) == true - @networkobservabilityHubbleRegex = networkobservabilityHubbleRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for networkobservabilityHubble") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for networkobservabilityHubble") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "networkobservabilityHubbleRegex either not specified or not of type string") - end - - networkobservabilityCiliumRegex = parsedConfig[:networkobservabilityCilium] - if !networkobservabilityCiliumRegex.nil? && networkobservabilityCiliumRegex.kind_of?(String) - if !networkobservabilityCiliumRegex.empty? - if isValidRegex(networkobservabilityCiliumRegex) == true - @networkobservabilityCiliumRegex = networkobservabilityCiliumRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for networkobservabilityCilium") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for networkobservabilityCilium") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "networkobservabilityCiliumRegex either not specified or not of type string") - end - - - - windowsexporterRegex = parsedConfig[:windowsexporter] - if !windowsexporterRegex.nil? && windowsexporterRegex.kind_of?(String) - if !windowsexporterRegex.empty? - if isValidRegex(windowsexporterRegex) == true - @windowsexporterRegex = windowsexporterRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for windowsexporter") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for windowsexporter") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "windowsexporterRegex either not specified or not of type string") - end - - windowskubeproxyRegex = parsedConfig[:windowskubeproxy] - if !windowskubeproxyRegex.nil? && windowskubeproxyRegex.kind_of?(String) - if !windowskubeproxyRegex.empty? - if isValidRegex(windowskubeproxyRegex) == true - @windowskubeproxyRegex = windowskubeproxyRegex - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for windowskubeproxy") - else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for windowskubeproxy") - end - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "windowskubeproxyRegex either not specified or not of type string") - end - - podannotationRegex = parsedConfig[:podannotations] - if !podannotationRegex.nil? && podannotationRegex.kind_of?(String) - if !podannotationRegex.empty? 
- if isValidRegex(podannotationRegex) == true
- @podannotationRegex = podannotationRegex
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap metrics keep list regex for podannotations")
- else
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid keep list regex for podannotations")
- end
- end
- else
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "podannotationRegex either not specified or not of type string")
- end
- rescue => errorStr
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for default targets metrics keep list - #{errorStr}, using defaults, please check config map for errors")
- end
-
- # Provide for overwriting the chart setting for minimal ingestion profile using configmap for MAC mode
- if @isMacMode == true
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "MAC mode set to true - Reading configmap setting for minimalingestionprofile")
- minimalIngestionProfileSetting = parsedConfig[:minimalingestionprofile]
- if !minimalIngestionProfileSetting.nil?
- @minimalIngestionProfile = minimalIngestionProfileSetting.to_s.downcase # Doing this to keep it consistent in the check below for helm chart and configmap
- end
- end
-end
-
-# -------Apply profile for ingestion--------
-# Logical OR-ing of the profile regex with the customer-provided regex.
-# The reasoning is:
-# if the customer-provided regex is valid, our regex validation for it will pass, and when the minimal ingestion profile is true, an OR of the customer-provided regex with our minimal profile regex is also a valid regex, so we don't re-validate the merged regex as a whole
-# if the customer-provided regex is invalid, our validation of it will fail, and if the minimal ingestion profile is enabled, we use the profile regex alone and ignore the customer-provided one
-def populateRegexValuesWithMinimalIngestionProfile
- begin
- if @minimalIngestionProfile == "true"
- if @isMacMode == true
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "minimalIngestionProfile=true, MAC is enabled. Applying appropriate MAC Regexes")
- @kubeletRegex = @kubeletRegex + "|" + @kubeletRegex_minimal_mac
- @corednsRegex = @corednsRegex + "|" + @corednsRegex_minimal_mac
- @cadvisorRegex = @cadvisorRegex + "|" + @cadvisorRegex_minimal_mac
- @kubeproxyRegex = @kubeproxyRegex + "|" + @kubeproxyRegex_minimal_mac
- @apiserverRegex = @apiserverRegex + "|" + @apiserverRegex_minimal_mac
- @kubestateRegex = @kubestateRegex + "|" + @kubestateRegex_minimal_mac
- @nodeexporterRegex = @nodeexporterRegex + "|" + @nodeexporterRegex_minimal_mac
- @kappiebasicRegex = @kappiebasicRegex + "|" + @kappiebasicRegex_minimal_mac
- @networkobservabilityRetinaRegex = @networkobservabilityRetinaRegex + "|" + @networkobservabilityRetinaRegex_minimal_mac
- @networkobservabilityHubbleRegex = @networkobservabilityHubbleRegex + "|" + @networkobservabilityHubbleRegex_minimal_mac
- @networkobservabilityCiliumRegex = @networkobservabilityCiliumRegex + "|" + @networkobservabilityCiliumRegex_minimal_mac
- @windowsexporterRegex = @windowsexporterRegex + "|" + @windowsexporterRegex_minimal_mac
- @windowskubeproxyRegex = @windowskubeproxyRegex + "|" + @windowskubeproxyRegex_minimal_mac
- else
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "minimalIngestionProfile=true, MAC is not enabled. 
Applying appropriate non-MAC Regexes") - @kubeletRegex = @kubeletRegex + "|" + @kubeletRegex_minimal - @corednsRegex = @corednsRegex + "|" + @corednsRegex_minimal - @cadvisorRegex = @cadvisorRegex + "|" + @cadvisorRegex_minimal - @kubeproxyRegex = @kubeproxyRegex + "|" + @kubeproxyRegex_minimal - @apiserverRegex = @apiserverRegex + "|" + @apiserverRegex_minimal - @kubestateRegex = @kubestateRegex + "|" + @kubestateRegex_minimal - @nodeexporterRegex = @nodeexporterRegex + "|" + @nodeexporterRegex_minimal - @kappiebasicRegex = @kappiebasicRegex + "|" + @kappiebasicRegex_minimal - @networkobservabilityRetinaRegex = @networkobservabilityRetinaRegex + "|" + @networkobservabilityRetinaRegex_minimal - @networkobservabilityHubbleRegex = @networkobservabilityHubbleRegex + "|" + @networkobservabilityHubbleRegex_minimal - @networkobservabilityCiliumRegex = @networkobservabilityCiliumRegex + "|" + @networkobservabilityCiliumRegex_minimal - @windowsexporterRegex = @windowsexporterRegex + "|" + @windowsexporterRegex_minimal - @windowskubeproxyRegex = @windowskubeproxyRegex + "|" + @windowskubeproxyRegex_minimal - end - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while populating regex values with minimal ingestion profile - #{errorStr}, skipping applying minimal ingestion profile regexes") - end -end - -# ----End applying profile for ingestion-------- - -@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Start default-targets-metrics-keep-list Processing") -if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it - configMapSettings = parseConfigMap - if !configMapSettings.nil? - populateSettingValuesFromConfigMap(configMapSettings) - end -else - if (File.file?(@configMapMountPath)) - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version") - end -end - -# Populate the regex values after reading the configmap settings based on the minimal ingestion profile value -populateRegexValuesWithMinimalIngestionProfile - -# Write the settings to file, so that they can be set as environment variables -file = File.open("/opt/microsoft/configmapparser/config_def_targets_metrics_keep_list_hash", "w") - -regexHash = {} -regexHash["KUBELET_METRICS_KEEP_LIST_REGEX"] = @kubeletRegex -regexHash["COREDNS_METRICS_KEEP_LIST_REGEX"] = @corednsRegex -regexHash["CADVISOR_METRICS_KEEP_LIST_REGEX"] = @cadvisorRegex -regexHash["KUBEPROXY_METRICS_KEEP_LIST_REGEX"] = @kubeproxyRegex -regexHash["APISERVER_METRICS_KEEP_LIST_REGEX"] = @apiserverRegex -regexHash["KUBESTATE_METRICS_KEEP_LIST_REGEX"] = @kubestateRegex -regexHash["NODEEXPORTER_METRICS_KEEP_LIST_REGEX"] = @nodeexporterRegex -regexHash["WINDOWSEXPORTER_METRICS_KEEP_LIST_REGEX"] = @windowsexporterRegex -regexHash["WINDOWSKUBEPROXY_METRICS_KEEP_LIST_REGEX"] = @windowskubeproxyRegex -regexHash["POD_ANNOTATION_METRICS_KEEP_LIST_REGEX"] = @podannotationRegex -regexHash["KAPPIEBASIC_METRICS_KEEP_LIST_REGEX"] = @kappiebasicRegex -regexHash["NETWORKOBSERVABILITYRETINA_METRICS_KEEP_LIST_REGEX"] = @networkobservabilityRetinaRegex -regexHash["NETWORKOBSERVABILITYHUBBLE_METRICS_KEEP_LIST_REGEX"] = @networkobservabilityHubbleRegex -regexHash["NETWORKOBSERVABILITYCILIUM_METRICS_KEEP_LIST_REGEX"] = @networkobservabilityCiliumRegex - -if !file.nil? 
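- # For illustration only (hypothetical, truncated values): the file written below is plain
- # YAML mapping env-var names to the merged regexes, e.g.:
- #   KUBELET_METRICS_KEEP_LIST_REGEX: kubelet_volume_stats_used_bytes|kubelet_node_name
- #   COREDNS_METRICS_KEEP_LIST_REGEX: coredns_build_info|coredns_panics_total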
- # Close file after writing regex keep list hash
- # Writing it as yaml as it is easy to read and write hash
- file.write(regexHash.to_yaml)
- file.close
-else
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while opening file for writing default-targets-metrics-keep-list regex config hash")
-end
-ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "End default-targets-metrics-keep-list Processing")
diff --git a/otelcollector/configmapparser/tomlparser-pod-annotation-based-scraping.rb b/otelcollector/configmapparser/tomlparser-pod-annotation-based-scraping.rb
deleted file mode 100644
index 81924a907..000000000
--- a/otelcollector/configmapparser/tomlparser-pod-annotation-based-scraping.rb
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/local/bin/ruby
-# frozen_string_literal: true
-
-require "tomlrb"
-require "yaml"
-require_relative "ConfigParseErrorLogger"
-require_relative "tomlparser-utils"
-
-LOGGING_PREFIX = "pod-annotation-based-scraping"
-@configMapMountPath = "/etc/config/settings/pod-annotation-based-scraping"
-@podannotationNamespaceRegex = ""
-
-# Use parser to parse the configmap toml file to a ruby structure
-def parseConfigMap
- begin
- # Check to see if config map is created
- if (File.file?(@configMapMountPath))
- parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true)
- return parsedConfig
- else
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "configmap section not mounted, using defaults")
- return nil
- end
- rescue => errorStr
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing config map: #{errorStr}, using defaults, please check config map for errors")
- return nil
- end
-end
-
-# Use the ruby structure created after config parsing to set the right values to be used for otel collector settings
-def populateSettingValuesFromConfigMap(parsedConfig)
- begin
- podannotationRegex = parsedConfig[:podannotationnamespaceregex]
- # Make backwards compatible (older configmaps used the misspelled key below)
- if podannotationRegex.nil? || podannotationRegex.empty?
- podannotationRegex = parsedConfig[:podannotationnamepsaceregex]
- end
- if !podannotationRegex.nil? && podannotationRegex.kind_of?(String) && !podannotationRegex.empty?
- if isValidRegex(podannotationRegex) == true
- @podannotationNamespaceRegex = podannotationRegex
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap namespace regex for podannotations")
- else
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Invalid namespace regex for podannotations")
- end
- else
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "podannotations namespace regex either not specified or not of type string")
- end
- end
-end
-
-ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Start Processing")
-configMapSettings = parseConfigMap
-if !configMapSettings.nil?
- populateSettingValuesFromConfigMap(configMapSettings)
-elsif (File.file?(@configMapMountPath))
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Error loading configmap section - using defaults")
-end
-
-# Write the settings to file, so that they can be set as environment variables
-file = File.open("/opt/microsoft/configmapparser/config_def_pod_annotation_based_scraping", "w")
-
-namespaceRegexHash = {}
-namespaceRegexHash["POD_ANNOTATION_NAMESPACES_REGEX"] = @podannotationNamespaceRegex
-
-if !file.nil? 
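- # For illustration only (hypothetical value): a configmap entry
- #   podannotationnamespaceregex = "kube-system|default"
- # produces the line written below:
- #   export AZMON_PROMETHEUS_POD_ANNOTATION_NAMESPACES_REGEX='kube-system|default'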
- # Close file after writing the pod annotation namespaces regex env var line
- file.write("export AZMON_PROMETHEUS_POD_ANNOTATION_NAMESPACES_REGEX='#{@podannotationNamespaceRegex}'\n")
- file.close
-else
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while opening file for writing the pod annotation namespaces regex environment variable")
-end
-ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "End Processing")
diff --git a/otelcollector/configmapparser/tomlparser-prometheus-collector-settings.rb b/otelcollector/configmapparser/tomlparser-prometheus-collector-settings.rb
deleted file mode 100644
index 14cc528b4..000000000
--- a/otelcollector/configmapparser/tomlparser-prometheus-collector-settings.rb
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/local/bin/ruby
-# frozen_string_literal: true
-
-require "tomlrb"
-require_relative "ConfigParseErrorLogger"
-
-LOGGING_PREFIX = "config"
-
-@configMapMountPath = "/etc/config/settings/prometheus-collector-settings"
-@configVersion = ""
-@configSchemaVersion = ""
-
-# Setting default values which will be used in case they are not set in the configmap or if configmap doesn't exist
-@defaultMetricAccountName = "NONE"
-
-@clusterAlias = "" # user provided alias (through config map or chart param)
-@clusterLabel = "" # value of the 'cluster' label in every time series scraped
-@isOperatorEnabled = ""
-@isOperatorEnabledChartSetting = ""
-
-# Use parser to parse the configmap toml file to a ruby structure
-def parseConfigMap
- begin
- # Check to see if config map is created
- if (File.file?(@configMapMountPath))
- parsedConfig = Tomlrb.load_file(@configMapMountPath, symbolize_keys: true)
- return parsedConfig
- else
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "configmap prometheus-collector-configmap for prometheus collector settings not mounted, using defaults")
- return nil
- end
- rescue => errorStr
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing config map for prometheus collector settings: #{errorStr}, using defaults, please check config map for errors")
- return nil
- end
-end
-
-# Use the ruby structure created after config parsing to set the right values to be used for otel collector settings
-def populateSettingValuesFromConfigMap(parsedConfig)
- # Get the default metric account name
- begin
- if !parsedConfig.nil? && !parsedConfig[:default_metric_account_name].nil?
- @defaultMetricAccountName = parsedConfig[:default_metric_account_name]
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap setting for default metric account name: #{@defaultMetricAccountName}")
- end
- rescue => errorStr
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for prometheus collector settings - #{errorStr}, using defaults, please check config map for errors")
- end
-
- begin
- if !parsedConfig.nil? && !parsedConfig[:cluster_alias].nil? 
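- # For illustration only (hypothetical value): a cluster_alias of "my-cluster.prod" is
- # stripped and then sanitized by the gsub below to "my_cluster_prod".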
- @clusterAlias = parsedConfig[:cluster_alias].strip
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "Got configmap setting for cluster_alias: #{@clusterAlias}")
- @clusterAlias = @clusterAlias.gsub(/[^0-9a-z]/i, "_") # replace all non-alphanumeric characters with "_" -- this keeps all downstream consumers (collector, telegraf config, etc.) consistent
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "After sanitizing configmap setting for cluster_alias: #{@clusterAlias}")
- end
- rescue => errorStr
- @clusterAlias = ""
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for cluster_alias in prometheus collector settings - #{errorStr}, using defaults, please check config map for errors")
- end
-
- # Safeguard to fall back to the non-operator model; operator_enabled may be set to true or false only when the toggle is enabled
- if !ENV["AZMON_OPERATOR_ENABLED"].nil? && ENV["AZMON_OPERATOR_ENABLED"].downcase == "true"
- begin
- @isOperatorEnabledChartSetting = "true"
- if !parsedConfig.nil? && !parsedConfig[:operator_enabled].nil?
- @isOperatorEnabled = parsedConfig[:operator_enabled]
- ConfigParseErrorLogger.log(LOGGING_PREFIX, "Configmap setting enabling operator: #{@isOperatorEnabled}")
- end
- rescue => errorStr
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while reading config map settings for prometheus collector settings - #{errorStr}, using defaults, please check config map for errors")
- end
- else
- @isOperatorEnabledChartSetting = "false"
- end
-end
-
-@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"]
-ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Start prometheus-collector-settings Processing")
-if !@configSchemaVersion.nil? && !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it
- configMapSettings = parseConfigMap
- if !configMapSettings.nil?
- populateSettingValuesFromConfigMap(configMapSettings)
- end
-else
- if (File.file?(@configMapMountPath))
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Unsupported/missing config schema version - '#{@configSchemaVersion}', using defaults, please use supported schema version")
- end
-end
-
-# get the cluster name from the cluster's full ARM resource id (used as the 'cluster' label in MAC mode)
-begin
- if !ENV["MAC"].nil? && !ENV["MAC"].empty? && ENV["MAC"].strip.downcase == "true"
- resourceArray = ENV["CLUSTER"].strip.split("/")
- @clusterLabel = resourceArray[resourceArray.length - 1]
- else
- @clusterLabel = ENV["CLUSTER"]
- end
-rescue => errorStr
- @clusterLabel = ENV["CLUSTER"]
- ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing to determine cluster label from full cluster resource id in prometheus collector settings - #{errorStr}, using default as full CLUSTER passed-in '#{@clusterLabel}'")
-end
-
-# override cluster label with cluster alias, if alias is specified
-
-if !@clusterAlias.nil? && !@clusterAlias.empty? 
&& @clusterAlias.length > 0 - @clusterLabel = @clusterAlias - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using clusterLabel from cluster_alias:#{@clusterAlias}") -end - -ConfigParseErrorLogger.log(LOGGING_PREFIX, "AZMON_CLUSTER_ALIAS:'#{@clusterAlias}'") -ConfigParseErrorLogger.log(LOGGING_PREFIX, "AZMON_CLUSTER_LABEL:#{@clusterLabel}") - -# Write the settings to file, so that they can be set as environment variables -file = File.open("/opt/microsoft/configmapparser/config_prometheus_collector_settings_env_var", "w") - -if !file.nil? - if !ENV["OS_TYPE"].nil? && ENV["OS_TYPE"].downcase == "linux" - file.write("export AZMON_DEFAULT_METRIC_ACCOUNT_NAME=#{@defaultMetricAccountName}\n") - file.write("export AZMON_CLUSTER_LABEL=#{@clusterLabel}\n") #used for cluster label value when scraping - file.write("export AZMON_CLUSTER_ALIAS=#{@clusterAlias}\n") #used only for telemetry - file.write("export AZMON_OPERATOR_ENABLED_CHART_SETTING=#{@isOperatorEnabledChartSetting}\n") - if !@isOperatorEnabled.nil? && !@isOperatorEnabled.empty? && @isOperatorEnabled.length > 0 - file.write("export AZMON_OPERATOR_ENABLED=#{@isOperatorEnabled}\n") - file.write("export AZMON_OPERATOR_ENABLED_CFG_MAP_SETTING=#{@isOperatorEnabled}\n") - end - else - file.write("AZMON_DEFAULT_METRIC_ACCOUNT_NAME=#{@defaultMetricAccountName}\n") - file.write("AZMON_CLUSTER_LABEL=#{@clusterLabel}\n") #used for cluster label value when scraping - file.write("AZMON_CLUSTER_ALIAS=#{@clusterAlias}\n") #used only for telemetry - end - - file.close -else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while opening file for writing prometheus-collector config environment variables") -end -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "End prometheus-collector-settings Processing") diff --git a/otelcollector/configmapparser/tomlparser-scrape-interval.rb b/otelcollector/configmapparser/tomlparser-scrape-interval.rb deleted file mode 100644 index d7cbbce3d..000000000 --- a/otelcollector/configmapparser/tomlparser-scrape-interval.rb +++ /dev/null @@ -1,337 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require "tomlrb" -require "yaml" -require_relative "ConfigParseErrorLogger" - -LOGGING_PREFIX = "default-scrape-interval-settings" - -# Checking to see if the duration matches the pattern specified in the prometheus config -# Link to documenation with regex pattern -> https://prometheus.io/docs/prometheus/latest/configuration/configuration/#configuration-file -MATCHER = /^((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)$/ - -@configMapMountPath = "/etc/config/settings/default-targets-scrape-interval-settings" -@configVersion = "" -@configSchemaVersion = "" - -@kubeletScrapeInterval = "30s" -@corednsScrapeInterval = "30s" -@cadvisorScrapeInterval = "30s" -@kubeproxyScrapeInterval = "30s" -@apiserverScrapeInterval = "30s" -@kubestateScrapeInterval = "30s" -@nodeexporterScrapeInterval = "30s" -@windowsexporterScrapeInterval = "30s" -@windowskubeproxyScrapeInterval = "30s" -@prometheusCollectorHealthInterval = "30s" -@podannotationScrapeInterval = "30s" -@kappiebasicScrapeInterval = "30s" -@networkobservabilityRetinaScrapeInterval = "30s" -@networkobservabilityHubbleScrapeInterval = "30s" -@networkobservabilityCiliumScrapeInterval = "30s" - -# Use parser to parse the configmap toml file to a ruby structure -def parseConfigMap - begin - # Check to see if config map is created - if (File.file?(@configMapMountPath)) - parsedConfig = Tomlrb.load_file(@configMapMountPath, 
symbolize_keys: true) - return parsedConfig - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "configmap prometheus-collector-configmap for default-targets-scrape-interval-settings not mounted, using defaults") - return nil - end - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while parsing config map for default-targets-scrape-interval-settings: #{errorStr}, using defaults, please check config map for errors") - return nil - end -end - -# Use the ruby structure created after config parsing to set the right values to be used for otel collector settings -def populateSettingValuesFromConfigMap(parsedConfig) - begin - kubeletScrapeInterval = parsedConfig[:kubelet] - if !kubeletScrapeInterval.nil? - matched = MATCHER.match(kubeletScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - kubeletScrapeInterval = "30s" - @kubeletScrapeInterval = kubeletScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @kubeletScrapeInterval = kubeletScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for kubeletScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "kubeletScrapeInterval override not specified in configmap") - end - - corednsScrapeInterval = parsedConfig[:coredns] - if !corednsScrapeInterval.nil? - matched = MATCHER.match(corednsScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - corednsScrapeInterval = "30s" - @corednsScrapeInterval = corednsScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @corednsScrapeInterval = corednsScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for corednsScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "corednsScrapeInterval override not specified in configmap") - end - - cadvisorScrapeInterval = parsedConfig[:cadvisor] - if !cadvisorScrapeInterval.nil? - matched = MATCHER.match(cadvisorScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - cadvisorScrapeInterval = "30s" - @cadvisorScrapeInterval = cadvisorScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @cadvisorScrapeInterval = cadvisorScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for cadvisorScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "cadvisorScrapeInterval override not specified in configmap") - end - - kubeproxyScrapeInterval = parsedConfig[:kubeproxy] - if !kubeproxyScrapeInterval.nil? 
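Each per-target block in this deleted file repeats the same validate-or-default step against the MATCHER pattern above. In Go the whole list collapses to one table-driven loop; a sketch assuming the rewrite keeps the 30s fallback (map contents and names illustrative, not the PR's actual code):

    package main

    import (
        "fmt"
        "regexp"
    )

    // Same Prometheus duration pattern as the Ruby MATCHER above.
    var durationRe = regexp.MustCompile(`^((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)$`)

    // validateIntervals keeps well-formed overrides and falls back to 30s.
    func validateIntervals(overrides map[string]string) map[string]string {
        intervals := make(map[string]string)
        for target, val := range overrides {
            if durationRe.MatchString(val) {
                intervals[target] = val
            } else {
                fmt.Printf("invalid duration %q for %s, using default 30s\n", val, target)
                intervals[target] = "30s"
            }
        }
        return intervals
    }
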
- matched = MATCHER.match(kubeproxyScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - kubeproxyScrapeInterval = "30s" - @kubeproxyScrapeInterval = kubeproxyScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @kubeproxyScrapeInterval = kubeproxyScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for kubeproxyScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "kubeproxyScrapeInterval override not specified in configmap") - end - - apiserverScrapeInterval = parsedConfig[:apiserver] - if !apiserverScrapeInterval.nil? - matched = MATCHER.match(apiserverScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - apiserverScrapeInterval = "30s" - @apiserverScrapeInterval = apiserverScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @apiserverScrapeInterval = apiserverScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for apiserverScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "apiserverScrapeInterval override not specified in configmap") - end - - kubestateScrapeInterval = parsedConfig[:kubestate] - if !kubestateScrapeInterval.nil? - matched = MATCHER.match(kubestateScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - kubestateScrapeInterval = "30s" - @kubestateScrapeInterval = kubestateScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @kubestateScrapeInterval = kubestateScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for kubestateScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "kubestateScrapeInterval override not specified in configmap") - end - - nodeexporterScrapeInterval = parsedConfig[:nodeexporter] - if !nodeexporterScrapeInterval.nil? - matched = MATCHER.match(nodeexporterScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - nodeexporterScrapeInterval = "30s" - @nodeexporterScrapeInterval = nodeexporterScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @nodeexporterScrapeInterval = nodeexporterScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for nodeexporterScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "nodeexporterScrapeInterval override not specified in configmap") - end - - windowsexporterScrapeInterval = parsedConfig[:windowsexporter] - if !windowsexporterScrapeInterval.nil? 
- matched = MATCHER.match(windowsexporterScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - windowsexporterScrapeInterval = "30s" - @windowsexporterScrapeInterval = windowsexporterScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @windowsexporterScrapeInterval = windowsexporterScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for windowsexporterScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "windowsexporterScrapeInterval override not specified in configmap") - end - - windowskubeproxyScrapeInterval = parsedConfig[:windowskubeproxy] - if !windowskubeproxyScrapeInterval.nil? - matched = MATCHER.match(windowskubeproxyScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - windowskubeproxyScrapeInterval = "30s" - @windowskubeproxyScrapeInterval = windowskubeproxyScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @windowskubeproxyScrapeInterval = windowskubeproxyScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for windowskubeproxyScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "windowskubeproxyScrapeInterval override not specified in configmap") - end - - kappiebasicScrapeInterval = parsedConfig[:kappiebasic] - if !kappiebasicScrapeInterval.nil? - matched = MATCHER.match(kappiebasicScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - kappiebasicScrapeInterval = "30s" - @kappiebasicScrapeInterval = kappiebasicScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s for kappie") - else - @kappiebasicScrapeInterval = kappiebasicScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for kappiebasicScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "kappiebasicScrapeInterval override not specified in configmap") - end - - networkobservabilityRetinaScrapeInterval = parsedConfig[:networkobservabilityRetina] - if !networkobservabilityRetinaScrapeInterval.nil? - matched = MATCHER.match(networkobservabilityRetinaScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - networkobservabilityRetinaScrapeInterval = "30s" - @networkobservabilityRetinaScrapeInterval = networkobservabilityRetinaScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s for networkobservabilityRetina") - else - @networkobservabilityRetinaScrapeInterval = networkobservabilityRetinaScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for networkobservabilityRetinaScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "networkobservabilityRetinaScrapeInterval override not specified in configmap") - end - - networkobservabilityHubbleScrapeInterval = parsedConfig[:networkobservabilityHubble] - if !networkobservabilityHubbleScrapeInterval.nil? 
- matched = MATCHER.match(networkobservabilityHubbleScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - networkobservabilityHubbleScrapeInterval = "30s" - @networkobservabilityHubbleScrapeInterval = networkobservabilityHubbleScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s for networkobservabilityRetina") - else - @networkobservabilityHubbleScrapeInterval = networkobservabilityHubbleScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for networkobservabilityHubbleScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "networkobservabilityHubbleScrapeInterval override not specified in configmap") - end - - networkobservabilityCiliumScrapeInterval = parsedConfig[:networkobservabilityCilium] - if !networkobservabilityCiliumScrapeInterval.nil? - matched = MATCHER.match(networkobservabilityCiliumScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - networkobservabilityCiliumScrapeInterval = "30s" - @networkobservabilityCiliumScrapeInterval = networkobservabilityCiliumScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s for networkobservabilityCilium") - else - @networkobservabilityCiliumScrapeInterval = networkobservabilityCiliumScrapeInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for networkobservabilityCiliumScrapeInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "networkobservabilityCiliumScrapeInterval override not specified in configmap") - end - - prometheusCollectorHealthInterval = parsedConfig[:prometheuscollectorhealth] - if !prometheusCollectorHealthInterval.nil? - matched = MATCHER.match(prometheusCollectorHealthInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - prometheusCollectorHealthInterval = "30s" - @prometheusCollectorHealthInterval = prometheusCollectorHealthInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - @prometheusCollectorHealthInterval = prometheusCollectorHealthInterval - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for prometheusCollectorHealthInterval") - end - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "prometheusCollectorHealthInterval override not specified in configmap") - end - - podannotationScrapeInterval = parsedConfig[:podannotations] - if !podannotationScrapeInterval.nil? - matched = MATCHER.match(podannotationScrapeInterval) - if !matched - # set default scrape interval to 30s if its not in the proper format - podannotationScrapeInterval = "30s" - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Incorrect regex pattern for duration, set default scrape interval to 30s") - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "Using configmap scrape settings for podannotationScrapeInterval") - end - @podannotationScrapeInterval = podannotationScrapeInterval - else - ConfigParseErrorLogger.log(LOGGING_PREFIX, "podannotationScrapeInterval override not specified in configmap") - end - end -end - -@configSchemaVersion = ENV["AZMON_AGENT_CFG_SCHEMA_VERSION"] -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "Start default-targets-scrape-interval-settings Processing") -if !@configSchemaVersion.nil? 
&& !@configSchemaVersion.empty? && @configSchemaVersion.strip.casecmp("v1") == 0 #note v1 is the only supported schema version, so hardcoding it - configMapSettings = parseConfigMap - if !configMapSettings.nil? - populateSettingValuesFromConfigMap(configMapSettings) - end -else - if (File.file?(@configMapMountPath)) - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Unsupported/missing config schema version - '#{@configSchemaVersion}' , using defaults, please use supported schema version") - end -end - -# Write the settings to file, so that they can be set as environment variables -file = File.open("/opt/microsoft/configmapparser/config_def_targets_scrape_intervals_hash", "w") - -intervalHash = {} -intervalHash["KUBELET_SCRAPE_INTERVAL"] = @kubeletScrapeInterval -intervalHash["COREDNS_SCRAPE_INTERVAL"] = @corednsScrapeInterval -intervalHash["CADVISOR_SCRAPE_INTERVAL"] = @cadvisorScrapeInterval -intervalHash["KUBEPROXY_SCRAPE_INTERVAL"] = @kubeproxyScrapeInterval -intervalHash["APISERVER_SCRAPE_INTERVAL"] = @apiserverScrapeInterval -intervalHash["KUBESTATE_SCRAPE_INTERVAL"] = @kubestateScrapeInterval -intervalHash["NODEEXPORTER_SCRAPE_INTERVAL"] = @nodeexporterScrapeInterval -intervalHash["WINDOWSEXPORTER_SCRAPE_INTERVAL"] = @windowsexporterScrapeInterval -intervalHash["WINDOWSKUBEPROXY_SCRAPE_INTERVAL"] = @windowskubeproxyScrapeInterval -intervalHash["PROMETHEUS_COLLECTOR_HEALTH_SCRAPE_INTERVAL"] = @prometheusCollectorHealthInterval -intervalHash["POD_ANNOTATION_SCRAPE_INTERVAL"] = @podannotationScrapeInterval -intervalHash["KAPPIEBASIC_SCRAPE_INTERVAL"] = @kappiebasicScrapeInterval -intervalHash["NETWORKOBSERVABILITYRETINA_SCRAPE_INTERVAL"] = @networkobservabilityRetinaScrapeInterval -intervalHash["NETWORKOBSERVABILITYHUBBLE_SCRAPE_INTERVAL"] = @networkobservabilityHubbleScrapeInterval -intervalHash["NETWORKOBSERVABILITYCILIUM_SCRAPE_INTERVAL"] = @networkobservabilityCiliumScrapeInterval - - -if !file.nil? - # Close file after writing scrape interval list hash - # Writing it as yaml as it is easy to read and write hash - file.write(intervalHash.to_yaml) - file.close -else - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while opening file for writing default-targets-scrape-interval-settings regex config hash") -end -ConfigParseErrorLogger.logSection(LOGGING_PREFIX, "End default-targets-scrape-interval-settings Processing") diff --git a/otelcollector/configmapparser/tomlparser-utils.rb b/otelcollector/configmapparser/tomlparser-utils.rb deleted file mode 100644 index 5ff3548f8..000000000 --- a/otelcollector/configmapparser/tomlparser-utils.rb +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/local/bin/ruby -# frozen_string_literal: true - -require_relative "ConfigParseErrorLogger" - -if (!ENV["OS_TYPE"].nil? && ENV["OS_TYPE"].downcase == "linux") - require "re2" -end - -# RE2 is not supported for windows -def isValidRegex_linux(str) - begin - # invalid regex example -> 'sel/\\' - re2Regex = RE2::Regexp.new(str) - return re2Regex.ok? 
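The linux/windows split in this deleted helper existed because the re2 gem has no Windows build, leaving Windows with a weaker plain-Ruby Regexp check. Go's standard regexp package implements RE2 syntax on every platform, so one code path suffices in the rewrite; a sketch (function name illustrative):

    package main

    import "regexp"

    // isValidRegex reports whether str compiles as an RE2 expression.
    // Go's regexp is RE2-based, so this behaves the same on linux and windows.
    func isValidRegex(str string) bool {
        _, err := regexp.Compile(str)
        return err == nil
    }
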
- rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while validating regex for target metric keep list - #{errorStr}, regular expression str - #{str}") - return false - end -end - -def isValidRegex_windows(str) - begin - # invalid regex example -> 'sel/\\' - re2Regex = Regexp.new(str) - return true - rescue => errorStr - ConfigParseErrorLogger.logError(LOGGING_PREFIX, "Exception while validating regex for target metric keep list - #{errorStr}, regular expression str - #{str}") - return false - end -end - -def isValidRegex(str) - if ENV["OS_TYPE"] == "linux" - return isValidRegex_linux(str) - else - return isValidRegex_windows(str) - end -end \ No newline at end of file diff --git a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/Chart-template.yaml b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/Chart-template.yaml index 0a247406a..e1427b28d 100644 --- a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/Chart-template.yaml +++ b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/Chart-template.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: ${IMAGE_TAG} +version: ${IMAGE_TAG} # This is the version number of the application being deployed (basically, imagetag for the image built/compatible with this chart semver above). This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/local_testing_aks.ps1 b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/local_testing_aks.ps1 index bac24d734..0165921a7 100644 --- a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/local_testing_aks.ps1 +++ b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/local_testing_aks.ps1 @@ -6,9 +6,9 @@ # Chart-template.yaml file. 
# Define variables -$ImageTag = "6.8.14-kaveesh-q-07-19-2024-e7626ca2" -$AKSRegion = "westeurope" -$AKSResourceId = "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourceGroups/kaveeshtest/providers/Microsoft.ContainerService/managedClusters/kaveeshtest" +$ImageTag = "6.9.0-kaveesh-golang-windows-09-25-2024-5d45d385" +$AKSRegion = "northeurope" +$AKSResourceId = "/subscriptions/ce4d1293-71c0-4c72-bc55-133553ee9e50/resourceGroups/kaveeshwin/providers/Microsoft.ContainerService/managedClusters/kaveeshwin" # Read files $chartTemplatePath = ".\Chart-template.yaml" diff --git a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml index bfb751ae2..2ea59fb8d 100644 --- a/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml +++ b/otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates/ama-metrics-daemonset.yaml @@ -438,13 +438,11 @@ spec: readOnly: true mountPath: /var/log/pods livenessProbe: - exec: - command: - - cmd - - /c - - C:\opt\microsoft\scripts\livenessprobe.cmd - periodSeconds: 60 + httpGet: + path: /health + port: 8080 initialDelaySeconds: 300 + periodSeconds: 60 timeoutSeconds: 60 failureThreshold: 3 - name: addon-token-adapter-win diff --git a/otelcollector/go.sum b/otelcollector/go.sum index 158c536db..052a81018 100644 --- a/otelcollector/go.sum +++ b/otelcollector/go.sum @@ -1,11 +1,31 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQNvHSdIE7iqsQxK1P41mySCvssg= +github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= 
+golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= diff --git a/otelcollector/main/main.go b/otelcollector/main/main.go index 6c6daee73..c4c8d817d 100644 --- a/otelcollector/main/main.go +++ b/otelcollector/main/main.go @@ -22,10 +22,20 @@ func main() { clusterOverride := shared.GetEnv("CLUSTER_OVERRIDE", "") aksRegion := shared.GetEnv("AKSREGION", "") ccpMetricsEnabled := shared.GetEnv("CCP_METRICS_ENABLED", "false") + osType := os.Getenv("OS_TYPE") - outputFile := "/opt/inotifyoutput.txt" - if err := shared.Inotify(outputFile, "/etc/config/settings", "/etc/prometheus/certs"); err != nil { - log.Fatal(err) + if osType == "windows" { + shared.SetEnvVariablesForWindows() + } + + if osType == "linux" { + outputFile := "/opt/inotifyoutput.txt" + if err := shared.Inotify(outputFile, "/etc/config/settings", "/etc/prometheus/certs"); err != nil { + log.Fatal(err) + } + } else if osType == "windows" { + fmt.Println("Starting filesystemwatcher.ps1") + shared.StartCommand("powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", "C:\\opt\\scripts\\filesystemwatcher.ps1") } if ccpMetricsEnabled != "true" { @@ -53,7 +63,7 @@ func main() { configmapsettings.Configmapparser() } - if ccpMetricsEnabled != "true" { + if ccpMetricsEnabled != "true" && osType == "linux" { shared.StartCronDaemon() } @@ -67,8 +77,8 @@ func main() { shared.WaitForTokenAdapter(ccpMetricsEnabled) if ccpMetricsEnabled != "true" { - shared.SetEnvAndSourceBashrc("ME_CONFIG_FILE", meConfigFile, true) - shared.SetEnvAndSourceBashrc("customResourceId", cluster, true) + shared.SetEnvAndSourceBashrcOrPowershell("ME_CONFIG_FILE", meConfigFile, true) + shared.SetEnvAndSourceBashrcOrPowershell("customResourceId", cluster, true) } else { os.Setenv("ME_CONFIG_FILE", meConfigFile) os.Setenv("customResourceId", cluster) @@ -76,7 +86,7 @@ func main() { trimmedRegion := strings.ToLower(strings.ReplaceAll(aksRegion, " ", "")) if ccpMetricsEnabled != "true" { - shared.SetEnvAndSourceBashrc("customRegion", trimmedRegion, true) + shared.SetEnvAndSourceBashrcOrPowershell("customRegion", trimmedRegion, true) } else { os.Setenv("customRegion", trimmedRegion) } @@ -84,15 +94,22 @@ func main() { fmt.Println("Waiting for 10s for token adapter sidecar to be up and running so that it can start serving IMDS requests") time.Sleep(10 * time.Second) - fmt.Println("Starting MDSD") if ccpMetricsEnabled != "true" { - shared.StartMdsdForOverlay() + if osType == "linux" { + fmt.Println("Starting MDSD") + shared.StartMdsdForOverlay() + } else { + fmt.Println("Starting MA") + shared.StartMA() + } } else { shared.StartMdsdForUnderlay() } - // update this to use color coding - shared.PrintMdsdVersion() + if osType == "linux" { + // update this to use color coding + shared.PrintMdsdVersion() + } fmt.Println("Waiting for 30s for MDSD to get the config and put them in place for ME") time.Sleep(30 * time.Second) @@ -157,48 +174,53 @@ func main() { // 
fmt.Printf("Error modifying config file: %v\n", err) // } - shared.LogVersionInfo() + if osType == "linux" { + shared.LogVersionInfo() + } if ccpMetricsEnabled != "true" { shared.StartFluentBit(fluentBitConfigFile) - // Run the command and capture the output - cmd := exec.Command("fluent-bit", "--version") - fluentBitVersion, err := cmd.Output() - if err != nil { - log.Fatalf("failed to run command: %v", err) + if osType == "linux" { + cmd := exec.Command("fluent-bit", "--version") + fluentBitVersion, err := cmd.Output() + if err != nil { + log.Fatalf("failed to run command: %v", err) + } + shared.EchoVar("FLUENT_BIT_VERSION", string(fluentBitVersion)) } - shared.EchoVar("FLUENT_BIT_VERSION", string(fluentBitVersion)) shared.StartTelegraf() } - // Start inotify to watch for changes - fmt.Println("Starting inotify for watching mdsd config update") + if osType == "linux" { + // Start inotify to watch for changes + fmt.Println("Starting inotify for watching mdsd config update") - // Create an output file for inotify events - outputFile = "/opt/inotifyoutput-mdsd-config.txt" - _, err = os.Create(outputFile) - if err != nil { - log.Fatalf("Error creating output file: %v\n", err) - } + // Create an output file for inotify events + outputFile := "/opt/inotifyoutput-mdsd-config.txt" + _, err = os.Create(outputFile) + if err != nil { + log.Fatalf("Error creating output file: %v\n", err) + } - // Define the command to start inotify - inotifyCommand := exec.Command( - "inotifywait", - "/etc/mdsd.d/config-cache/metricsextension/TokenConfig.json", - "--daemon", - "--outfile", outputFile, - "--event", "ATTRIB", - "--format", "%e : %T", - "--timefmt", "+%s", - ) - - // Start the inotify process - err = inotifyCommand.Start() - if err != nil { - log.Fatalf("Error starting inotify process: %v\n", err) + // Define the command to start inotify + inotifyCommand := exec.Command( + "inotifywait", + "/etc/mdsd.d/config-cache/metricsextension/TokenConfig.json", + "--daemon", + "--outfile", outputFile, + "--event", "ATTRIB", + "--format", "%e : %T", + "--timefmt", "+%s", + ) + + // Start the inotify process + err = inotifyCommand.Start() + if err != nil { + log.Fatalf("Error starting inotify process: %v\n", err) + } } // Setting time at which the container started running @@ -229,11 +251,18 @@ func main() { } func healthHandler(w http.ResponseWriter, r *http.Request) { + osType := os.Getenv("OS_TYPE") status := http.StatusOK message := "prometheuscollector is running." 
+ processToCheck := "" + + tokenConfigFileLocation := "/etc/mdsd.d/config-cache/metricsextension/TokenConfig.json" + if osType == "windows" { + tokenConfigFileLocation = "C:\\opt\\genevamonitoringagent\\datadirectory\\mcs\\metricsextension\\TokenConfig.json" + } // Checking if TokenConfig file exists - if _, err := os.Stat("/etc/mdsd.d/config-cache/metricsextension/TokenConfig.json"); os.IsNotExist(err) { + if _, err := os.Stat(tokenConfigFileLocation); os.IsNotExist(err) { fmt.Println("TokenConfig.json does not exist") if _, err := os.Stat("/opt/microsoft/liveness/azmon-container-start-time"); err == nil { fmt.Println("azmon-container-start-time file exists, reading start time") @@ -272,38 +301,57 @@ func healthHandler(w http.ResponseWriter, r *http.Request) { fmt.Println("azmon-container-start-time file does not exist") } } else { - if !shared.IsProcessRunning("/usr/sbin/MetricsExtension") { + processToCheck = "/usr/sbin/MetricsExtension" + if osType == "windows" { + processToCheck = "MetricsExtension.Native.exe" + } + if !shared.IsProcessRunning(processToCheck) { status = http.StatusServiceUnavailable message = "Metrics Extension is not running (configuration exists)" fmt.Println(message) goto response } - if !shared.IsProcessRunning("/usr/sbin/mdsd") { + processToCheck = "/usr/sbin/mdsd" + if osType == "windows" { + processToCheck = "MonAgentLauncher.exe" + } + if !shared.IsProcessRunning(processToCheck) { status = http.StatusServiceUnavailable message = "mdsd not running (configuration exists)" fmt.Println(message) goto response } } - - if shared.HasConfigChanged("/opt/inotifyoutput-mdsd-config.txt") { - status = http.StatusServiceUnavailable - message = "inotifyoutput-mdsd-config.txt has been updated - mdsd config changed" - fmt.Println(message) - goto response + if osType == "linux" { + if shared.HasConfigChanged("/opt/inotifyoutput-mdsd-config.txt") { + status = http.StatusServiceUnavailable + message = "inotifyoutput-mdsd-config.txt has been updated - mdsd config changed" + fmt.Println(message) + goto response + } + if shared.HasConfigChanged("/opt/inotifyoutput.txt") { + status = http.StatusServiceUnavailable + message = "inotifyoutput.txt has been updated - config changed" + fmt.Println(message) + goto response + } + } else { + if shared.HasConfigChanged("C:\\opt\\microsoft\\scripts\\filesystemwatcher.txt") { + status = http.StatusServiceUnavailable + message = "Config Map Updated or DCR/DCE updated since agent started" + fmt.Println(message) + goto response + } } - if !shared.IsProcessRunning("/opt/microsoft/otelcollector/otelcollector") { - status = http.StatusServiceUnavailable - message = "OpenTelemetryCollector is not running." - fmt.Println(message) - goto response + processToCheck = "/opt/microsoft/otelcollector/otelcollector" + if osType == "windows" { + processToCheck = "otelcollector.exe" } - - if shared.HasConfigChanged("/opt/inotifyoutput.txt") { + if !shared.IsProcessRunning(processToCheck) { status = http.StatusServiceUnavailable - message = "inotifyoutput.txt has been updated - config changed" + message = "OpenTelemetryCollector is not running." 
fmt.Println(message) goto response } diff --git a/otelcollector/opentelemetry-collector-builder/makefile_windows.ps1 b/otelcollector/opentelemetry-collector-builder/makefile_windows.ps1 index 37c10b9bb..9032781c9 100644 --- a/otelcollector/opentelemetry-collector-builder/makefile_windows.ps1 +++ b/otelcollector/opentelemetry-collector-builder/makefile_windows.ps1 @@ -36,4 +36,39 @@ Set-Location prom-config-validator-builder Set-Location .. Set-Location opentelemetry-collector-builder -Write-Output "FINISHED building promconfigvalidator" \ No newline at end of file +Write-Output "FINISHED building promconfigvalidator" + +Set-Location .. +Set-Location main + +# Create directories +New-Item -Path "./shared/configmap/mp/" -ItemType Directory -Force +New-Item -Path "./shared/configmap/ccp/" -ItemType Directory -Force +# New-Item -Path "./main/" -ItemType Directory -Force + +# Copy shared Go files +Copy-Item -Path "../shared/*.go" -Destination "./shared/" +Copy-Item -Path "../shared/go.mod" -Destination "./shared/" +Copy-Item -Path "../shared/go.sum" -Destination "./shared/" +Copy-Item -Path "../shared/configmap/mp/*.go" -Destination "./shared/configmap/mp/" +Copy-Item -Path "../shared/configmap/mp/go.mod" -Destination "./shared/configmap/mp/" +Copy-Item -Path "../shared/configmap/mp/go.sum" -Destination "./shared/configmap/mp/" +Copy-Item -Path "../shared/configmap/ccp/*.go" -Destination "./shared/configmap/ccp/" +Copy-Item -Path "../shared/configmap/ccp/go.mod" -Destination "./shared/configmap/ccp/" +Copy-Item -Path "../shared/configmap/ccp/go.sum" -Destination "./shared/configmap/ccp/" + +# # Copy main Go files +# Copy-Item -Path "./main/*.go" -Destination "./main/" +# Copy-Item -Path "./go.mod" -Destination "./main/" +# Copy-Item -Path "./go.sum" -Destination "./main/" +# Set-Location main + +go version +go mod tidy +go build -buildmode=pie -o "main.exe" "./main.go" + +Write-Output "Build main executable completed" + +Set-Location .. 
+Set-Location opentelemetry-collector-builder + diff --git a/otelcollector/shared/configmap/ccp/configmapparserforccp.go b/otelcollector/shared/configmap/ccp/configmapparserforccp.go index d60eac601..4eae827e4 100644 --- a/otelcollector/shared/configmap/ccp/configmapparserforccp.go +++ b/otelcollector/shared/configmap/ccp/configmapparserforccp.go @@ -25,7 +25,7 @@ func Configmapparserforccp() { configVersion = configVersion[:10] } // Set the environment variable - shared.SetEnvAndSourceBashrc("AZMON_AGENT_CFG_FILE_VERSION", configVersion, true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_FILE_VERSION", configVersion, true) } // Set agent config file version @@ -41,7 +41,7 @@ func Configmapparserforccp() { configSchemaVersion = configSchemaVersion[:10] } // Set the environment variable - shared.SetEnvAndSourceBashrc("AZMON_AGENT_CFG_SCHEMA_VERSION", configSchemaVersion, true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_SCHEMA_VERSION", configSchemaVersion, true) } // Parse the configmap to set the right environment variables for prometheus collector settings @@ -65,11 +65,11 @@ func Configmapparserforccp() { prometheusCcpConfigMerger() - shared.SetEnvAndSourceBashrc("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "false", true) - shared.SetEnvAndSourceBashrc("CONFIG_VALIDATOR_RUNNING_IN_AGENT", "true", true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "false", true) + shared.SetEnvAndSourceBashrcOrPowershell("CONFIG_VALIDATOR_RUNNING_IN_AGENT", "true", true) // No need to merge custom prometheus config, only merging in the default configs - shared.SetEnvAndSourceBashrc("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", true) shared.StartCommandAndWait("/opt/promconfigvalidator", "--config", "/opt/defaultsMergedConfig.yml", "--output", "/opt/ccp-collector-config-with-defaults.yml", "--otelTemplate", "/opt/microsoft/otelcollector/ccp-collector-config-template.yml") if !shared.Exists("/opt/ccp-collector-config-with-defaults.yml") { fmt.Printf("prom-config-validator::Prometheus default scrape config validation failed. 
No scrape configs will be used") diff --git a/otelcollector/shared/configmap/mp/configmapparser.go b/otelcollector/shared/configmap/mp/configmapparser.go index b66d5cd84..fea3485c2 100644 --- a/otelcollector/shared/configmap/mp/configmapparser.go +++ b/otelcollector/shared/configmap/mp/configmapparser.go @@ -18,13 +18,13 @@ const ( func setConfigSchemaVersionEnv() { fileInfo, err := os.Stat(schemaVersionFile) if err != nil || fileInfo.Size() == 0 { - shared.SetEnvAndSourceBashrc("AZMON_AGENT_CFG_SCHEMA_VERSION", defaultConfigSchemaVersion, true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_SCHEMA_VERSION", defaultConfigSchemaVersion, true) return } content, err := os.ReadFile(schemaVersionFile) if err != nil { shared.EchoError("Error reading schema version file:" + err.Error()) - shared.SetEnvAndSourceBashrc("AZMON_AGENT_CFG_SCHEMA_VERSION", defaultConfigSchemaVersion, true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_SCHEMA_VERSION", defaultConfigSchemaVersion, true) return } trimmedContent := strings.TrimSpace(string(content)) @@ -32,19 +32,19 @@ func setConfigSchemaVersionEnv() { if len(configSchemaVersion) > 10 { configSchemaVersion = configSchemaVersion[:10] } - shared.SetEnvAndSourceBashrc("AZMON_AGENT_CFG_SCHEMA_VERSION", configSchemaVersion, true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_SCHEMA_VERSION", configSchemaVersion, true) } func setConfigFileVersionEnv() { fileInfo, err := os.Stat(configVersionFile) if err != nil || fileInfo.Size() == 0 { - shared.SetEnvAndSourceBashrc("AZMON_AGENT_CFG_FILE_VERSION", defaultConfigFileVersion, true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_FILE_VERSION", defaultConfigFileVersion, true) return } content, err := os.ReadFile(configVersionFile) if err != nil { shared.EchoError("Error reading config version file:" + err.Error()) - shared.SetEnvAndSourceBashrc("AZMON_AGENT_CFG_FILE_VERSION", defaultConfigFileVersion, true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_FILE_VERSION", defaultConfigFileVersion, true) return } trimmedContent := strings.TrimSpace(string(content)) @@ -52,7 +52,7 @@ func setConfigFileVersionEnv() { if len(configFileVersion) > 10 { configFileVersion = configFileVersion[:10] } - shared.SetEnvAndSourceBashrc("AZMON_AGENT_CFG_FILE_VERSION", configFileVersion, true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_FILE_VERSION", configFileVersion, true) } func parseSettingsForPodAnnotations() { @@ -95,9 +95,9 @@ func handlePodAnnotationsFile(filename string) { value := line[index+1:] if key == "AZMON_PROMETHEUS_POD_ANNOTATION_NAMESPACES_REGEX" { - shared.SetEnvAndSourceBashrc(key, value, false) + shared.SetEnvAndSourceBashrcOrPowershell(key, value, false) } else { - shared.SetEnvAndSourceBashrc(key, value, false) + shared.SetEnvAndSourceBashrcOrPowershell(key, value, false) } } @@ -158,8 +158,8 @@ func Configmapparser() { prometheusConfigMerger(false) } - shared.SetEnvAndSourceBashrc("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "false", true) - shared.SetEnvAndSourceBashrc("CONFIG_VALIDATOR_RUNNING_IN_AGENT", "true", true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "false", true) + shared.SetEnvAndSourceBashrcOrPowershell("CONFIG_VALIDATOR_RUNNING_IN_AGENT", "true", true) // Running promconfigvalidator if promMergedConfig.yml exists if shared.FileExists("/opt/promMergedConfig.yml") { @@ -172,7 +172,7 @@ func Configmapparser() { if err != nil { fmt.Println("prom-config-validator::Prometheus 
custom config validation failed. The custom config will not be used") fmt.Printf("Command execution failed: %v\n", err) - shared.SetEnvAndSourceBashrc("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "true", true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_INVALID_CUSTOM_PROMETHEUS_CONFIG", "true", true) if shared.FileExists(mergedDefaultConfigPath) { fmt.Println("prom-config-validator::Running validator on just default scrape configs") shared.StartCommandAndWait("/opt/promconfigvalidator", "--config", mergedDefaultConfigPath, "--output", "/opt/collector-config-with-defaults.yml", "--otelTemplate", "/opt/microsoft/otelcollector/collector-config-template.yml") @@ -182,7 +182,7 @@ func Configmapparser() { shared.CopyFile("/opt/collector-config-with-defaults.yml", "/opt/microsoft/otelcollector/collector-config-default.yml") } } - shared.SetEnvAndSourceBashrc("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", true) } } } else if _, err := os.Stat(mergedDefaultConfigPath); err == nil { @@ -195,11 +195,11 @@ func Configmapparser() { fmt.Println("prom-config-validator::Prometheus default scrape config validation succeeded, using this as collector config") shared.CopyFile("/opt/collector-config-with-defaults.yml", "/opt/microsoft/otelcollector/collector-config-default.yml") } - shared.SetEnvAndSourceBashrc("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", true) } else { // This else block is needed, when there is no custom config mounted as config map or default configs enabled fmt.Println("prom-config-validator::No custom config via configmap or default scrape configs enabled.") - shared.SetEnvAndSourceBashrc("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", true) + shared.SetEnvAndSourceBashrcOrPowershell("AZMON_USE_DEFAULT_PROMETHEUS_CONFIG", "true", true) } if _, err := os.Stat("/opt/microsoft/prom_config_validator_env_var"); err == nil { diff --git a/otelcollector/shared/file_utilities.go b/otelcollector/shared/file_utilities.go index 899b37ffc..c601fdcb1 100644 --- a/otelcollector/shared/file_utilities.go +++ b/otelcollector/shared/file_utilities.go @@ -127,7 +127,7 @@ func SetEnvVarsFromFile(filename string) error { key := parts[0] value := parts[1] - SetEnvAndSourceBashrc(key, value, false) + SetEnvAndSourceBashrcOrPowershell(key, value, false) } if err := scanner.Err(); err != nil { diff --git a/otelcollector/shared/helpers.go b/otelcollector/shared/helpers.go index 99e9c39b2..3984fb270 100644 --- a/otelcollector/shared/helpers.go +++ b/otelcollector/shared/helpers.go @@ -1,12 +1,13 @@ package shared import ( + "fmt" + "log" "os" + "os/exec" "regexp" "strings" - "log" - "os/exec" - "fmt" + "time" ) func GetEnv(key, defaultValue string) string { @@ -53,9 +54,9 @@ func DetermineConfigFiles(controllerType, clusterOverride string) (string, strin default: fluentBitConfigFile = "/opt/fluent-bit/fluent-bit-windows.conf" if clusterOverride == "true" { - meConfigFile = "/usr/sbin/me_ds_internal_win.config" + meConfigFile = "/opt/metricextension/me_ds_internal_win.config" } else { - meConfigFile = "/usr/sbin/me_ds_win.config" + meConfigFile = "/opt/metricextension/me_ds_win.config" } } @@ -92,29 +93,193 @@ func StartTelegraf() { fmt.Println("Starting Telegraf") if telemetryDisabled := os.Getenv("TELEMETRY_DISABLED"); telemetryDisabled != "true" { - controllerType := os.Getenv("CONTROLLER_TYPE") - azmonOperatorEnabled := 
os.Getenv("AZMON_OPERATOR_ENABLED") - - var telegrafConfig string - - switch { - case controllerType == "ReplicaSet" && azmonOperatorEnabled == "true": - telegrafConfig = "/opt/telegraf/telegraf-prometheus-collector-ta-enabled.conf" - case controllerType == "ReplicaSet": - telegrafConfig = "/opt/telegraf/telegraf-prometheus-collector.conf" - default: - telegrafConfig = "/opt/telegraf/telegraf-prometheus-collector-ds.conf" + if os.Getenv("OS_TYPE") == "linux" { + controllerType := os.Getenv("CONTROLLER_TYPE") + azmonOperatorEnabled := os.Getenv("AZMON_OPERATOR_ENABLED") + + var telegrafConfig string + + switch { + case controllerType == "ReplicaSet" && azmonOperatorEnabled == "true": + telegrafConfig = "/opt/telegraf/telegraf-prometheus-collector-ta-enabled.conf" + case controllerType == "ReplicaSet": + telegrafConfig = "/opt/telegraf/telegraf-prometheus-collector.conf" + default: + telegrafConfig = "/opt/telegraf/telegraf-prometheus-collector-ds.conf" + } + + telegrafCmd := exec.Command("/usr/bin/telegraf", "--config", telegrafConfig) + telegrafCmd.Stdout = os.Stdout + telegrafCmd.Stderr = os.Stderr + if err := telegrafCmd.Start(); err != nil { + fmt.Println("Error starting telegraf:", err) + return + } + + telegrafVersion, _ := os.ReadFile("/opt/telegrafversion.txt") + fmt.Printf("TELEGRAF_VERSION=%s\n", string(telegrafVersion)) + } + } else { + telegrafPath := "C:\\opt\\telegraf\\telegraf.exe" + configPath := "C:\\opt\\telegraf\\telegraf-prometheus-collector-windows.conf" + + // Install Telegraf service + installCmd := exec.Command(telegrafPath, "--service", "install", "--config", configPath) + if err := installCmd.Run(); err != nil { + log.Fatalf("Error installing Telegraf service: %v\n", err) + } + + // Set delayed start if POD_NAME is set + serverName := os.Getenv("POD_NAME") + if serverName != "" { + setDelayCmd := exec.Command("sc.exe", fmt.Sprintf("\\\\%s", serverName), "config", "telegraf", "start= delayed-auto") + if err := setDelayCmd.Run(); err != nil { + log.Printf("Failed to set delayed start for Telegraf: %v\n", err) + } else { + fmt.Println("Successfully set delayed start for Telegraf") + } + } else { + fmt.Println("Failed to get environment variable POD_NAME to set delayed Telegraf start") + } + + // Run Telegraf in test mode + testCmd := exec.Command(telegrafPath, "--config", configPath, "--test") + testCmd.Stdout = os.Stdout + testCmd.Stderr = os.Stderr + if err := testCmd.Run(); err != nil { + log.Printf("Error running Telegraf in test mode: %v\n", err) + } + + // Start Telegraf service + startCmd := exec.Command(telegrafPath, "--service", "start") + if err := startCmd.Run(); err != nil { + log.Printf("Error starting Telegraf service: %v\n", err) + } + + // Check if Telegraf is running, retry if necessary + for { + statusCmd := exec.Command("sc.exe", "query", "telegraf") + output, err := statusCmd.CombinedOutput() + if err != nil { + log.Printf("Error checking Telegraf service status: %v\n", err) + time.Sleep(30 * time.Second) + continue + } + + if string(output) != "" { + fmt.Println("Telegraf is running") + break + } + + fmt.Println("Trying to start Telegraf again in 30 seconds, since it might not have been ready...") + time.Sleep(30 * time.Second) + startCmd := exec.Command(telegrafPath, "--service", "start") + if err := startCmd.Run(); err != nil { + log.Printf("Error starting Telegraf service again: %v\n", err) + } } + } +} + +func SetEnvVariablesForWindows() { + // Set Windows version (Microsoft Windows Server 2019 Datacenter or 2022 Datacenter) + out, err := 
exec.Command("wmic", "os", "get", "Caption").Output() + if err != nil { + log.Fatalf("Failed to get Windows version: %v", err) + } + windowsVersion := strings.TrimSpace(string(out)) + windowsVersion = strings.Split(windowsVersion, "\n")[1] // Extract version name + + // Set environment variables for process and machine + os.Setenv("windowsVersion", windowsVersion) + SetEnvAndSourceBashrcOrPowershell("windowsVersion", windowsVersion, true) - telegrafCmd := exec.Command("/usr/bin/telegraf", "--config", telegrafConfig) - telegrafCmd.Stdout = os.Stdout - telegrafCmd.Stderr = os.Stderr - if err := telegrafCmd.Start(); err != nil { - fmt.Println("Error starting telegraf:", err) - return + // Resource ID override + mac := os.Getenv("MAC") + cluster := os.Getenv("CLUSTER") + nodeName := os.Getenv("NODE_NAME") + if mac == "" { + if cluster == "" { + fmt.Printf("CLUSTER is empty or not set. Using %s as CLUSTER\n", nodeName) + os.Setenv("customResourceId", nodeName) + SetEnvAndSourceBashrcOrPowershell("customResourceId", nodeName, true) + } else { + os.Setenv("customResourceId", cluster) + SetEnvAndSourceBashrcOrPowershell("customResourceId", cluster, true) } + } else { + SetEnvAndSourceBashrcOrPowershell("customResourceId", cluster, true) + + aksRegion := os.Getenv("AKSREGION") + SetEnvAndSourceBashrcOrPowershell("customRegion", aksRegion, true) + + // Set variables for Telegraf + SetTelegrafVariables(aksRegion, cluster) + } + + // Set monitoring-related variables + SetMonitoringVariables() - telegrafVersion, _ := os.ReadFile("/opt/telegrafversion.txt") - fmt.Printf("TELEGRAF_VERSION=%s\n", string(telegrafVersion)) + // Handle custom environment settings + customEnvironment := strings.ToLower(os.Getenv("customEnvironment")) + mcsEndpoint, mcsGlobalEndpoint := GetMcsEndpoints(customEnvironment) + + // Set MCS endpoint environment variables + SetEnvAndSourceBashrcOrPowershell("MCS_AZURE_RESOURCE_ENDPOINT", mcsEndpoint, true) + SetEnvAndSourceBashrcOrPowershell("MCS_GLOBAL_ENDPOINT", mcsGlobalEndpoint, true) +} + +func SetTelegrafVariables(aksRegion, cluster string) { + SetEnvAndSourceBashrcOrPowershell("AKSREGION", aksRegion, true) + SetEnvAndSourceBashrcOrPowershell("CLUSTER", cluster, true) + azmonClusterAlias := os.Getenv("AZMON_CLUSTER_ALIAS") + SetEnvAndSourceBashrcOrPowershell("AZMON_CLUSTER_ALIAS", azmonClusterAlias, true) +} + +func SetMonitoringVariables() { + SetEnvAndSourceBashrcOrPowershell("MONITORING_ROLE_INSTANCE", "cloudAgentRoleInstanceIdentity", true) + SetEnvAndSourceBashrcOrPowershell("MA_RoleEnvironment_OsType", "Windows", true) + SetEnvAndSourceBashrcOrPowershell("MONITORING_VERSION", "2.0", true) + SetEnvAndSourceBashrcOrPowershell("MONITORING_ROLE", "cloudAgentRoleIdentity", true) + SetEnvAndSourceBashrcOrPowershell("MONITORING_IDENTITY", "use_ip_address", true) + SetEnvAndSourceBashrcOrPowershell("MONITORING_USE_GENEVA_CONFIG_SERVICE", "false", true) + SetEnvAndSourceBashrcOrPowershell("SKIP_IMDS_LOOKUP_FOR_LEGACY_AUTH", "true", true) + SetEnvAndSourceBashrcOrPowershell("ENABLE_MCS", "true", true) + SetEnvAndSourceBashrcOrPowershell("MDSD_USE_LOCAL_PERSISTENCY", "false", true) + SetEnvAndSourceBashrcOrPowershell("MA_RoleEnvironment_Location", os.Getenv("AKSREGION"), true) + SetEnvAndSourceBashrcOrPowershell("MA_RoleEnvironment_ResourceId", os.Getenv("CLUSTER"), true) + SetEnvAndSourceBashrcOrPowershell("MCS_CUSTOM_RESOURCE_ID", os.Getenv("CLUSTER"), true) +} + +func GetMcsEndpoints(customEnvironment string) (string, string) { + var mcsEndpoint, mcsGlobalEndpoint string + + 
switch customEnvironment { + case "azurepubliccloud": + aksRegion := strings.ToLower(os.Getenv("AKSREGION")) + if aksRegion == "eastus2euap" || aksRegion == "centraluseuap" { + mcsEndpoint = "https://monitor.azure.com/" + mcsGlobalEndpoint = "https://global.handler.canary.control.monitor.azure.com" + } else { + mcsEndpoint = "https://monitor.azure.com/" + mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.com" + } + case "azureusgovernmentcloud": + mcsEndpoint = "https://monitor.azure.us/" + mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.us" + case "azurechinacloud": + mcsEndpoint = "https://monitor.azure.cn/" + mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.cn" + case "usnat": + mcsEndpoint = "https://monitor.azure.eaglex.ic.gov/" + mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.eaglex.ic.gov" + case "ussec": + mcsEndpoint = "https://monitor.azure.microsoft.scloud/" + mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.microsoft.scloud/" + default: + fmt.Printf("Unknown customEnvironment: %s, setting mcs endpoint to default azurepubliccloud values\n", customEnvironment) + mcsEndpoint = "https://monitor.azure.com/" + mcsGlobalEndpoint = "https://global.handler.control.monitor.azure.com" } + return mcsEndpoint, mcsGlobalEndpoint } diff --git a/otelcollector/shared/process_utilities.go b/otelcollector/shared/process_utilities_linux.go similarity index 97% rename from otelcollector/shared/process_utilities.go rename to otelcollector/shared/process_utilities_linux.go index ec7027d75..c4f2b68a4 100644 --- a/otelcollector/shared/process_utilities.go +++ b/otelcollector/shared/process_utilities_linux.go @@ -43,9 +43,9 @@ func IsProcessRunning(processName string) bool { return false } -// SetEnvAndSourceBashrc sets a key-value pair as an environment variable in the .bashrc file +// SetEnvAndSourceBashrcOrPowershell sets a key-value pair as an environment variable in the .bashrc file // and sources the file to apply changes immediately. If echo is true, it calls EchoVar -func SetEnvAndSourceBashrc(key, value string, echo bool) error { +func SetEnvAndSourceBashrcOrPowershell(key, value string, echo bool) error { // Set the environment variable err := os.Setenv(key, value) @@ -106,7 +106,6 @@ func SetEnvAndSourceBashrc(key, value string, echo bool) error { func StartCommandWithOutputFile(command string, args []string, outputFile string) (int, error) { cmd := exec.Command(command, args...) 
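An illustrative test for GetMcsEndpoints as added above (not part of the PR): with AKSREGION unset, azurepubliccloud maps to the default public-cloud endpoints.

    package shared

    import "testing"

    func TestGetMcsEndpointsPublicCloud(t *testing.T) {
        endpoint, global := GetMcsEndpoints("azurepubliccloud")
        if endpoint != "https://monitor.azure.com/" {
            t.Errorf("unexpected MCS endpoint: %s", endpoint)
        }
        if global != "https://global.handler.control.monitor.azure.com" {
            t.Errorf("unexpected global endpoint: %s", global)
        }
    }
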
- // Set environment variables from os.Environ() cmd.Env = append(os.Environ()) // Create file to write stdout and stderr @@ -330,6 +329,10 @@ func StartMetricsExtensionWithConfigOverridesForUnderlay(configOverrides string) } } +func StartMA() { + fmt.Println("Should never reach here, defining function since main.go expects it") +} + func StartMdsdForOverlay() { mdsdLog := os.Getenv("MDSD_LOG") if mdsdLog == "" { diff --git a/otelcollector/shared/process_utilities_windows.go b/otelcollector/shared/process_utilities_windows.go new file mode 100644 index 000000000..31a0d875f --- /dev/null +++ b/otelcollector/shared/process_utilities_windows.go @@ -0,0 +1,574 @@ +package shared + +import ( + "fmt" + "io" + "log" + "net/http" + "os" + "os/exec" + "strings" + "sync" + "syscall" + "time" + "unsafe" +) + +// IsProcessRunning checks if a process with the given name is running on the system +func IsProcessRunning(processName string) bool { + osType := os.Getenv("OS_TYPE") + + switch osType { + case "linux": + return isProcessRunningLinux(processName) + case "windows": + return isProcessRunningWindows(processName) + default: + fmt.Println("Unsupported OS_TYPE:", osType) + return false + } +} + +// Linux implementation using the /proc directory +func isProcessRunningLinux(processName string) bool { + pid := os.Getpid() + dir, err := os.Open("/proc") + if err != nil { + fmt.Println("Error opening /proc:", err) + return false + } + defer dir.Close() + + procs, err := dir.Readdirnames(0) + if err != nil { + fmt.Println("Error reading /proc:", err) + return false + } + + for _, proc := range procs { + if _, err := os.Stat("/proc/" + proc + "/cmdline"); err == nil { + cmdline, err := os.ReadFile("/proc/" + proc + "/cmdline") + if err == nil && strings.Contains(string(cmdline), processName) { + if proc != fmt.Sprintf("%d", pid) { + return true + } + } + } + } + return false +} + +type ProcessEntry32 struct { + Size uint32 + CntUsage uint32 + ProcessID uint32 + DefaultHeapID uintptr + ModuleID uint32 + CntThreads uint32 + ParentProcessID uint32 + PriorityClassBase int32 + Flags uint32 + ExeFile [260]uint16 // Process name +} + +// Windows implementation using syscalls +func isProcessRunningWindows(processName string) bool { + kernel32 := syscall.NewLazyDLL("kernel32.dll") + procSnapshot := kernel32.NewProc("CreateToolhelp32Snapshot") + procProcessFirst := kernel32.NewProc("Process32FirstW") + procProcessNext := kernel32.NewProc("Process32NextW") + handle, _, _ := procSnapshot.Call(2, 0) // TH32CS_SNAPPROCESS + if handle == 0 { + fmt.Println("Error getting snapshot of processes") + return false + } + defer syscall.CloseHandle(syscall.Handle(handle)) + var entry ProcessEntry32 + entry.Size = uint32(unsafe.Sizeof(entry)) + // Get the first process + ret, _, _ := procProcessFirst.Call(handle, uintptr(unsafe.Pointer(&entry))) + for ret != 0 { + // Convert UTF-16 file name to string + exeFile := syscall.UTF16ToString(entry.ExeFile[:]) + + // Case-insensitive comparison + if strings.EqualFold(exeFile, processName) { + return true + } + // Move to the next process + ret, _, _ = procProcessNext.Call(handle, uintptr(unsafe.Pointer(&entry))) + } + return false +} + +// SetEnvAndSourceBashrcOrPowershell sets a key-value pair as an environment variable. +// If OS_TYPE is 'linux', it sets the variable in the .bashrc file and sources it. +// If OS_TYPE is 'windows', it sets the variable in the system environment. 
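Callers of the Windows IsProcessRunning above pass a bare executable image name, which is matched case-insensitively against each ProcessEntry32.ExeFile in the Toolhelp32 snapshot. An illustrative call (not in the PR):

    package shared

    import "fmt"

    // checkCollector shows the image-name convention healthHandler relies on.
    func checkCollector() {
        if IsProcessRunning("otelcollector.exe") {
            fmt.Println("otelcollector.exe is running")
        } else {
            fmt.Println("otelcollector.exe is not running")
        }
    }
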
+// SetEnvAndSourceBashrcOrPowershell sets a key-value pair as an environment variable.
+// If OS_TYPE is 'linux', it sets the variable in the .bashrc file and sources it.
+// If OS_TYPE is 'windows', it sets the variable in the system environment.
+func SetEnvAndSourceBashrcOrPowershell(key, value string, echo bool) error {
+	// Get the OS_TYPE from environment variables
+	osType := os.Getenv("OS_TYPE")
+
+	if osType == "linux" {
+		fmt.Println("Should never reach here as this is the windows file")
+	} else if osType == "windows" {
+		// On Windows, set the environment variable for the machine (persistent across sessions).
+		// Note: setx requires elevation for machine scope and truncates values longer than 1024 characters.
+		cmd := exec.Command("setx", key, value, "/M") // "/M" flag sets the variable for the machine
+		if err := cmd.Run(); err != nil {
+			return fmt.Errorf("failed to set environment variable on Windows (Machine scope): %v", err)
+		}
+
+		// Set the environment variable for the current process
+		err := os.Setenv(key, value)
+		if err != nil {
+			return fmt.Errorf("failed to set environment variable for current session: %v", err)
+		}
+	} else {
+		return fmt.Errorf("unsupported OS_TYPE: %s", osType)
+	}
+
+	// Conditionally call EchoVar
+	if echo {
+		EchoVar(key, value)
+	}
+
+	return nil
+}
+
+func StartCommandWithOutputFile(command string, args []string, outputFile string) (int, error) {
+	cmd := exec.Command(command, args...)
+
+	// Inherit the current process environment
+	cmd.Env = os.Environ()
+
+	// Create file to write stdout and stderr
+	file, err := os.Create(outputFile)
+	if err != nil {
+		return 0, fmt.Errorf("error creating output file: %v", err)
+	}
+
+	// Create pipes to capture stdout and stderr
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		file.Close()
+		return 0, fmt.Errorf("error creating stdout pipe: %v", err)
+	}
+
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		file.Close()
+		return 0, fmt.Errorf("error creating stderr pipe: %v", err)
+	}
+
+	// Start the command
+	if err := cmd.Start(); err != nil {
+		file.Close()
+		return 0, fmt.Errorf("error starting command: %v", err)
+	}
+
+	// Create a wait group to wait for goroutines
+	var wg sync.WaitGroup
+	wg.Add(2)
+
+	// Create goroutines to continuously read and write stdout and stderr
+	go func() {
+		defer wg.Done()
+		if _, err := io.Copy(file, stdout); err != nil {
+			fmt.Printf("Error copying stdout to file: %v\n", err)
+		}
+	}()
+
+	go func() {
+		defer wg.Done()
+		if _, err := io.Copy(file, stderr); err != nil {
+			fmt.Printf("Error copying stderr to file: %v\n", err)
+		}
+	}()
+
+	// Wait for both goroutines to finish before closing the file
+	go func() {
+		wg.Wait()
+		file.Close()
+	}()
+
+	// Return the PID of the started process
+	pid := cmd.Process.Pid
+
+	return pid, nil
+}
+
+func StartCommand(command string, args ...string) {
+	cmd := exec.Command(command, args...)
+
+	// Inherit the current process environment
+	cmd.Env = os.Environ()
+
+	// Create pipes to capture stdout and stderr
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		fmt.Printf("Error creating stdout pipe: %v\n", err)
+		return
+	}
+
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		fmt.Printf("Error creating stderr pipe: %v\n", err)
+		return
+	}
+
+	// Start the command
+	err = cmd.Start()
+	if err != nil {
+		fmt.Printf("Error starting command: %v\n", err)
+		return
+	}
+
+	// Create goroutines to capture and print stdout and stderr
+	go func() {
+		stdoutBytes, _ := io.ReadAll(stdout)
+		fmt.Print(string(stdoutBytes))
+	}()
+
+	go func() {
+		stderrBytes, _ := io.ReadAll(stderr)
+		fmt.Print(string(stderrBytes))
+	}()
+}
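For orientation, here is a minimal caller-side sketch of StartCommandWithOutputFile paired with IsProcessRunning to catch a child that dies right after launch. The binary path, arguments, output file, and import path are all illustrative, not taken from this PR:

```go
package main

import (
	"log"
	"time"

	"github.com/azure/prometheus-collector/otelcollector/shared" // import path illustrative
)

func main() {
	// Both the binary and the output file below are illustrative.
	pid, err := shared.StartCommandWithOutputFile(
		"C:\\opt\\telegraf\\telegraf.exe",
		[]string{"--config", "C:\\opt\\telegraf\\telegraf.conf"},
		"C:\\opt\\telegraf-startup.log",
	)
	if err != nil {
		log.Fatalf("start failed: %v", err)
	}

	// Give the child a moment, then confirm it is still alive.
	time.Sleep(2 * time.Second)
	if !shared.IsProcessRunning("telegraf.exe") {
		log.Fatalf("process %d exited early; see the output file for details", pid)
	}
}
```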
+func StartCommandAndWait(command string, args ...string) error {
+	cmd := exec.Command(command, args...)
+
+	// Inherit the current process environment
+	cmd.Env = os.Environ()
+
+	// Create pipes to capture stdout and stderr
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		return fmt.Errorf("error creating stdout pipe: %v", err)
+	}
+
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		return fmt.Errorf("error creating stderr pipe: %v", err)
+	}
+
+	// Start the command
+	err = cmd.Start()
+	if err != nil {
+		return fmt.Errorf("error starting command: %v", err)
+	}
+
+	// Capture and print stdout and stderr, draining both pipes fully
+	var wg sync.WaitGroup
+	wg.Add(2)
+
+	go func() {
+		defer wg.Done()
+		stdoutBytes, _ := io.ReadAll(stdout)
+		fmt.Print(string(stdoutBytes))
+	}()
+
+	go func() {
+		defer wg.Done()
+		stderrBytes, _ := io.ReadAll(stderr)
+		fmt.Print(string(stderrBytes))
+	}()
+
+	// Wait for the pipe readers first: cmd.Wait closes the pipes, so it must run last
+	wg.Wait()
+	err = cmd.Wait()
+	if err != nil {
+		return fmt.Errorf("error waiting for command: %v", err)
+	}
+
+	return nil
+}
+
+func copyOutputMulti(src io.Reader, dest io.Writer, file *os.File) {
+	// Create a multi-writer to write to both the file and os.Stdout/os.Stderr
+	multiWriter := io.MultiWriter(dest, file)
+
+	_, err := io.Copy(multiWriter, src)
+	if err != nil {
+		fmt.Printf("Error copying output: %v\n", err)
+	}
+}
+
+func copyOutputPipe(src io.Reader, dest io.Writer) {
+	_, err := io.Copy(dest, src)
+	if err != nil {
+		fmt.Printf("Error copying output: %v\n", err)
+	}
+}
+
+func copyOutputFile(src io.Reader, file *os.File) {
+	_, err := io.Copy(file, src)
+	if err != nil {
+		fmt.Printf("Error copying output: %v\n", err)
+	}
+}
+
+// StartMetricsExtensionForOverlay starts the MetricsExtension process based on the OS
+func StartMetricsExtensionForOverlay(meConfigFile string) (int, error) {
+	osType := os.Getenv("OS_TYPE")
+	var cmd *exec.Cmd
+
+	switch osType {
+	case "linux":
+		cmd = exec.Command("/usr/sbin/MetricsExtension", "-Logger", "File", "-LogLevel", "Info", "-LocalControlChannel", "-TokenSource", "AMCS", "-DataDirectory", "/etc/mdsd.d/config-cache/metricsextension", "-Input", "otlp_grpc_prom", "-ConfigOverridesFilePath", meConfigFile)
+
+	case "windows":
+		// Prepare the command and its arguments
+		cmd = exec.Command(
+			"C:\\opt\\metricextension\\MetricsExtension\\MetricsExtension.Native.exe",
+			"-Logger", "File",
+			"-LogLevel", "Info",
+			"-LocalControlChannel",
+			"-TokenSource", "AMCS",
+			"-DataDirectory", "C:\\opt\\genevamonitoringagent\\datadirectory\\mcs\\metricsextension\\",
+			"-Input", "otlp_grpc_prom",
+			"-ConfigOverridesFilePath", meConfigFile,
+		)
+
+	default:
+		// Guard against a nil cmd dereference below when OS_TYPE is unset or unexpected
+		return 0, fmt.Errorf("unsupported OS_TYPE: %s", osType)
+	}
+
+	cmd.Env = os.Environ()
+
+	err := cmd.Start()
+	if err != nil {
+		return 0, fmt.Errorf("error starting MetricsExtension: %v", err)
+	}
+
+	return cmd.Process.Pid, nil
+}
+
+// Note: configOverrides is currently unused; the overrides path is fixed to /usr/sbin/me.config
+func StartMetricsExtensionWithConfigOverridesForUnderlay(configOverrides string) {
+	cmd := exec.Command("/usr/sbin/MetricsExtension", "-Logger", "Console", "-LogLevel", "Error", "-LocalControlChannel", "-TokenSource", "AMCS", "-DataDirectory", "/etc/mdsd.d/config-cache/metricsextension", "-Input", "otlp_grpc_prom", "-ConfigOverridesFilePath", "/usr/sbin/me.config")
+
+	// Create pipes to capture stdout and stderr
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		fmt.Printf("Error creating stdout pipe: %v\n", err)
+		return
+	}
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		fmt.Printf("Error creating stderr pipe: %v\n", err)
+		return
+	}
+
+	// Start the command
+	err = cmd.Start()
+	if err != nil {
+		fmt.Printf("Error starting MetricsExtension: %v\n", err)
+		return
+	}
+
+	// Forward the child's output to this process's stdout and stderr.
+	// (copyOutputMulti/copyOutputFile can be swapped in here to also or only write to a file.)
+	go copyOutputPipe(stdout, os.Stdout)
+	go copyOutputPipe(stderr, os.Stderr)
+}
+
+// StartMA starts the Geneva Monitoring Agent launcher on Windows
+func StartMA() {
+	osType := os.Getenv("OS_TYPE")
+	var cmd *exec.Cmd
+
+	switch osType {
+	case "windows":
+		cmd = exec.Command("C:\\opt\\genevamonitoringagent\\genevamonitoringagent\\Monitoring\\Agent\\MonAgentLauncher.exe", "-useenv")
+		// No stdout/stderr redirection is configured for MonAgentLauncher
+	default:
+		// The linux build has its own stub; bail out instead of dereferencing a nil cmd
+		fmt.Println("StartMA is only implemented for OS_TYPE=windows; got:", osType)
+		return
+	}
+
+	// Start the command
+	err := cmd.Start()
+	if err != nil {
+		fmt.Printf("Error starting MonAgentLauncher: %v\n", err)
+		return
+	}
+
+	fmt.Printf("%s process started successfully.\n", cmd.Path)
+}
+
+// StartMdsdForOverlay starts the mdsd process based on the OS
+func StartMdsdForOverlay() {
+	osType := os.Getenv("OS_TYPE")
+	var cmd *exec.Cmd
+
+	switch osType {
+	case "linux":
+		mdsdLog := os.Getenv("MDSD_LOG")
+		if mdsdLog == "" {
+			fmt.Println("MDSD_LOG environment variable is not set")
+			return
+		}
+		cmd = exec.Command("/usr/sbin/mdsd", "-a", "-A", "-e", mdsdLog+"/mdsd.err", "-w", mdsdLog+"/mdsd.warn", "-o", mdsdLog+"/mdsd.info", "-q", mdsdLog+"/mdsd.qos")
+		// Redirect stderr to /dev/null
+		cmd.Stderr = nil
+
+	case "windows":
+		cmd = exec.Command("C:\\opt\\genevamonitoringagent\\genevamonitoringagent\\Monitoring\\Agent\\MonAgentLauncher.exe", "-useenv")
+		// No stdout/stderr redirection is configured for MonAgentLauncher
+
+	default:
+		// Guard against a nil cmd dereference below when OS_TYPE is unset or unexpected
+		fmt.Println("Unsupported OS_TYPE:", osType)
+		return
+	}
+
+	// Start the command
+	err := cmd.Start()
+	if err != nil {
+		fmt.Printf("Error starting mdsd/MonAgentLauncher: %v\n", err)
+		return
+	}
+
+	fmt.Printf("%s process started successfully.\n", cmd.Path)
+}
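StartMA and StartMdsdForOverlay start their child and return without ever calling Wait, so an agent that exits immediately goes unnoticed (StartFluentBit below already reaps its child in a goroutine). A sketch of a small helper that generalizes that pattern — `startAndWatch` is an illustrative name, not part of this PR:

```go
package shared

import (
	"fmt"
	"log"
	"os/exec"
)

// startAndWatch is an illustrative helper: it starts cmd and reaps it in the
// background so an early exit is at least logged instead of silently ignored.
func startAndWatch(cmd *exec.Cmd, name string) error {
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("error starting %s: %w", name, err)
	}
	go func() {
		if err := cmd.Wait(); err != nil {
			log.Printf("%s exited with error: %v", name, err)
			return
		}
		log.Printf("%s exited", name)
	}()
	return nil
}
```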
+func StartMdsdForUnderlay() {
+	cmd := exec.Command("/usr/sbin/mdsd", "-a", "-A", "-D")
+
+	// Create pipes to capture stdout and stderr
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		fmt.Printf("Error creating stdout pipe: %v\n", err)
+		return
+	}
+
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		fmt.Printf("Error creating stderr pipe: %v\n", err)
+		return
+	}
+
+	// Start the command
+	err = cmd.Start()
+	if err != nil {
+		fmt.Printf("Error starting mdsd: %v\n", err)
+		return
+	}
+
+	// Forward the child's output to this process's stdout and stderr.
+	// (copyOutputMulti/copyOutputFile can be swapped in here to also or only write to a file.)
+	go copyOutputPipe(stdout, os.Stdout)
+	go copyOutputPipe(stderr, os.Stderr)
+}
+
+func StartCronDaemon() {
+	cmd := exec.Command("/usr/sbin/crond", "-n", "-s")
+	if err := cmd.Start(); err != nil {
+		log.Fatal(err)
+	}
+}
+
+func WaitForTokenAdapter(ccpMetricsEnabled string) {
+	tokenAdapterWaitSecs := 60
+	if ccpMetricsEnabled == "true" {
+		tokenAdapterWaitSecs = 20
+	}
+	waitedSecsSoFar := 1
+
+	var resp *http.Response
+	var err error
+
+	client := &http.Client{Timeout: time.Duration(2) * time.Second}
+
+	req, err := http.NewRequest("GET", "http://localhost:9999/healthz", nil)
+	if err != nil {
+		log.Printf("Unable to create http request for the healthz endpoint")
+		return
+	}
+	for {
+		if waitedSecsSoFar > tokenAdapterWaitSecs {
+			// One final attempt after the wait window, then give up either way
+			resp, err = client.Do(req)
+			if err != nil || resp.StatusCode != http.StatusOK {
+				log.Printf("giving up waiting for token adapter to become healthy after %d secs\n", waitedSecsSoFar)
+				log.Printf("export tokenadapterUnhealthyAfterSecs=%d\n", waitedSecsSoFar)
+			}
+			break
+		}
+		log.Printf("checking health of token adapter after %d secs\n", waitedSecsSoFar)
+		resp, err = client.Do(req)
+		if err == nil && resp.StatusCode == http.StatusOK {
+			log.Printf("found token adapter to be healthy after %d secs\n", waitedSecsSoFar)
+			log.Printf("export tokenadapterHealthyAfterSecs=%d\n", waitedSecsSoFar)
+			break
+		}
+		time.Sleep(1 * time.Second)
+		waitedSecsSoFar++
+	}
+
+	if resp != nil && resp.Body != nil {
+		defer resp.Body.Close()
+	}
+}
+
+func StartFluentBit(fluentBitConfigFile string) {
+	fmt.Println("Starting fluent-bit")
+	if os.Getenv("OS_TYPE") == "linux" {
+		if err := os.Mkdir("/opt/microsoft/fluent-bit", 0755); err != nil && !os.IsExist(err) {
+			log.Fatalf("Error creating directory: %v\n", err)
+		}
+
+		// Pre-create the out_appinsights runtime log file; fluent-bit's own output goes to the console below
+		logFile, err := os.Create("/opt/microsoft/fluent-bit/fluent-bit-out-appinsights-runtime.log")
+		if err != nil {
+			log.Fatalf("Error creating log file: %v\n", err)
+		}
+		defer logFile.Close()
+
+		fluentBitCmd := exec.Command("fluent-bit", "-c", fluentBitConfigFile, "-e", "/opt/fluent-bit/bin/out_appinsights.so")
+		fluentBitCmd.Stdout = os.Stdout
+		fluentBitCmd.Stderr = os.Stderr
+		if err := fluentBitCmd.Start(); err != nil {
+			log.Fatalf("Error starting fluent-bit: %v\n", err)
+		}
+	} else {
+		// On Windows the baked-in config path is used; the fluentBitConfigFile argument only applies to Linux
+		fluentBitCmd := exec.Command("C:\\opt\\fluent-bit\\bin\\fluent-bit.exe", "-c", "C:\\opt\\fluent-bit\\fluent-bit-windows.conf", "-e", "C:\\opt\\fluent-bit\\bin\\out_appinsights.so")
+		fluentBitCmd.Stdout = os.Stdout
+		fluentBitCmd.Stderr = os.Stderr
+
+		if err := fluentBitCmd.Start(); err != nil {
+			log.Fatalf("Error starting fluent-bit: %v\n", err)
+		}
+
+		// Reap fluent-bit in the background so an early exit is logged
+		go func() {
+			if err := fluentBitCmd.Wait(); err != nil {
+				log.Printf("Fluent-bit exited with error: %v\n", err)
+			}
+		}()
+	}
+}
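The remaining hunks mechanically rename every SetEnvAndSourceBashrc call site to the cross-platform helper. A minimal usage sketch of the new contract, where OS_TYPE selects .bashrc sourcing on Linux and setx /M on Windows (the import path is illustrative):

```go
package main

import (
	"fmt"
	"os"

	"github.com/azure/prometheus-collector/otelcollector/shared" // import path illustrative
)

func main() {
	// OS_TYPE drives the persistence mechanism: .bashrc on Linux, setx /M on Windows.
	if err := shared.SetEnvAndSourceBashrcOrPowershell("HTTP_PROXY_ENABLED", "true", true); err != nil {
		fmt.Fprintf(os.Stderr, "failed to persist env var: %v\n", err)
		os.Exit(1)
	}
}
```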
diff --git a/otelcollector/shared/proxy_settings.go b/otelcollector/shared/proxy_settings.go
index f94fd9e57..edf6b54aa 100644
--- a/otelcollector/shared/proxy_settings.go
+++ b/otelcollector/shared/proxy_settings.go
@@ -51,8 +51,8 @@ func addNoProxy(target string) {
 	noProxy := os.Getenv("NO_PROXY")
 	noProxy = strings.TrimSpace(noProxy)
 	noProxy += "," + target
-	SetEnvAndSourceBashrc("NO_PROXY", noProxy, true)
-	SetEnvAndSourceBashrc("no_proxy", noProxy, true)
+	SetEnvAndSourceBashrcOrPowershell("NO_PROXY", noProxy, true)
+	SetEnvAndSourceBashrcOrPowershell("no_proxy", noProxy, true)
 }
 
 func setHTTPProxyEnabled() {
@@ -60,7 +60,7 @@ func setHTTPProxyEnabled() {
 	if os.Getenv("HTTP_PROXY") != "" {
 		httpProxyEnabled = "true"
 	}
-	SetEnvAndSourceBashrc("HTTP_PROXY_ENABLED", httpProxyEnabled, true)
+	SetEnvAndSourceBashrcOrPowershell("HTTP_PROXY_ENABLED", httpProxyEnabled, true)
 }
 
 func ConfigureEnvironment() error {
@@ -69,7 +69,7 @@ func ConfigureEnvironment() error {
 	// Remove trailing '/' character from HTTP_PROXY and HTTPS_PROXY
 	proxyVariables := []string{"http_proxy", "HTTP_PROXY", "https_proxy", "HTTPS_PROXY"}
 	for _, v := range proxyVariables {
-		SetEnvAndSourceBashrc(v, removeTrailingSlash(os.Getenv(v)), true)
+		SetEnvAndSourceBashrcOrPowershell(v, removeTrailingSlash(os.Getenv(v)), true)
 	}
 
 	addNoProxy("ama-metrics-operator-targets.kube-system.svc.cluster.local")
@@ -92,11 +92,11 @@ func ConfigureEnvironment() error {
 		password := base64.StdEncoding.EncodeToString([]byte(strings.SplitN(urlParts[0], ":", 2)[1]))
 		os.WriteFile("/opt/microsoft/proxy_password", []byte(password), 0644)
 
-		SetEnvAndSourceBashrc("MDSD_PROXY_MODE", "application", true)
-		SetEnvAndSourceBashrc("MDSD_PROXY_ADDRESS", os.Getenv("HTTPS_PROXY"), true)
+		SetEnvAndSourceBashrcOrPowershell("MDSD_PROXY_MODE", "application", true)
+		SetEnvAndSourceBashrcOrPowershell("MDSD_PROXY_ADDRESS", os.Getenv("HTTPS_PROXY"), true)
 		if user := strings.SplitN(urlParts[0], ":", 2)[0]; user != "" {
-			SetEnvAndSourceBashrc("MDSD_PROXY_USERNAME", user, true)
-			SetEnvAndSourceBashrc("MDSD_PROXY_PASSWORD_FILE", "/opt/microsoft/proxy_password", true)
+			SetEnvAndSourceBashrcOrPowershell("MDSD_PROXY_USERNAME", user, true)
+			SetEnvAndSourceBashrcOrPowershell("MDSD_PROXY_PASSWORD_FILE", "/opt/microsoft/proxy_password", true)
 		}
 	}
 
diff --git a/otelcollector/shared/telemetry.go b/otelcollector/shared/telemetry.go
index 53896e766..508145177 100644
--- a/otelcollector/shared/telemetry.go
+++ b/otelcollector/shared/telemetry.go
@@ -45,7 +45,7 @@ func SetupTelemetry(customEnvironment string) {
 	}
 
 	// Export APPLICATIONINSIGHTS_AUTH
-	err := SetEnvAndSourceBashrc("APPLICATIONINSIGHTS_AUTH", encodedAIKey, false)
+	err := SetEnvAndSourceBashrcOrPowershell("APPLICATIONINSIGHTS_AUTH", encodedAIKey, false)
 	if err != nil {
 		fmt.Println("Error setting APPLICATIONINSIGHTS_AUTH environment variable:", err)
 		return
@@ -84,7 +84,7 @@ func SetupTelemetry(customEnvironment string) {
 	}
 	aiKey = string(aiKeyBytes)
 
-	err = SetEnvAndSourceBashrc("TELEMETRY_APPLICATIONINSIGHTS_KEY", aiKey, false)
+	err = SetEnvAndSourceBashrcOrPowershell("TELEMETRY_APPLICATIONINSIGHTS_KEY", aiKey, false)
 	if err != nil {
 		fmt.Println("Error setting TELEMETRY_APPLICATIONINSIGHTS_KEY environment variable:", err)
 		return