From 1c49d9dc2df49173783011dd8f085a89e6082010 Mon Sep 17 00:00:00 2001 From: Vishwanath Date: Wed, 4 Dec 2024 14:52:59 -0800 Subject: [PATCH] Fix for CCP configmap processing issue (#1023) [comment]: # (Note that your PR title should follow the conventional commit format: https://conventionalcommits.org/en/v1.0.0/#summary) # PR Description [comment]: # (The below checklist is for PRs adding new features. If a box is not checked, add a reason why it's not needed.) # New Feature Checklist - [ ] List telemetry added about the feature. - [ ] Link to the one-pager about the feature. - [ ] List any tasks necessary for release (3P docs, AKS RP chart changes, etc.) after merging the PR. - [ ] Attach results of scale and perf testing. [comment]: # (The below checklist is for code changes. Not all boxes necessarily need to be checked. Build, doc, and template changes do not need to fill out the checklist.) # Tests Checklist - [ ] Have end-to-end Ginkgo tests been run on your cluster and passed? To bootstrap your cluster to run the tests, follow [these instructions](/otelcollector/test/README.md#bootstrap-a-dev-cluster-to-run-ginkgo-tests). - Labels used when running the tests on your cluster: - [ ] `operator` - [ ] `windows` - [ ] `arm64` - [ ] `arc-extension` - [ ] `fips` - [ ] Have new tests been added? For features, have tests been added for this feature? For fixes, is there a test that could have caught this issue and could validate that the fix works? - [ ] Is a new scrape job needed? - [ ] The scrape job was added to the folder [test-cluster-yamls](/otelcollector/test/test-cluster-yamls/) in the correct configmap or as a CR. - [ ] Was a new test label added? - [ ] A string constant for the label was added to [constants.go](/otelcollector/test/utils/constants.go). - [ ] The label and description was added to the [test README](/otelcollector/test/README.md). - [ ] The label was added to this [PR checklist](/.github/pull_request_template). - [ ] The label was added as needed to [testkube-test-crs.yaml](/otelcollector/test/testkube/testkube-test-crs.yaml). - [ ] Are additional API server permissions needed for the new tests? - [ ] These permissions have been added to [api-server-permissions.yaml](/otelcollector/test/testkube/api-server-permissions.yaml). - [ ] Was a new test suite (a new folder under `/tests`) added? - [ ] The new test suite is included in [testkube-test-crs.yaml](/otelcollector/test/testkube/testkube-test-crs.yaml). --- .pipelines/azure-pipeline-build.yml | 1 - RELEASENOTES.md | 6 +++ otelcollector/VERSION | 2 +- otelcollector/build/linux/ccp/Dockerfile | 4 ++ .../configuration-reader-builder/main.go | 5 +- otelcollector/main/main.go | 24 ++++++++-- .../configmap/ccp/configmapparserforccp.go | 23 +++++++++ .../tomlparser-ccp-default-scrape-settings.go | 1 + otelcollector/shared/file_utilities.go | 47 +++++++++++++++++-- .../shared/process_utilities_linux.go | 1 + 10 files changed, 104 insertions(+), 10 deletions(-) diff --git a/.pipelines/azure-pipeline-build.yml b/.pipelines/azure-pipeline-build.yml index 5e2ba7f78..af2d20474 100644 --- a/.pipelines/azure-pipeline-build.yml +++ b/.pipelines/azure-pipeline-build.yml @@ -2,7 +2,6 @@ trigger: branches: include: - main - pr: autoCancel: true branches: diff --git a/RELEASENOTES.md b/RELEASENOTES.md index 87ac58b3e..3196f5f5e 100644 --- a/RELEASENOTES.md +++ b/RELEASENOTES.md @@ -1,5 +1,11 @@ # Azure Monitor Metrics for AKS clusters +## Release 12-04-2024 (hot-fix for ccp config map issue ) - CCP release only - +* CCP image - + +* Changelog - + - Fix for CCP Config map processing issue - (https://github.com/Azure/prometheus-collector/pull/1017) + ## Release 10-21-2024 * Linux image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.11.0-main-10-21-2024-91ec49e3` * Windows image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.11.0-main-10-21-2024-91ec49e3-win` diff --git a/otelcollector/VERSION b/otelcollector/VERSION index 1de66e5ff..d4e6cb429 100644 --- a/otelcollector/VERSION +++ b/otelcollector/VERSION @@ -1 +1 @@ -6.11.0 +6.12.0 diff --git a/otelcollector/build/linux/ccp/Dockerfile b/otelcollector/build/linux/ccp/Dockerfile index 8ba0aeb77..c4c0ca5f4 100644 --- a/otelcollector/build/linux/ccp/Dockerfile +++ b/otelcollector/build/linux/ccp/Dockerfile @@ -156,9 +156,13 @@ COPY --from=builder /usr/lib/libdl.so.2 /usr/lib/librt.so.1 /usr/lib/libpthread. # COPY --from=builder /usr/bin/curl /usr/bin/ # COPY --from=builder /lib/libcurl.so.4 /lib/libz.so.1 /lib/libc.so.6 /lib/libnghttp2.so.14 /lib/libssh2.so.1 /lib/libgssapi_krb5.so.2 /lib/libzstd.so.1 /lib/ # COPY --from=builder /usr/lib/libkrb5.so.3 /usr/lib/libk5crypto.so.3 /usr/lib/libcom_err.so.2 /usr/lib/libkrb5support.so.0 /usr/lib/libresolv.so.2 /usr/lib/ +# RUN chmod 777 /opt/; # Expose the port on which the application listens EXPOSE 8080 +# Run as root to access /etc +# USER root + # Run the Go executable, entrypoint ENTRYPOINT ["./opt/main/ccpmain"] diff --git a/otelcollector/configuration-reader-builder/main.go b/otelcollector/configuration-reader-builder/main.go index a410ee7d5..8dc364ae3 100644 --- a/otelcollector/configuration-reader-builder/main.go +++ b/otelcollector/configuration-reader-builder/main.go @@ -245,6 +245,7 @@ func main() { _, err := os.Create("/opt/inotifyoutput.txt") if err != nil { log.Fatalf("Error creating output file: %v\n", err) + fmt.Println("Error creating inotify output file:", err) } // Define the command to start inotify for config reader's liveness probe @@ -254,7 +255,8 @@ func main() { "--daemon", "--recursive", "--outfile", "/opt/inotifyoutput.txt", - "--event", "create,delete", + "--event", "create", + "--event", "delete", "--format", "%e : %T", "--timefmt", "+%s", ) @@ -263,6 +265,7 @@ func main() { err = inotifyCommandCfg.Start() if err != nil { log.Fatalf("Error starting inotify process for config reader's liveness probe: %v\n", err) + fmt.Println("Error starting inotify process:", err) } configmapsettings.Configmapparser() diff --git a/otelcollector/main/main.go b/otelcollector/main/main.go index 401f7dc8a..6d8eb4782 100644 --- a/otelcollector/main/main.go +++ b/otelcollector/main/main.go @@ -17,6 +17,9 @@ import ( ) func main() { + + + controllerType := shared.GetControllerType() cluster := shared.GetEnv("CLUSTER", "") clusterOverride := shared.GetEnv("CLUSTER_OVERRIDE", "") @@ -29,9 +32,21 @@ func main() { } if osType == "linux" { - outputFile := "/opt/inotifyoutput.txt" - if err := shared.Inotify(outputFile, "/etc/config/settings", "/etc/prometheus/certs"); err != nil { - log.Fatal(err) + outputFile := "/opt/inotifyoutput.txt" + + if ccpMetricsEnabled != "true" { //data-plane + + if err := shared.Inotify(outputFile, "/etc/config/settings"); err != nil { + log.Fatal(err) + } + + if err := shared.Inotify(outputFile, "/etc/prometheus/certs"); err != nil { + log.Fatal(err) + } + } else { //control-plane + if err := shared.InotifyCCP(outputFile, "/etc/config/settings"); err != nil { + log.Fatal(err) + } } } else if osType == "windows" { fmt.Println("Starting filesystemwatcher.ps1") @@ -213,6 +228,9 @@ func main() { "--daemon", "--outfile", outputFile, "--event", "ATTRIB", + "--event", "create", + "--event", "delete", + "--event", "modify", "--format", "%e : %T", "--timefmt", "+%s", ) diff --git a/otelcollector/shared/configmap/ccp/configmapparserforccp.go b/otelcollector/shared/configmap/ccp/configmapparserforccp.go index 4eae827e4..453be9c4f 100644 --- a/otelcollector/shared/configmap/ccp/configmapparserforccp.go +++ b/otelcollector/shared/configmap/ccp/configmapparserforccp.go @@ -3,6 +3,8 @@ package ccpconfigmapsettings import ( "fmt" "strings" + "os" + "time" // "prometheus-collector/shared" "github.com/prometheus-collector/shared" @@ -10,8 +12,23 @@ import ( func Configmapparserforccp() { fmt.Printf("in configmapparserforccp") + fmt.Printf("waiting for 30 secs...") + time.Sleep(30 * time.Second) //needed to save a restart at times when config watcher sidecar starts up later than us and hence config map wasn't yet projected into emptydir volume yet during pod startups. + configVersionPath := "/etc/config/settings/config-version" configSchemaPath := "/etc/config/settings/schema-version" + + entries, er := os.ReadDir("/etc/config/settings") + if er != nil { + fmt.Println("error listing /etc/config/settings", er) + } + + for _, e := range entries { + fmt.Println(e.Name()) + } + + fmt.Println("done listing /etc/config/settings") + // Set agent config schema version if shared.ExistsAndNotEmpty(configSchemaPath) { configVersion, err := shared.ReadAndTrim(configVersionPath) @@ -25,7 +42,10 @@ func Configmapparserforccp() { configVersion = configVersion[:10] } // Set the environment variable + fmt.Println("Configmapparserforccp setting env var AZMON_AGENT_CFG_FILE_VERSION:", configVersion) shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_FILE_VERSION", configVersion, true) + } else { + fmt.Println("Configmapparserforccp fileversion file doesn't exist. or configmap doesn't exist:", configVersionPath) } // Set agent config file version @@ -41,7 +61,10 @@ func Configmapparserforccp() { configSchemaVersion = configSchemaVersion[:10] } // Set the environment variable + fmt.Println("Configmapparserforccp setting env var AZMON_AGENT_CFG_SCHEMA_VERSION:", configSchemaVersion) shared.SetEnvAndSourceBashrcOrPowershell("AZMON_AGENT_CFG_SCHEMA_VERSION", configSchemaVersion, true) + } else { + fmt.Println("Configmapparserforccp schemaversion file doesn't exist. or configmap doesn't exist:", configSchemaPath) } // Parse the configmap to set the right environment variables for prometheus collector settings diff --git a/otelcollector/shared/configmap/ccp/tomlparser-ccp-default-scrape-settings.go b/otelcollector/shared/configmap/ccp/tomlparser-ccp-default-scrape-settings.go index 8e859752e..c056a73b0 100644 --- a/otelcollector/shared/configmap/ccp/tomlparser-ccp-default-scrape-settings.go +++ b/otelcollector/shared/configmap/ccp/tomlparser-ccp-default-scrape-settings.go @@ -111,6 +111,7 @@ func (fcw *FileConfigWriter) WriteDefaultScrapeSettingsToFile(filename string, c func (c *Configurator) ConfigureDefaultScrapeSettings() { configSchemaVersion := os.Getenv("AZMON_AGENT_CFG_SCHEMA_VERSION") + fmt.Printf("ConfigureDefaultScrapeSettings getenv:configSchemaVersion:", configSchemaVersion) fmt.Printf("Start prometheus-collector-settings Processing\n") diff --git a/otelcollector/shared/file_utilities.go b/otelcollector/shared/file_utilities.go index c601fdcb1..48dcf746d 100644 --- a/otelcollector/shared/file_utilities.go +++ b/otelcollector/shared/file_utilities.go @@ -40,12 +40,15 @@ func FmtVar(name, value string) { func ExistsAndNotEmpty(filename string) bool { info, err := os.Stat(filename) if os.IsNotExist(err) { + fmt.Println("ExistsAndNotEmpty: file:", filename, "doesn't exist") return false } if err != nil { + fmt.Println("ExistsAndNotEmpty: path:", filename, ":error:", err) return false } if info.Size() == 0 { + fmt.Println("ExistsAndNotEmpty: file size is 0 for:", filename) return false } return true @@ -137,24 +140,25 @@ func SetEnvVarsFromFile(filename string) error { return nil } -func Inotify(outputFile string, location1 string, location2 string) error { +func Inotify(outputFile string, location string) error { // Start inotify to watch for changes fmt.Println("Starting inotify for watching config map update") _, err := os.Create(outputFile) if err != nil { log.Fatalf("Error creating output file: %v\n", err) + fmt.Println("Error creating inotify output file:", err) } // Define the command to start inotify inotifyCommand := exec.Command( "inotifywait", - location1, - location2, + location, "--daemon", "--recursive", "--outfile", outputFile, - "--event", "create,delete", + "--event", "create", + "--event", "delete", "--format", "%e : %T", "--timefmt", "+%s", ) @@ -163,6 +167,41 @@ func Inotify(outputFile string, location1 string, location2 string) error { err = inotifyCommand.Start() if err != nil { log.Fatalf("Error starting inotify process: %v\n", err) + fmt.Println("Error starting inotify process:", err) + } + + return nil +} + +func InotifyCCP(outputFile string, location string) error { + // Start inotify to watch for changes + fmt.Println("Starting inotify for watching config map update for ccp") + + _, err := os.Create(outputFile) + if err != nil { + log.Fatalf("Error creating output file: %v\n", err) + fmt.Println("Error creating inotify output file:", err) + } + + // Define the command to start inotify + inotifyCommand := exec.Command( + "inotifywait", + location, + "--daemon", + "--recursive", + "--outfile", outputFile, + "--event", "create", + "--event", "delete", + "--event", "modify", + "--format", "%e : %T", + "--timefmt", "+%s", + ) + + // Start the inotify process + err = inotifyCommand.Start() + if err != nil { + log.Fatalf("Error starting inotify process: %v\n", err) + fmt.Println("Error starting inotify process:", err) } return nil diff --git a/otelcollector/shared/process_utilities_linux.go b/otelcollector/shared/process_utilities_linux.go index c4f2b68a4..344c2d0ee 100644 --- a/otelcollector/shared/process_utilities_linux.go +++ b/otelcollector/shared/process_utilities_linux.go @@ -50,6 +50,7 @@ func SetEnvAndSourceBashrcOrPowershell(key, value string, echo bool) error { // Set the environment variable err := os.Setenv(key, value) if err != nil { + fmt.Println("error in SetEnvAndSourceBashrcOrPowershell when setting key:", key, ":value:" , value, ":error:", err) return fmt.Errorf("failed to set environment variable: %v", err) }