From 65453506b5224e1f124b931162ebb0a179a7d91c Mon Sep 17 00:00:00 2001 From: rashmichandrashekar Date: Wed, 21 Aug 2024 08:53:39 -0700 Subject: [PATCH] Config Reader sidecar memory leak fix (#962) [comment]: # (Note that your PR title should follow the conventional commit format: https://conventionalcommits.org/en/v1.0.0/#summary) # PR Description [comment]: # (The below checklist is for PRs adding new features. If a box is not checked, add a reason why it's not needed.) # New Feature Checklist - [ ] List telemetry added about the feature. - [ ] Link to the one-pager about the feature. - [ ] List any tasks necessary for release (3P docs, AKS RP chart changes, etc.) after merging the PR. - [ ] Attach results of scale and perf testing. [comment]: # (The below checklist is for code changes. Not all boxes necessarily need to be checked. Build, doc, and template changes do not need to fill out the checklist.) # Tests Checklist - [ ] Have end-to-end Ginkgo tests been run on your cluster and passed? To bootstrap your cluster to run the tests, follow [these instructions](/otelcollector/test/README.md#bootstrap-a-dev-cluster-to-run-ginkgo-tests). - Labels used when running the tests on your cluster: - [ ] `operator` - [ ] `windows` - [ ] `arm64` - [ ] `arc-extension` - [ ] `fips` - [ ] Have new tests been added? For features, have tests been added for this feature? For fixes, is there a test that could have caught this issue and could validate that the fix works? - [ ] Is a new scrape job needed? - [ ] The scrape job was added to the folder [test-cluster-yamls](/otelcollector/test/test-cluster-yamls/) in the correct configmap or as a CR. - [ ] Was a new test label added? - [ ] A string constant for the label was added to [constants.go](/otelcollector/test/utils/constants.go). - [ ] The label and description was added to the [test README](/otelcollector/test/README.md). - [ ] The label was added to this [PR checklist](/.github/pull_request_template). - [ ] The label was added as needed to [testkube-test-crs.yaml](/otelcollector/test/testkube/testkube-test-crs.yaml). - [ ] Are additional API server permissions needed for the new tests? - [ ] These permissions have been added to [api-server-permissions.yaml](/otelcollector/test/testkube/api-server-permissions.yaml). - [ ] Was a new test suite (a new folder under `/tests`) added? - [ ] The new test suite is included in [testkube-test-crs.yaml](/otelcollector/test/testkube/testkube-test-crs.yaml). --- .trivyignore | 3 ++ .../configuration-reader-builder/main.go | 43 +++++++++++-------- otelcollector/shared/process_utilities.go | 18 +++++++- 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/.trivyignore b/.trivyignore index 704d4a556..63fec304c 100644 --- a/.trivyignore +++ b/.trivyignore @@ -82,3 +82,6 @@ CVE-2024-24791 CVE-2023-5678 # MEDIUM - ruby CVE-2024-27281 +# MEDIUM - targetallocator +CVE-2023-42363 +CVE-2023-42365 diff --git a/otelcollector/configuration-reader-builder/main.go b/otelcollector/configuration-reader-builder/main.go index 4470aff56..a410ee7d5 100644 --- a/otelcollector/configuration-reader-builder/main.go +++ b/otelcollector/configuration-reader-builder/main.go @@ -180,28 +180,35 @@ func taHealthHandler(w http.ResponseWriter, r *http.Request) { status := http.StatusOK message := "\ntargetallocator is running." - resp, _ := http.Get("http://localhost:8080/metrics") - - if resp != nil && resp.StatusCode == http.StatusOK { - if taConfigUpdated { - if !taLivenessStartTime.IsZero() { - duration := time.Since(taLivenessStartTime) - // Serve the response of ServiceUnavailable for 60s and then reset - if duration.Seconds() < 60 { - status = http.StatusServiceUnavailable - message += "targetallocator-config changed" - } else { - taConfigUpdated = false - taLivenessStartTime = time.Time{} + client := &http.Client{Timeout: time.Duration(2) * time.Second} + + req, err := http.NewRequest("GET", "http://localhost:8080/metrics", nil) + if err == nil { + resp, _ := client.Do(req) + if resp != nil && resp.StatusCode == http.StatusOK { + if taConfigUpdated { + if !taLivenessStartTime.IsZero() { + duration := time.Since(taLivenessStartTime) + // Serve the response of ServiceUnavailable for 60s and then reset + if duration.Seconds() < 60 { + status = http.StatusServiceUnavailable + message += "targetallocator-config changed" + } else { + taConfigUpdated = false + taLivenessStartTime = time.Time{} + } } } - } - if status != http.StatusOK { - fmt.Printf(message) + if status != http.StatusOK { + fmt.Printf(message) + } + w.WriteHeader(status) + fmt.Fprintln(w, message) + } + if resp != nil && resp.Body != nil { + defer resp.Body.Close() } - w.WriteHeader(status) - fmt.Fprintln(w, message) } else { message = "\ncall to get TA metrics failed" status = http.StatusServiceUnavailable diff --git a/otelcollector/shared/process_utilities.go b/otelcollector/shared/process_utilities.go index 38d198f11..794cf68b0 100644 --- a/otelcollector/shared/process_utilities.go +++ b/otelcollector/shared/process_utilities.go @@ -406,16 +406,26 @@ func WaitForTokenAdapter(ccpMetricsEnabled string) { } waitedSecsSoFar := 1 + var resp *http.Response + var err error + + client := &http.Client{Timeout: time.Duration(2) * time.Second} + + req, err := http.NewRequest("GET", "http://localhost:9999/healthz", nil) + if err != nil { + log.Printf("Unable to create http request for the healthz endpoint") + return + } for { if waitedSecsSoFar > tokenAdapterWaitSecs { - if _, err := http.Get("http://localhost:9999/healthz"); err != nil { + if resp, err = client.Do(req); err != nil { log.Printf("giving up waiting for token adapter to become healthy after %d secs\n", waitedSecsSoFar) log.Printf("export tokenadapterUnhealthyAfterSecs=%d\n", waitedSecsSoFar) break } } else { log.Printf("checking health of token adapter after %d secs\n", waitedSecsSoFar) - resp, err := http.Get("http://localhost:9999/healthz") + resp, err = client.Do(req) if err == nil && resp.StatusCode == http.StatusOK { log.Printf("found token adapter to be healthy after %d secs\n", waitedSecsSoFar) log.Printf("export tokenadapterHealthyAfterSecs=%d\n", waitedSecsSoFar) @@ -425,6 +435,10 @@ func WaitForTokenAdapter(ccpMetricsEnabled string) { time.Sleep(1 * time.Second) waitedSecsSoFar++ } + + if resp != nil && resp.Body != nil { + defer resp.Body.Close() + } } func StartFluentBit(fluentBitConfigFile string) {