Skip to content

Commit

Permalink
bring back telegraf with merge from main
Browse files Browse the repository at this point in the history
  • Loading branch information
bragi92 committed May 31, 2024
2 parents 64a52fb + 3e72c0e commit d246a9b
Show file tree
Hide file tree
Showing 37 changed files with 956 additions and 769 deletions.
124 changes: 59 additions & 65 deletions .pipelines/azure-pipeline-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ variables:
MCR_REPOSITORY: '/azuremonitor/containerinsights/cidev/prometheus-collector/images'
MCR_REPOSITORY_HELM: '/azuremonitor/containerinsights/cidev/prometheus-collector'
MCR_REPOSITORY_HELM_DEPENDENCIES: '/azuremonitor/containerinsights/cidev'
KUBE_STATE_METRICS_IMAGE: 'mcr.microsoft.com/oss/kubernetes/kube-state-metrics:v2.9.2'
KUBE_STATE_METRICS_IMAGE: 'mcr.microsoft.com/oss/kubernetes/kube-state-metrics:v2.12.0'
NODE_EXPORTER_IMAGE: 'mcr.microsoft.com/oss/prometheus/node-exporter:v1.6.0'
IS_PR: $[eq(variables['Build.Reason'], 'PullRequest')]
IS_MAIN_BRANCH: $[eq(variables['Build.SourceBranchName'], 'main')]
Expand Down Expand Up @@ -1420,69 +1420,63 @@ stages:
runOnce:
deploy:
steps:
- bash: |
# Create JSON request body
cat <<EOF > "request.json"
{
"artifactEndpoints": [
{
"Regions": [
"westcentralus"
],
"Releasetrains": [
"pipeline"
],
"FullPathToHelmChart": "https://mcr.microsoft.com/azuremonitor/containerinsights/cidev/ama-metrics-arc",
"ExtensionUpdateFrequencyInMinutes": 5,
"IsCustomerHidden": true,
"ReadyforRollout": true,
"RollbackVersion": null,
"PackageConfigName": "Microsoft.AzureMonitor.Containers.Metrics-Prom041823"
}
]
}
EOF
# Send Request
SUBSCRIPTION="b9842c7c-1a38-4385-8f39-a51314758bcf"
RESOURCE_AUDIENCE="c699bf69-fb1d-4eaf-999b-99e6b2ae4d85"
SPN_CLIENT_ID="9a4c55e9-576a-450a-88bd-53bd634db38d"
SPN_TENANT_ID="72f988bf-86f1-41af-91ab-2d7cd011db47"
METHOD="PUT"
echo "Request parameter preparation, SUBSCRIPTION is $SUBSCRIPTION, RESOURCE_AUDIENCE is $RESOURCE_AUDIENCE, CHART_VERSION is $HELM_SEMVER, SPN_CLIENT_ID is $SPN_CLIENT_ID, SPN_TENANT_ID is $SPN_TENANT_ID"
# MSI is not supported
echo "Login cli using spn"
az login --service-principal --username=$SPN_CLIENT_ID --password=$(ARC_SPN_SECRET) --tenant=$SPN_TENANT_ID
if [ $? -eq 0 ]; then
echo "Logged in successfully with spn"
else
echo "-e error failed to login to az with managed identity credentials"
exit 1
fi
ACCESS_TOKEN=$(az account get-access-token --resource $RESOURCE_AUDIENCE --query accessToken -o json)
if [ $? -eq 0 ]; then
echo "get access token from resource:$RESOURCE_AUDIENCE successfully."
else
echo "-e error get access token from resource:$RESOURCE_AUDIENCE failed."
exit 1
fi
ACCESS_TOKEN=$(echo $ACCESS_TOKEN | tr -d '"' | tr -d '"\r\n')
ARC_API_URL="https://eastus2euap.dp.kubernetesconfiguration.azure.com"
EXTENSION_NAME="microsoft.azuremonitor.containers.metrics"
API_VERSION="2021-05-01"
echo "start send request"
az rest --method $METHOD --headers "{\"Authorization\": \"Bearer $ACCESS_TOKEN\", \"Content-Type\": \"application/json\"}" --body @request.json --uri $ARC_API_URL/subscriptions/$SUBSCRIPTION/extensionTypeRegistrations/$EXTENSION_NAME/versions/$HELM_SEMVER?api-version=$API_VERSION
if [ $? -eq 0 ]; then
echo "arc extension registered successfully"
else
echo "-e error failed to register arc extension"
exit 1
fi
- task: AzureCLI@2
inputs:
azureSubscription: 'prometheus-arc-dev-release-mi'
scriptType: 'bash'
scriptLocation: 'inlineScript'
inlineScript: |
# Create JSON request body
cat <<EOF > "request.json"
{
"artifactEndpoints": [
{
"Regions": [
"westcentralus"
],
"Releasetrains": [
"pipeline"
],
"FullPathToHelmChart": "https://mcr.microsoft.com/azuremonitor/containerinsights/cidev/ama-metrics-arc",
"ExtensionUpdateFrequencyInMinutes": 5,
"IsCustomerHidden": true,
"ReadyforRollout": true,
"RollbackVersion": null,
"PackageConfigName": "Microsoft.AzureMonitor.Containers.Metrics-Prom041823"
}
]
}
EOF
# Send Request
SUBSCRIPTION="b9842c7c-1a38-4385-8f39-a51314758bcf"
RESOURCE_AUDIENCE="c699bf69-fb1d-4eaf-999b-99e6b2ae4d85"
METHOD="PUT"
echo "Request parameter preparation, SUBSCRIPTION is $SUBSCRIPTION, RESOURCE_AUDIENCE is $RESOURCE_AUDIENCE, CHART_VERSION is $HELM_SEMVER"
ACCESS_TOKEN=$(az account get-access-token --resource $RESOURCE_AUDIENCE --query accessToken -o json)
if [ $? -eq 0 ]; then
echo "get access token from resource:$RESOURCE_AUDIENCE successfully."
else
echo "-e error get access token from resource:$RESOURCE_AUDIENCE failed."
exit 1
fi
ACCESS_TOKEN=$(echo $ACCESS_TOKEN | tr -d '"' | tr -d '"\r\n')
ARC_API_URL="https://eastus2euap.dp.kubernetesconfiguration.azure.com"
EXTENSION_NAME="microsoft.azuremonitor.containers.metrics"
API_VERSION="2021-05-01"
echo "start send request"
az rest --method $METHOD --headers "{\"Authorization\": \"Bearer $ACCESS_TOKEN\", \"Content-Type\": \"application/json\"}" --body @request.json --uri $ARC_API_URL/subscriptions/$SUBSCRIPTION/extensionTypeRegistrations/$EXTENSION_NAME/versions/$HELM_SEMVER?api-version=$API_VERSION
if [ $? -eq 0 ]; then
echo "arc extension registered successfully"
else
echo "-e error failed to register arc extension"
exit 1
fi
displayName: "Deploy: Release to dev release train"

- task: AzureCLI@2
Expand Down Expand Up @@ -1645,7 +1639,7 @@ stages:
displayName: "Apply TestKube CRs, scrape configs and pod/service monitors"
- bash: |
sleep 300
sleep 360
exit 0
displayName: "Wait for cluster to be ready"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
{
"type": "ShellExtensionType",
"properties": {
"imageName": "adm-ubuntu-1804-l",
"imageVersion": "v27"
"imageName": "adm-ubuntu-2004-l",
"imageVersion": "v4"
}
}
]
Expand Down Expand Up @@ -83,4 +83,4 @@
]
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,24 @@ cat <<EOF > "request.json"
EOF

# Send Request
echo "Request parameter preparation, SUBSCRIPTION is $SUBSCRIPTION, RESOURCE_AUDIENCE is $RESOURCE_AUDIENCE, CHART_VERSION is $CHART_VERSION, SPN_CLIENT_ID is $SPN_CLIENT_ID, SPN_TENANT_ID is $SPN_TENANT_ID"
echo "Request parameter preparation, SUBSCRIPTION is $SUBSCRIPTION, RESOURCE_AUDIENCE is $RESOURCE_AUDIENCE, CHART_VERSION is $CHART_VERSION"

# MSI is not supported
echo "Login cli using spn"
az login --service-principal --username=$SPN_CLIENT_ID --password=${SPN_SECRET} --tenant=$SPN_TENANT_ID
if [ $? -eq 0 ]; then
echo "Logged in successfully with spn"
# Retries needed due to: https://stackoverflow.microsoft.com/questions/195032
n=0
signInExitCode=-1
until [ "$n" -ge 5 ]
do
az login --identity --allow-no-subscriptions && signInExitCode=0 && break
n=$((n+1))
sleep 15
done

if [ $signInExitCode -eq 0 ]; then
echo "Logged in successfully"
else
echo "-e error failed to login to az with managed identity credentials"
exit 1
fi
fi

ACCESS_TOKEN=$(az account get-access-token --resource $RESOURCE_AUDIENCE --query accessToken -o json)
if [ $? -eq 0 ]; then
Expand Down
32 changes: 21 additions & 11 deletions .trivyignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@

# =========== HIGH ================
# HIGH - otelcollector
# HIGH - telegraf
GHSA-fr2g-9hjm-wr23
CVE-2023-29403
CVE-2023-45283
# HIGH - promconfigvalidator
# HIGH - go vulnerabilities
# HIGH - telegraf
CVE-2023-39325
GHSA-m425-mq94-257g
CVE-2023-47090
CVE-2023-46129
CVE-2024-21626

# =========== MEDIUM ================
# MEDIUM - otelcollector
Expand Down Expand Up @@ -45,17 +50,22 @@ CVE-2023-39318
CVE-2023-39319
CVE-2023-39326
CVE-2023-45284
# MEDIUM - telegraf
GHSA-jq35-85cj-fj4p
GHSA-7ww5-4wqc-m92c
GHSA-mhpq-9638-x6pw
CVE-2024-27304
GHSA-7jwh-3vrq-q3m8
CVE-2023-50658
CVE-2023-48795
CVE-2023-3978
CVE-2023-44487
CVE-2023-50658
CVE-2024-28110
CVE-2024-27289
CVE-2024-24557
CVE-2024-29018
# MEDIUM - mariner
CVE-2023-5678
# MEDIUM - ruby
CVE-2024-27281
# MEDIUM - KSM
CVE-2023-29406
CVE-2023-29409
CVE-2023-39318
CVE-2023-39319
CVE-2023-39326
CVE-2023-45284
# HIGH - KSM
CVE-2023-45283
CVE-2023-29403
25 changes: 25 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,30 @@
# Azure Monitor Metrics for AKS clusters

## Release 05-29-2024
* Linux image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.13-main-05-<tbd>`
* Windows image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.13-main-<tbd>-win`
* TA image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.13-main-<tbd>-targetallocator`
* cfg sidecar image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.13-main-<tbd>-cfg`
* Change log -
* fix: update kube-state-metrics from: `v2.9.2` to: `v2.12.0` - (#887) https://github.com/Azure/prometheus-collector/pull/887
* fix: switch to Managed Identity for ARC release - (#895) https://github.com/Azure/prometheus-collector/pull/895
* fix: move PV metrics to correct job (from kubelet to k-s-m) - (#898) https://github.com/Azure/prometheus-collector/pull/898
* `kube_persistentvolumeclaim_access_mode`
* `kube_persistentvolumeclaim_labels`
* `kube_persistentvolume_status_phase`
* fix: signature artifacts drop issue - https://github.com/Azure/prometheus-collector/pull/885/files
* fix: revert Telegraf removal (i.e., revert PRs #766 & #841) - (#899) https://github.com/Azure/prometheus-collector/pull/899

## Release 05-20-2024 (CCP release only)
* Linux image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.12-main-05-21-2024-56bc7e3d`
* Windows image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.12-main-05-21-2024-56bc7e3d-win`
* TA image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.12-main-05-21-2024-56bc7e3d-targetallocator`
* cfg sidecar image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.12-main-05-21-2024-56bc7e3d-cfg`
* Change log -
* fix: ccp ignoring the minimal ingestion profile setting and not respecting the keep list regex values - https://github.com/Azure/prometheus-collector/pull/886
* fix: signature artifacts drop issue - https://github.com/Azure/prometheus-collector/pull/885/files
* fix: Remove histograms from minimal ingestion list - ccp metrics collector - https://github.com/Azure/prometheus-collector/pull/884

## Release 05-07-2024 (CCP release only)
* Linux image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.11-main-6.8.11-main-05-07-2024-fcfa51bd`
* Windows image - `mcr.microsoft.com/azuremonitor/containerinsights/ciprod/prometheus-collector/images:6.8.11-main-6.8.11-main-05-07-2024-fcfa51bd-win`
Expand Down
2 changes: 1 addition & 1 deletion internal/alerts/example-alert-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@
},
{
"alert": "Memory usage % greater than 75 for prometheus-collector containers on cluster ci-dev-aks-mac-eus",
"expression": "(sum(container_memory_working_set_bytes{namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_limits{namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) > 75",
"expression": "(sum(container_memory_working_set_bytes{namespace=\"kube-system\", container=\"prometheus-collector\", image!=\"\"}) by (container, pod) / sum(kube_pod_container_resource_limits{namespace=\"kube-system\", container=\"prometheus-collector\", resource=\"memory\"}) by (container, pod)) * 100> 75",
"for": "PT3M",
"annotations": {
"description": "Memory usage greater than 75% for prometheus-collector containers on cluster ci-dev-aks-mac-eus"
Expand Down
10 changes: 10 additions & 0 deletions internal/docs/Telemetry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Application Insights telemetry resources:


| Cloud | Subscription | Resource Group | Name |
|-------------------|--------------|-----------------------------------------------|-------------------------------------------|
| AzureCloud | LA_ContainerInsights_Monitoring_USEast_Prod_02 | ContainerInsightsPrometheusCollector-Prod | ContainerInsightsPrometheusCollector-Prod |
| AzureChinaCloud | LA_ContainerInsights_INFRAINSIGHTS_MoonCake_PROD_00 | ContainerInsightsPrometheusCollector-Mooncake | ContainerInsightsPrometheusCollector-Mooncake |
| AzureUSGovernment | LA_ContainerInsights_ContainerInsights_Fairfax_PROD_00 | ContainerInsightsPrometheusCollector-Fairfax | ContainerInsightsPrometheusCollector-Fairfax |
| ussec | N/A | N/A | ContainerInsightsPrometheusCollector-USSec |
| usnat | N/A | N/A | ContainerInsightsPrometheusCollector-USNat |
2 changes: 1 addition & 1 deletion otelcollector/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
6.8.11
6.8.13
4 changes: 4 additions & 0 deletions otelcollector/build/linux/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ COPY --from=main-builder --chmod=777 /main/main.exe $tmpdir/main

COPY ./scripts/*.sh $tmpdir/
COPY ./metricextension/me.config ./metricextension/me_internal.config ./metricextension/me_ds.config ./metricextension/me_ds_internal.config /usr/sbin/
COPY ./telegraf/ $tmpdir/telegraf/
COPY ./fluent-bit/fluent-bit.conf ./fluent-bit/fluent-bit-daemonset.conf ./fluent-bit/fluent-bit-parsers.conf $tmpdir/fluent-bit/
COPY --from=fluent-bit-builder /src/out_appinsights.so $tmpdir/fluent-bit/bin/
COPY ./react /static/react
Expand Down Expand Up @@ -172,6 +173,7 @@ COPY --from=builder /usr/bin/inotifywait /usr/bin/inotifywait
COPY --from=builder /usr/bin/bash /usr/bin/bash
COPY --from=builder /usr/sbin/busybox /usr/sbin/busybox
COPY --from=builder /usr/bin/fluent-bit /usr/bin/fluent-bit
COPY --from=builder /usr/bin/telegraf /usr/bin/telegraf
COPY --from=builder /usr/sbin/crond /usr/sbin/crond
COPY --from=builder /usr/bin/vim /usr/bin/vim
COPY --from=builder /usr/share/vim /usr/share/vim
Expand Down Expand Up @@ -204,6 +206,8 @@ COPY --from=builder /lib64/libuuid.so.1 /lib64
# fluent-bit dependencies
# libssl.so.1.1 & libcrypto.so.1.1 are already available with openssl in distroless and copying them over causes FIPS HMAC verification failures
COPY --from=builder /lib/libyaml-0.so.2 /lib/libsystemd.so.0 /lib/libcurl.so.4 /lib/libm.so.6 /lib/libz.so.1 /lib/libzstd.so.1 /lib/libsasl2.so.3 /lib/libgcc_s.so.1 /lib/libc.so.6 /lib/liblzma.so.5 /lib/liblz4.so.1 /lib/libcap.so.2 /lib/libgcrypt.so.20 /lib/libnghttp2.so.14 /lib/libssh2.so.1 /lib/libgssapi_krb5.so.2 /lib/libresolv.so.2 /lib/libgpg-error.so.0 /usr/lib/libkrb5.so.3 /usr/lib/libk5crypto.so.3 /usr/lib/libcom_err.so.2 /usr/lib/libkrb5support.so.0 /lib/
# telegraf dependencies
COPY --from=builder /lib/libc.so.6 /lib/
# mdsd dependencies
COPY --from=builder /usr/lib/libdl.so.2 /usr/lib/librt.so.1 /usr/lib/libpthread.so.0 /usr/lib/libm.so.6 /usr/lib/libstdc++.so.6 /usr/lib/libgcc_s.so.1 /usr/lib/
# logrotate dependencies
Expand Down
Loading

0 comments on commit d246a9b

Please sign in to comment.