Skip to content

Commit

Permalink
merge from main (#622)
Browse files Browse the repository at this point in the history
* Add cluster scope to alert rule groups for linking them with UX (#600)

* Removing duplicate alerts from ci recommended alerts

* Remove test branch

* Remove preview keyword from policy readme

* Add cluster id scope to rule groups for alerts to be linked to UX

* adding terraform update

* Add cicd and prod near ring cluster monitoring for managed prometheus (#602)

* Removing duplicate alerts from ci recommended alerts

* Remove test branch

* Remove preview keyword from policy readme

* Add cicd and prod near ring cluster monitoring for managed prometheus

* Make single file for alerts and update dashboard to include unit in memory

* Replace label for cluster name with clusterName property

* Add telemetry for scrape interval (#614)

* Removing duplicate alerts from ci recommended alerts

* Remove test branch

* Remove preview keyword from policy readme

* Add telemetry for scrape interval hash

* remove branch name

* Upgrade dependencies (#616)

* upgrade all dependencies but collector

* update golang

* Update CVEs

* update config

* remove branch

* make build fail when trivy fails

* fix trivy scan for image not found for PRs

* windows fix for replicaset collecting windows data (#620)

* fix: remove unneeded windows scrape config in replicaset

* .

* .

* .

* revert windows telegraf update

* missed end

* remove exemplar disablement for windows

* bin place me_win configs

---------

Co-authored-by: Grace Wehner <[email protected]>
Co-authored-by: bragi92 <[email protected]>

* Bug fix- update cert thumbprint to latest ame prod cert (#615)

* Removing duplicate alerts from ci recommended alerts

* Remove test branch

* Remove preview keyword from policy readme

* Bug fix - update cert thumbprint for image signing to latest ame prod cert

* Fix $ substitution issue in relabel and metric relabel config (#618)

* dollar fix for node name and node ip

* test $ replacement

* clean up build

* Bump @adobe/css-tools in /tools/az-prom-rules-converter/web-app-example (#577)

Bumps [@adobe/css-tools](https://github.com/adobe/css-tools) from 4.0.1 to 4.3.1.
- [Changelog](https://github.com/adobe/css-tools/blob/main/History.md)
- [Commits](https://github.com/adobe/css-tools/commits)

---
updated-dependencies:
- dependency-name: "@adobe/css-tools"
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

---------

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: Sohamdg081992 <[email protected]>
Co-authored-by: Grace Wehner <[email protected]>
Co-authored-by: bragi92 <[email protected]>
Co-authored-by: rashmichandrashekar <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
  • Loading branch information
6 people authored Oct 6, 2023
1 parent 8c55db7 commit e9c13ba
Show file tree
Hide file tree
Showing 42 changed files with 13,901 additions and 8,691 deletions.
27 changes: 19 additions & 8 deletions .pipelines/azure-pipeline-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ trigger:
branches:
include:
- main
- vishwa/10022023-upgrade
pr:
autoCancel: true
branches:
Expand Down Expand Up @@ -235,7 +236,7 @@ jobs:
- task: GoTool@0
displayName: "Build: specify golang version"
inputs:
version: '1.19'
version: '1.20'

- bash: |
sudo apt-get install build-essential -y
Expand Down Expand Up @@ -336,18 +337,28 @@ jobs:
oras attach $(LINUX_FULL_IMAGE_NAME) \
--artifact-type 'application/vnd.cncf.notary.signature' \
./payload.json:application/cose \
-a "io.cncf.notary.x509chain.thumbprint#S256=[\"659AAA9C0E822B4B20A964AA0178BD9419A50530\"]"
-a "io.cncf.notary.x509chain.thumbprint#S256=[\"79E6A702361E1F60DAA84AEEC4CBF6F6420DE6BA\"]"
workingDirectory: $(Build.ArtifactStagingDirectory)/linux/
displayName: "ORAS Push Artifacts in $(Build.ArtifactStagingDirectory)/linux/"
condition: eq(variables.IS_MAIN_BRANCH, true)
- bash: |
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(LINUX_FULL_IMAGE_NAME)
if [ $? -ne 0 ]; then
exit 1
fi
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(KUBE_STATE_METRICS_IMAGE)
if [ $? -ne 0 ]; then
exit 1
fi
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(NODE_EXPORTER_IMAGE)
if [ $? -ne 0 ]; then
exit 1
fi
workingDirectory: $(Build.SourcesDirectory)
displayName: "Build: run trivy scan"
condition: eq(variables.IS_PR, false)
- task: CodeQL3000Finalize@0
displayName: 'SDL: run codeql'
Expand Down Expand Up @@ -423,7 +434,7 @@ jobs:
- task: GoTool@0
displayName: "Build: specify golang version"
inputs:
version: '1.19'
version: '1.20'

- powershell: |
./makefile_windows.ps1
Expand Down Expand Up @@ -461,7 +472,7 @@ jobs:
- task: GoTool@0
displayName: "Build: specify golang version"
inputs:
version: '1.19'
version: '1.20'

- powershell: |
./makefile_windows.ps1
Expand Down Expand Up @@ -504,7 +515,7 @@ jobs:
- task: GoTool@0
displayName: "Build: specify golang version"
inputs:
version: '1.19'
version: '1.20'

- powershell: |
New-Item -Path "$(Build.ArtifactStagingDirectory)" -Name "windows" -ItemType "directory"
Expand Down Expand Up @@ -570,7 +581,7 @@ jobs:
New-Item -ItemType Directory -Force -Path $env:USERPROFILE\bin
Copy-Item -Path $currentDirectory\oras.exe -Destination "$env:USERPROFILE\bin\"
$env:PATH = "$env:USERPROFILE\bin;$env:PATH"
oras attach $(WINDOWS_FULL_IMAGE_NAME) --artifact-type application/vnd.cncf.notary.signature ./payload.json:application/cose -a io.cncf.notary.x509chain.thumbprint#S256=[\""659AAA9C0E822B4B20A964AA0178BD9419A50530\""]
oras attach $(WINDOWS_FULL_IMAGE_NAME) --artifact-type application/vnd.cncf.notary.signature ./payload.json:application/cose -a io.cncf.notary.x509chain.thumbprint#S256=[\""79E6A702361E1F60DAA84AEEC4CBF6F6420DE6BA\""]
workingDirectory: $(Build.ArtifactStagingDirectory)/windows
displayName: "Download, install Oras and run oras attach"
condition: eq(variables.IS_MAIN_BRANCH, true)
Expand Down Expand Up @@ -600,7 +611,7 @@ jobs:
- task: HelmInstaller@1
displayName: 'Build: install Helm version'
inputs:
helmVersionToInstall: latest
helmVersionToInstall: 3.12.3

- bash: |
envsubst < $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/Chart-template.yaml > $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/Chart.yaml && envsubst < $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/values-template.yaml > $(Build.SourcesDirectory)/otelcollector/deploy/chart/prometheus-collector/values.yaml
Expand Down Expand Up @@ -643,7 +654,7 @@ jobs:
- task: HelmInstaller@1
displayName: 'Build: install Helm version'
inputs:
helmVersionToInstall: latest
helmVersionToInstall: 3.12.3

- bash: |
export HELM_CHART_NAME=$ARC_HELM_CHART_NAME
Expand Down
44 changes: 19 additions & 25 deletions .trivyignore
Original file line number Diff line number Diff line change
@@ -1,29 +1,23 @@
# Check for HIGH/CRITICAL & MEDIUM CVEs. HIGH/CRITICAL to be fixed asap, MEDIUM is best effort
# Ignore these CVEs, but continue scanning to catch other vulnerabilities. Note: this ignores these CVEs globally.

# CRITICAL/HIGH
# Ruby GEM
CVE-2021-33621
# node-exporter
CVE-2021-38561
CVE-2021-44716
CVE-2022-21698
CVE-2022-27191
# opt/telegraf/telegraf
CVE-2022-23471
CVE-2023-25153
CVE-2023-25173
# CRITICAL
# none

# MEDIUM
# opt/telegraf/telegraf
CVE-2019-3826
# kube-state-metrics
CVE-2022-41723
# opt/microsoft/otelcollector/otelcollector
# opt/promconfigvalidator
# opt/telegraf/telegraf
# kube-state-metrics
# bin/node_exporter
CVE-2022-41717
CVE-2022-46146
CVE-2022-41721
# =========== HIGH ================
# HIGH - otelcollector
CVE-2023-2253
CVE-2023-28840
# HIGH - promconfigvalidator
CVE-2023-2253
CVE-2023-28840

# =========== MEDIUM ================
# MEDIUM - otelcollector
CVE-2023-28841
CVE-2023-28842
CVE-2023-40577
# MEDIUM - promconfigvalidator
CVE-2023-28841
CVE-2023-28842
CVE-2023-40577
12 changes: 8 additions & 4 deletions AddonArmTemplate/FullAzureMonitorMetricsProfile.json
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@
"properties": {
"description": "[concat(variables('nodeRecordingRuleGroupDescription'), variables('version'))]",
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]"
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"clusterName": "[variables('clusterName')]",
"interval": "PT1M",
Expand Down Expand Up @@ -281,7 +282,8 @@
"properties": {
"description": "[concat(variables('kubernetesRecordingRuleGroupDescription'), variables('version'))]",
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]"
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"clusterName": "[variables('clusterName')]",
"interval": "PT1M",
Expand Down Expand Up @@ -385,7 +387,8 @@
"properties": {
"description": "[concat(variables('RecordingRuleGroupDescriptionWin'), variables('version'))]",
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]"
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"enabled": "[parameters('enableWindowsRecordingRules')]",
"clusterName": "[variables('clusterName')]",
Expand Down Expand Up @@ -462,7 +465,8 @@
"properties": {
"description": "[concat(variables('RecordingRuleGroupDescriptionWin'), variables('version'))]",
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]"
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"enabled": "[parameters('enableWindowsRecordingRules')]",
"clusterName": "[variables('clusterName')]",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@
"properties": {
"description": "[concat(variables('RecordingRuleGroupDescriptionWin'), variables('version'))]",
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]"
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"enabled": true,
"clusterName": "[variables('clusterName')]",
Expand Down Expand Up @@ -133,7 +134,8 @@
"properties": {
"description": "[concat(variables('RecordingRuleGroupDescriptionWin'), variables('version'))]",
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]"
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"enabled": true,
"clusterName": "[variables('clusterName')]",
Expand Down Expand Up @@ -211,4 +213,4 @@
}
}
]
}
}
8 changes: 2 additions & 6 deletions AddonBicepTemplate/AzureMonitorAlertsProfile.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ resource recommendedAlerts 'Microsoft.AlertsManagement/prometheusRuleGroups@2023
location: location
properties: {
description: 'Kubernetes Alert RuleGroup-RecommendedCIAlerts - 0.1'
scopes: [
monitorWorkspace.id
]
scopes: [monitorWorkspace.id,aksResourceId]
clusterName: split(aksResourceId, '/')[8]
enabled: true
interval: 'PT5M'
Expand Down Expand Up @@ -241,9 +239,7 @@ resource communityALerts 'Microsoft.AlertsManagement/prometheusRuleGroups@2023-0
location: location
properties: {
description: 'Kubernetes Alert RuleGroup-communityCIAlerts - 0.1'
scopes: [
monitorWorkspace.id
]
scopes: [monitorWorkspace.id,aksResourceId]
clusterName: split(aksResourceId, '/')[8]
enabled: true
interval: 'PT1M'
Expand Down
16 changes: 4 additions & 12 deletions AddonBicepTemplate/FullAzureMonitorMetricsProfile.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,7 @@ resource nodeRecordingRuleGroup 'Microsoft.AlertsManagement/prometheusRuleGroups
location: azureMonitorWorkspaceLocation
properties: {
description: '${nodeRecordingRuleGroupDescription}${version}'
scopes: [
azureMonitorWorkspaceResourceId
]
scopes: [azureMonitorWorkspaceResourceId,clusterResourceId]
enabled: true
clusterName: clusterName
interval: 'PT1M'
Expand Down Expand Up @@ -201,9 +199,7 @@ resource kubernetesRecordingRuleGroup 'Microsoft.AlertsManagement/prometheusRule
location: azureMonitorWorkspaceLocation
properties: {
description: '${kubernetesRecordingRuleGroupDescription}${version}'
scopes: [
azureMonitorWorkspaceResourceId
]
scopes: [azureMonitorWorkspaceResourceId,clusterResourceId]
enabled: true
clusterName: clusterName
interval: 'PT1M'
Expand Down Expand Up @@ -305,9 +301,7 @@ resource nodeRecordingRuleGroupNameWin 'Microsoft.AlertsManagement/prometheusRul
location: azureMonitorWorkspaceLocation
properties: {
description: '${RecordingRuleGroupDescriptionWin}${version}'
scopes: [
azureMonitorWorkspaceResourceId
]
scopes: [azureMonitorWorkspaceResourceId,clusterResourceId]
enabled: enableWindowsRecordingRules
clusterName: clusterName
interval: 'PT1M'
Expand Down Expand Up @@ -381,9 +375,7 @@ resource nodeAndKubernetesRecordingRuleGroupNameWin 'Microsoft.AlertsManagement/
location: azureMonitorWorkspaceLocation
properties: {
description: '${RecordingRuleGroupDescriptionWin}${version}'
scopes: [
azureMonitorWorkspaceResourceId
]
scopes: [azureMonitorWorkspaceResourceId,clusterResourceId]
enabled: enableWindowsRecordingRules
clusterName: clusterName
interval: 'PT1M'
Expand Down
2 changes: 1 addition & 1 deletion AddonBicepTemplate/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ You can deploy the templates using a command like :

To deploy the community alerts and CI recommended alerts through the template, use a command like:

```az deployment group create -g <resource_group> -n <deployment_name> --template-file .\AzureMonitorAlertsProfileParameters.json --parameters .\AzureMonitorAlertsProfileParameters.json```
```az deployment group create -g <resource_group> -n <deployment_name> --template-file .\AzureMonitorAlertsProfile.bicep --parameters .\AzureMonitorAlertsProfileParameters.json```

**NOTE**

Expand Down
16 changes: 12 additions & 4 deletions AddonPolicyTemplate/AddonPolicyMetricsProfile.rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,10 @@
"location": "[parameters('azureMonitorWorkspaceLocation')]",
"properties": {
"description": "[concat(variables('nodeRecordingRuleGroupDescription'), variables('version'))]",
"scopes": ["[parameters('azureMonitorWorkspaceResourceId')]"],
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"clusterName": "[variables('clusterName')]",
"interval": "PT1M",
"rules": [
Expand Down Expand Up @@ -283,7 +286,10 @@
"location": "[parameters('azureMonitorWorkspaceLocation')]",
"properties": {
"description": "[concat(variables('kubernetesRecordingRuleGroupDescription'), variables('version'))]",
"scopes": ["[parameters('azureMonitorWorkspaceResourceId')]"],
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"clusterName": "[variables('clusterName')]",
"interval": "PT1M",
"rules": [
Expand Down Expand Up @@ -386,7 +392,8 @@
"properties": {
"description": "[concat(variables('RecordingRuleGroupDescriptionWin'), variables('version'))]",
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]"
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"enabled": "[parameters('enableWindowsRecordingRules')]",
"clusterName": "[variables('clusterName')]",
Expand Down Expand Up @@ -463,7 +470,8 @@
"properties": {
"description": "[concat(variables('RecordingRuleGroupDescriptionWin'), variables('version'))]",
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]"
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"enabled": "[parameters('enableWindowsRecordingRules')]",
"clusterName": "[variables('clusterName')]",
Expand Down
8 changes: 4 additions & 4 deletions AddonTerraformTemplate/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ resource "azurerm_monitor_alert_prometheus_rule_group" "node_recording_rules_rul
description = "Node Recording Rules Rule Group"
rule_group_enabled = true
interval = "PT1M"
scopes = [azurerm_monitor_workspace.amw.id]
scopes = [azurerm_monitor_workspace.amw.id,azurerm_kubernetes_cluster.k8s.id]

rule {
enabled = true
Expand Down Expand Up @@ -209,7 +209,7 @@ resource "azurerm_monitor_alert_prometheus_rule_group" "kubernetes_recording_rul
description = "Kubernetes Recording Rules Rule Group"
rule_group_enabled = true
interval = "PT1M"
scopes = [azurerm_monitor_workspace.amw.id]
scopes = [azurerm_monitor_workspace.amw.id,azurerm_kubernetes_cluster.k8s.id]

rule {
enabled = true
Expand Down Expand Up @@ -366,7 +366,7 @@ resource "azurerm_monitor_alert_prometheus_rule_group" "node_and_kubernetes_reco
description = "Node and Kubernetes Recording Rules Rule Group for Windows Nodes"
rule_group_enabled = true
interval = "PT1M"
scopes = [azurerm_monitor_workspace.amw.id]
scopes = [azurerm_monitor_workspace.amw.id,azurerm_kubernetes_cluster.k8s.id]

rule {
enabled = true
Expand Down Expand Up @@ -497,7 +497,7 @@ resource "azurerm_monitor_alert_prometheus_rule_group" "node_recording_rules_rul
description = "Node and Kubernetes Recording Rules Rule Group for Windows Nodes"
rule_group_enabled = true
interval = "PT1M"
scopes = [azurerm_monitor_workspace.amw.id]
scopes = [azurerm_monitor_workspace.amw.id,azurerm_kubernetes_cluster.k8s.id]

rule {
enabled = true
Expand Down
9 changes: 8 additions & 1 deletion GeneratedMonitoringArtifacts/Default/DefaultAlerts.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
"description": "Cluster name"
}
},
"clusterResourceId": {
"type": "string",
"metadata": {
"description": "Cluster Resource Id"
}
},
"actionGroupResourceId": {
"type": "string",
"metadata": {
Expand Down Expand Up @@ -40,7 +46,8 @@
"properties": {
"description": "[concat(variables('kubernetesAlertRuleGroupDescription'), variables('version'))]",
"scopes": [
"[parameters('azureMonitorWorkspaceResourceId')]"
"[parameters('azureMonitorWorkspaceResourceId')]",
"[parameters('clusterResourceId')]"
],
"clusterName": "[parameters('clusterName')]",
"interval": "PT1M",
Expand Down
Loading

0 comments on commit e9c13ba

Please sign in to comment.