Skip to content

Commit

Permalink
ci/cd: pipeline reliability and testkube fixes (#998)
Browse files Browse the repository at this point in the history
[comment]: # (Note that your PR title should follow the conventional
commit format: https://conventionalcommits.org/en/v1.0.0/#summary)
# PR Description
- Skip some SDL tasks for branch builds. Still run for PRs and merges to
main
- Add retries to the trivy task when it fails to pull from the trivy DB. Do not
retry if the scan actually ran and failed because of vulnerabilities.
- Enable the backup DB repository that trivy has added, by setting it through an env var.
- Wait for the arc proxy cluster to be ready before deploying to it, and add retries.
- Fix testkube configmap yaml to scrape correct node-exporter port
  • Loading branch information
gracewehner authored Oct 17, 2024
1 parent 6a2c0ee commit f871133
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 17 deletions.
126 changes: 110 additions & 16 deletions .pipelines/azure-pipeline-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,7 @@ stages:
name: Azure-Pipelines-Windows-CI-Test-EO
variables:
skipComponentGovernanceDetection: true
condition: and(succeeded(), or(eq(variables.IS_PR, true), eq(variables.IS_MAIN_BRANCH, true)))
steps:
- checkout: self
submodules: true
Expand All @@ -529,6 +530,7 @@ stages:
name: Azure-Pipelines-CI-Test-EO
variables:
skipComponentGovernanceDetection: true
condition: and(succeeded(), or(eq(variables.IS_PR, true), eq(variables.IS_MAIN_BRANCH, true)))
steps:
- checkout: self
submodules: true
Expand Down Expand Up @@ -693,18 +695,29 @@ stages:
- bash: |
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(LINUX_FULL_IMAGE_NAME)
if [ $? -ne 0 ]; then
exit 1
fi
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(KUBE_STATE_METRICS_IMAGE)
if [ $? -ne 0 ]; then
exit 1
fi
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(NODE_EXPORTER_IMAGE)
if [ $? -ne 0 ]; then
exit 1
fi
export TRIVY_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-db,public.ecr.aws/aquasecurity/trivy-db"
export TRIVY_JAVA_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-java-db,public.ecr.aws/aquasecurity/trivy-java-db"
# Scan each image, retrying only when the run fails with TOOMANYREQUESTS.
# --exit-code 1 makes trivy return non-zero when it reports findings at the
# selected severities; without it a completed scan exits 0 and reported
# vulnerabilities would never fail this step.
for image in $(LINUX_FULL_IMAGE_NAME) $(KUBE_STATE_METRICS_IMAGE) $(NODE_EXPORTER_IMAGE); do
  for i in {1..5}; do
    # Capture all output so it can be inspected for the rate-limit marker below.
    trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $image > trivy_output.log 2>&1
    TRIVY_EXIT_CODE=$?
    if [ $TRIVY_EXIT_CODE -eq 0 ]; then
      cat trivy_output.log
      break
    fi
    if grep -q "TOOMANYREQUESTS" trivy_output.log; then
      echo "Error: Too many requests to the Trivy server. Retrying... ($i/5)"
      sleep 5
    else
      # Any other failure, including reported vulnerabilities, fails the step immediately.
      cat trivy_output.log
      exit 1
    fi
  done
  # A non-zero code here means all 5 attempts hit TOOMANYREQUESTS.
  if [ $TRIVY_EXIT_CODE -ne 0 ]; then
    echo "Error: Trivy scan failed after 5 retries."
    exit 1
  fi
done
workingDirectory: $(Build.SourcesDirectory)
displayName: "Build: run trivy scan"
Expand Down Expand Up @@ -869,8 +882,25 @@ stages:
- bash: |
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(LINUX_CCP_FULL_IMAGE_NAME)
if [ $? -ne 0 ]; then
export TRIVY_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-db,public.ecr.aws/aquasecurity/trivy-db"
export TRIVY_JAVA_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-java-db,public.ecr.aws/aquasecurity/trivy-java-db"
# Scan the image, retrying only when the run fails with TOOMANYREQUESTS.
# --exit-code 1 makes trivy return non-zero when it reports findings at the
# selected severities; without it a completed scan exits 0 and reported
# vulnerabilities would never fail this step.
for i in {1..5}; do
  # Capture all output so it can be inspected for the rate-limit marker below.
  trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(LINUX_CCP_FULL_IMAGE_NAME) > trivy_output.log 2>&1
  TRIVY_EXIT_CODE=$?
  if [ $TRIVY_EXIT_CODE -eq 0 ]; then
    cat trivy_output.log
    break
  fi
  if grep -q "TOOMANYREQUESTS" trivy_output.log; then
    echo "Error: Too many requests to the Trivy server. Retrying... ($i/5)"
    sleep 5
  else
    # Any other failure, including reported vulnerabilities, fails the step immediately.
    cat trivy_output.log
    exit 1
  fi
done
# A non-zero code here means all 5 attempts hit TOOMANYREQUESTS.
if [ $TRIVY_EXIT_CODE -ne 0 ]; then
  echo "Error: Trivy scan failed after 5 retries."
  exit 1
fi
workingDirectory: $(Build.SourcesDirectory)
Expand Down Expand Up @@ -979,7 +1009,27 @@ stages:
- bash: |
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(TARGET_ALLOCATOR_FULL_IMAGE_NAME)
export TRIVY_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-db,public.ecr.aws/aquasecurity/trivy-db"
export TRIVY_JAVA_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-java-db,public.ecr.aws/aquasecurity/trivy-java-db"
# Scan the image, retrying only when the run fails with TOOMANYREQUESTS.
# --exit-code 1 makes trivy return non-zero when it reports findings at the
# selected severities; without it a completed scan exits 0 and reported
# vulnerabilities would never fail this step.
for i in {1..5}; do
  # Capture all output so it can be inspected for the rate-limit marker below.
  trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(TARGET_ALLOCATOR_FULL_IMAGE_NAME) > trivy_output.log 2>&1
  TRIVY_EXIT_CODE=$?
  if [ $TRIVY_EXIT_CODE -eq 0 ]; then
    cat trivy_output.log
    break
  fi
  if grep -q "TOOMANYREQUESTS" trivy_output.log; then
    echo "Error: Too many requests to the Trivy server. Retrying... ($i/5)"
    sleep 5
  else
    # Any other failure, including reported vulnerabilities, fails the step immediately.
    cat trivy_output.log
    exit 1
  fi
done
# A non-zero code here means all 5 attempts hit TOOMANYREQUESTS.
if [ $TRIVY_EXIT_CODE -ne 0 ]; then
  echo "Error: Trivy scan failed after 5 retries."
  exit 1
fi
workingDirectory: $(Build.SourcesDirectory)
displayName: "Build: run trivy scan"
Expand Down Expand Up @@ -1074,7 +1124,27 @@ stages:
- bash: |
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(LINUX_CONFIG_READER_FULL_IMAGE_NAME)
export TRIVY_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-db,public.ecr.aws/aquasecurity/trivy-db"
export TRIVY_JAVA_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-java-db,public.ecr.aws/aquasecurity/trivy-java-db"
# Scan the image, retrying only when the run fails with TOOMANYREQUESTS.
# --exit-code 1 makes trivy return non-zero when it reports findings at the
# selected severities; without it a completed scan exits 0 and reported
# vulnerabilities would never fail this step.
for i in {1..5}; do
  # Capture all output so it can be inspected for the rate-limit marker below.
  trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 $(LINUX_CONFIG_READER_FULL_IMAGE_NAME) > trivy_output.log 2>&1
  TRIVY_EXIT_CODE=$?
  if [ $TRIVY_EXIT_CODE -eq 0 ]; then
    cat trivy_output.log
    break
  fi
  if grep -q "TOOMANYREQUESTS" trivy_output.log; then
    echo "Error: Too many requests to the Trivy server. Retrying... ($i/5)"
    sleep 5
  else
    # Any other failure, including reported vulnerabilities, fails the step immediately.
    cat trivy_output.log
    exit 1
  fi
done
# A non-zero code here means all 5 attempts hit TOOMANYREQUESTS.
if [ $TRIVY_EXIT_CODE -ne 0 ]; then
  echo "Error: Trivy scan failed after 5 retries."
  exit 1
fi
workingDirectory: $(Build.SourcesDirectory)
displayName: "Build: run trivy scan"
Expand Down Expand Up @@ -1525,6 +1595,29 @@ stages:
inlineScript: |
az config set extension.use_dynamic_install=yes_without_prompt
az k8s-extension update --name azuremonitor-metrics --resource-group ci-dev-arc-wcus --cluster-name ci-dev-arc-wcus --cluster-type connectedClusters --version $HELM_SEMVER --release-train pipeline
retryCountOnTaskFailure: 2

- task: AzureCLI@2
displayName: "Deploy: wait for ci-dev-arc-proxy cluster to be ready"
inputs:
azureSubscription: 'ContainerInsights_Build_Subscription(9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb)'
scriptType: 'bash'
scriptLocation: 'inlineScript'
inlineScript: |
# Poll the azuremonitor-metrics extension on the ci-dev-arc-proxy cluster up to
# 20 times, sleeping 30 seconds between polls, and exit 0 as soon as its
# provisioningState is 'Succeeded' or 'Failed'. Exit 1 if neither state is seen.
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
do
# Read the extension's current provisioningState from the JSON output of 'az k8s-extension show'.
state=$(az k8s-extension show --name azuremonitor-metrics --resource-group ci-dev-arc-proxy --cluster-name ci-dev-arc-proxy --cluster-type connectedClusters | jq -r '.provisioningState')
# We want to wait in case the status is 'Creating' or 'Updating' because of another PR merged shortly before the current one.
# Both 'Succeeded' and 'Failed' are treated as ready; any other value keeps polling.
if [ "$state" = "Succeeded" ] || [ "$state" = "Failed" ]
then
echo "Cluster is ready to install extension"
exit 0
fi
sleep 30
done
# All 20 polls finished without seeing 'Succeeded' or 'Failed': fail the task.
echo "Cluster is installing a different version of the extension"
exit 1
retryCountOnTaskFailure: 5

- task: AzureCLI@2
displayName: "Deploy: ci-dev-arc-proxy cluster"
Expand All @@ -1535,6 +1628,7 @@ stages:
inlineScript: |
az config set extension.use_dynamic_install=yes_without_prompt
az k8s-extension update --name azuremonitor-metrics --resource-group ci-dev-arc-proxy --cluster-name ci-dev-arc-proxy --cluster-type connectedClusters --version $HELM_SEMVER --release-train pipeline
retryCountOnTaskFailure: 2

- deployment: Testkube_ARC
displayName: "Test: Arc testkube tests"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ data:
replacement: $$NODE_IP
target_label: node_ip_double_dollar_sign
static_configs:
- targets: ['$NODE_IP:9100']
- targets: ['$NODE_IP:19100']
metadata:
name: ama-metrics-prometheus-config-node
namespace: kube-system

0 comments on commit f871133

Please sign in to comment.