From 4f96b7f10d92eed8f7db6772eb49635f37506a16 Mon Sep 17 00:00:00 2001 From: Djuradj Kurepa <91743470+dkurepa@users.noreply.github.com> Date: Tue, 24 Sep 2024 13:31:02 +0200 Subject: [PATCH] Rewrite PCS Deployment using C# (#3998) --- Directory.Packages.props | 1 + arcade-services.sln | 15 ++ ...pipelines-product-construction-service.yml | 255 ++++++++++-------- .../product-construction-service-deploy.ps1 | 175 ------------ .../Generated/Status.cs | 45 +++- .../Deployer.cs | 254 +++++++++++++++++ .../DeploymentOptions.cs | 31 +++ ...oductConstructionService.Deployment.csproj | 24 ++ .../Program.cs | 25 ++ .../Utility.cs | 35 +++ src/ProductConstructionService/Readme.md | 2 +- 11 files changed, 568 insertions(+), 294 deletions(-) delete mode 100644 eng/deployment/product-construction-service-deploy.ps1 create mode 100644 src/ProductConstructionService/ProductConstructionService.Deployment/Deployer.cs create mode 100644 src/ProductConstructionService/ProductConstructionService.Deployment/DeploymentOptions.cs create mode 100644 src/ProductConstructionService/ProductConstructionService.Deployment/ProductConstructionService.Deployment.csproj create mode 100644 src/ProductConstructionService/ProductConstructionService.Deployment/Program.cs create mode 100644 src/ProductConstructionService/ProductConstructionService.Deployment/Utility.cs diff --git a/Directory.Packages.props b/Directory.Packages.props index d8479a1a97..f9329b4eb6 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -25,6 +25,7 @@ + diff --git a/arcade-services.sln b/arcade-services.sln index 2d491b556e..fed5c5e535 100644 --- a/arcade-services.sln +++ b/arcade-services.sln @@ -144,6 +144,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProductConstructionService. 
EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProductConstructionService.ScenarioTests", "test\ProductConstructionService.ScenarioTests\ProductConstructionService.ScenarioTests.csproj", "{12D91D30-EC50-4D2B-8D67-0C19FCD2303F}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProductConstructionService.Deployment", "src\ProductConstructionService\ProductConstructionService.Deployment\ProductConstructionService.Deployment.csproj", "{A4125B78-593D-4659-AA28-0E176D4644E5}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -707,6 +709,18 @@ Global {12D91D30-EC50-4D2B-8D67-0C19FCD2303F}.Release|x64.Build.0 = Release|Any CPU {12D91D30-EC50-4D2B-8D67-0C19FCD2303F}.Release|x86.ActiveCfg = Release|Any CPU {12D91D30-EC50-4D2B-8D67-0C19FCD2303F}.Release|x86.Build.0 = Release|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Debug|x64.ActiveCfg = Debug|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Debug|x64.Build.0 = Debug|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Debug|x86.ActiveCfg = Debug|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Debug|x86.Build.0 = Debug|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Release|Any CPU.Build.0 = Release|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Release|x64.ActiveCfg = Release|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Release|x64.Build.0 = Release|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Release|x86.ActiveCfg = Release|Any CPU + {A4125B78-593D-4659-AA28-0E176D4644E5}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -762,6 +776,7 @@ Global {B5833185-DD07-4C64-A4DA-D8290294F7E7} = 
{243A4561-BF35-405A-AF12-AC57BB27796D} {D94319F8-FCA0-495B-8B6E-190B4EEBEF93} = {1A456CF0-C09A-4DE6-89CE-1110EED31180} {12D91D30-EC50-4D2B-8D67-0C19FCD2303F} = {1A456CF0-C09A-4DE6-89CE-1110EED31180} + {A4125B78-593D-4659-AA28-0E176D4644E5} = {243A4561-BF35-405A-AF12-AC57BB27796D} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {32B9C883-432E-4FC8-A1BF-090EB033DD5B} diff --git a/azure-pipelines-product-construction-service.yml b/azure-pipelines-product-construction-service.yml index 812a5e9780..d8e0761a1c 100644 --- a/azure-pipelines-product-construction-service.yml +++ b/azure-pipelines-product-construction-service.yml @@ -55,92 +55,8 @@ variables: value: -dev stages: -- stage: DeployPCS - displayName: Deploy Product Construction Service - dependsOn: [] - jobs: - - job: DeployPCS - displayName: Deploy Product Construction Service - pool: - name: NetCore1ESPool-Internal - demands: ImageOverride -equals 1es-ubuntu-2004 - - steps: - - checkout: self - - - template: eng/templates/steps/docker-build.yml - parameters: - devBranchSuffix: $(devBranchSuffix) - dockerImageName: $(dockerRegistryUrl)/$(containerName) - - - ${{ if notin(variables['Build.Reason'], 'PullRequest') }}: - - ${{ if ne(variables['Build.SourceBranch'], 'refs/heads/production') }}: - - task: AzureCLI@2 - inputs: - azureSubscription: $(serviceConnectionName) - scriptType: pscore - scriptLocation: inlineScript - inlineScript: | - New-Item -ItemType Directory -Path $(diffFolder) - $before = az containerapp show --name $(containerappName) -g $(resourceGroupName) --output json - Set-Content -Path $(diffFolder)/before.json -Value $before - displayName: Snapshot configuration (before) - - - task: AzureCLI@2 - name: GetAuthInfo - displayName: Get PCS Token - inputs: - azureSubscription: $(authServiceConnection) - addSpnToEnvironment: true - scriptType: pscore - scriptLocation: inlineScript - inlineScript: | - # Fetch pcs token - $token = (az account get-access-token --resource 
"$(MaestroAppId)" | ConvertFrom-Json).accessToken - "##vso[task.setvariable variable=Token;isOutput=true;isSecret=true]$token" - - - task: AzureCLI@2 - inputs: - azureSubscription: $(serviceConnectionName) - scriptType: pscore - scriptLocation: scriptPath - scriptPath: $(Build.SourcesDirectory)/eng/deployment/product-construction-service-deploy.ps1 - arguments: > - -subscriptionId $(subscriptionId) - -resourceGroupName $(resourceGroupName) - -containerappName $(containerappName) - -workspaceName $(containerappWorkspaceName) - -newImageTag $(DockerTag.newDockerImageTag) - -containerRegistryName $(containerRegistryName) - -imageName $(containerName) - -token $(GetAuthInfo.Token) - -containerjobNames '$(containerjobNames)' - displayName: Deploy container app - - - task: AzureCLI@2 - inputs: - azureSubscription: $(serviceConnectionName) - scriptType: pscore - scriptLocation: inlineScript - inlineScript: | - $after = az containerapp show --name $(containerappName) -g $(resourceGroupName) --output json - Set-Content -Path $(diffFolder)/after.json -Value $after - displayName: Snapshot configuration (after) - - # git diff will set the exit code to 1, since the files are different, we have to manually set it back to 0 - - powershell: | - $diff = git diff before.json after.json - $LASTEXITCODE = 0 - Set-Content -Path diff -Value $diff - displayName: Diff configuration snapshots - workingDirectory: $(diffFolder) - - - publish: $(diffFolder) - displayName: Upload snapshot diff - artifact: DeploymentDiff - -- stage: BuildRepo - displayName: Build and Publish Repo +- stage: Build + displayName: Build dependsOn: [] jobs: - job: BuildAndPublish @@ -179,33 +95,154 @@ stages: -projects .\test\ProductConstructionService.ScenarioTests\ProductConstructionService.ScenarioTests.csproj displayName: Build ScenarioTests + - powershell: > + .\eng\common\build.ps1 + -restore + -build + -configuration $(BuildConfig) + -projects 
.\src\ProductConstructionService\ProductConstructionService.Deployment\ProductConstructionService.Deployment.csproj + displayName: Build ProductConstructionService.Deployment + - publish: $(Build.SourcesDirectory)\artifacts\bin\ProductConstructionService.ScenarioTests\$(BuildConfig)\net8.0\publish artifact: ProductConstructionService.ScenarioTests - publish: $(Build.SourcesDirectory)\artifacts\packages\$(BuildConfig)\NonShipping artifact: PackageArtifacts -- stage: TestPCS - displayName: Run E2E Product Construction Service Tests - dependsOn: - - DeployPCS - - BuildRepo + - publish: $(Build.SourcesDirectory)\artifacts\bin\ProductConstructionService.Deployment\$(BuildConfig)\net8.0 + artifact: ProductConstructionService.Deployment - jobs: - - template: /eng/templates/jobs/e2e-pcs-tests.yml - parameters: - name: scenarioTests_GitHub - displayName: GitHub tests - testFilter: 'TestCategory=GitHub' - - - template: /eng/templates/jobs/e2e-pcs-tests.yml - parameters: - name: scenarioTests_AzDO - displayName: AzDO tests - testFilter: 'TestCategory=AzDO' - - - template: /eng/templates/jobs/e2e-pcs-tests.yml - parameters: - name: scenarioTests_Other - displayName: Other tests - testFilter: 'TestCategory!=GitHub&TestCategory!=AzDO' + + - job: BuildAndPublishDocker + displayName: Build And Publish new Docker Image + pool: + name: NetCore1ESPool-Internal + demands: ImageOverride -equals 1es-ubuntu-2004 + + steps: + - checkout: self + + - template: eng/templates/steps/docker-build.yml + parameters: + devBranchSuffix: $(devBranchSuffix) + dockerImageName: $(dockerRegistryUrl)/$(containerName) + +- ${{ if notin(variables['Build.Reason'], 'PullRequest') }}: + - ${{ if ne(variables['Build.SourceBranch'], 'refs/heads/production') }}: + - stage: DeployPCS + dependsOn: + - Build + displayName: Deploy Product Construction Service + + jobs: + - job: Deploy + displayName: Deploy container app + pool: + name: NetCore1ESPool-Internal + demands: ImageOverride -equals 1es-windows-2019 + 
variables: + newDockerImageTag: $[stageDependencies.Build.BuildAndPublishDocker.outputs['DockerTag.newDockerImageTag']] + + steps: + - powershell: | + Write-Host $(containerjobNames) + $az = Get-Command az.cmd + "##vso[task.setvariable variable=azCliPath]$($az.Source)" + + - powershell: Write-Host $(azCliPath) + + - task: AzureCLI@2 + inputs: + azureSubscription: $(serviceConnectionName) + scriptType: pscore + scriptLocation: inlineScript + inlineScript: | + New-Item -ItemType Directory -Path $(diffFolder) + $before = az containerapp show --name $(containerappName) -g $(resourceGroupName) --output json + Set-Content -Path $(diffFolder)/before.json -Value $before + displayName: Snapshot configuration (before) + + - download: current + displayName: Download ProductConstructionService.Deployment + artifact: ProductConstructionService.Deployment + + - task: NuGetToolInstaller@1 + displayName: Use NuGet + inputs: + versionSpec: 5.3.x + + - powershell: | + . .\eng\common\tools.ps1 + InitializeDotNetCli -install:$true + .\.dotnet\dotnet workload install aspire + displayName: Install .NET and Aspire Workload + + - powershell: .\eng\common\build.ps1 -restore + displayName: Install .NET + + # We'll need to give this service connection permission to get an auth token for PCS + - task: AzureCLI@2 + inputs: + azureSubscription: $(serviceConnectionName) + scriptType: pscore + scriptLocation: inlineScript + inlineScript: | + $(Pipeline.Workspace)/ProductConstructionService.Deployment/ProductConstructionService.Deployment.exe ` + --subscriptionId $(subscriptionId) ` + --resourceGroupName $(resourceGroupName) ` + --containerRegistryName $(containerRegistryName) ` + --containerAppName $(containerappName) ` + --workspaceName $(containerappWorkspaceName) ` + --containerJobNames $(containerjobNames) ` + --newImageTag $(newDockerImageTag) ` + --imageName $(containerName) ` + --azCliPath "$(azCliPath)" ` + --isCi true ` + --entraAppId $(MaestroAppId) + displayName: Deploy container app + 
+ - task: AzureCLI@2 + inputs: + azureSubscription: $(serviceConnectionName) + scriptType: pscore + scriptLocation: inlineScript + inlineScript: | + $after = az containerapp show --name $(containerappName) -g $(resourceGroupName) --output json + Set-Content -Path $(diffFolder)/after.json -Value $after + displayName: Snapshot configuration (after) + + # git diff will set the exit code to 1, since the files are different, we have to manually set it back to 0 + - powershell: | + $diff = git diff before.json after.json + $LASTEXITCODE = 0 + Set-Content -Path diff -Value $diff + displayName: Diff configuration snapshots + workingDirectory: $(diffFolder) + + - publish: $(diffFolder) + displayName: Upload snapshot diff + artifact: DeploymentDiff + + - stage: TestPCS + displayName: Run E2E Product Construction Service Tests + dependsOn: + - DeployPCS + + jobs: + - template: /eng/templates/jobs/e2e-pcs-tests.yml + parameters: + name: scenarioTests_GitHub + displayName: GitHub tests + testFilter: 'TestCategory=GitHub' + + - template: /eng/templates/jobs/e2e-pcs-tests.yml + parameters: + name: scenarioTests_AzDO + displayName: AzDO tests + testFilter: 'TestCategory=AzDO' + + - template: /eng/templates/jobs/e2e-pcs-tests.yml + parameters: + name: scenarioTests_Other + displayName: Other tests + testFilter: 'TestCategory!=GitHub&TestCategory!=AzDO' diff --git a/eng/deployment/product-construction-service-deploy.ps1 b/eng/deployment/product-construction-service-deploy.ps1 deleted file mode 100644 index c152c04cd1..0000000000 --- a/eng/deployment/product-construction-service-deploy.ps1 +++ /dev/null @@ -1,175 +0,0 @@ -# This script deploys the Product Construction Service using the blue/green deployment pattern. -# The script determines the color of the currently active revision, deactivates the old inactive revision, -# and deploys the new revision, switching all traffic to it if the health probes pass. 
-param( - [Parameter(Mandatory=$true)][string]$subscriptionId, - [Parameter(Mandatory=$true)][string]$resourceGroupName, - [Parameter(Mandatory=$true)][string]$containerappName, - [Parameter(Mandatory=$true)][string]$workspaceName, - [Parameter(Mandatory=$true)][string]$newImageTag, - [Parameter(Mandatory=$true)][string]$containerRegistryName, - [Parameter(Mandatory=$true)][string]$imageName, - [Parameter(Mandatory=$true)][string]$token, - [Parameter(Mandatory=$true)][string]$containerjobNames -) - -$containerapp = az containerapp show -g $resourceGroupName -n $containerappName | ConvertFrom-Json -$pcsUrl = "https://$($containerapp.properties.configuration.ingress.fqdn)" -$pcsStatusUrl = $pcsUrl + "/api/status" -$pcsStopUrl = $pcsStatusUrl + "/stop?api-version=2020-02-20" -$pcsStartUrl = $pcsStatusUrl + "/start?api-version=2020-02-20" -$authenticationHeader = @{ - "Authorization" = "Bearer $token" -} - -function Wait() { - Start-Sleep -Seconds 20 - return $true -} - -function StopAndWait([string]$pcsStatusUrl, [string]$pcsStopUrl, [hashtable]$authenticationHeader) { - try { - - $stopResponse = Invoke-WebRequest -Uri $pcsStopUrl -Method Put -Headers $authenticationHeader - - if ($stopResponse.StatusCode -ne 200) { - Write-Warning "Service isn't responding to the stop request. Deploying the new revision without stopping the service." - return - } - - # wait for the service to finish processing the current job - do - { - $pcsStateResponse = Invoke-WebRequest -Uri $pcsStatusUrl -Method Get - if ($pcsStateResponse.StatusCode -ne 200) { - Write-Warning "Service isn't responding to the status request. Deploying the new revision without stopping the service." - return - } - Write-Host "Product Construction Service state: $($pcsStateResponse.Content)" - } while ($pcsStateResponse.Content -notmatch "Stopped" -and $(Wait)) - } - catch { - Write-Warning "An error occurred: $($_.Exception.Message). Deploying the new revision without stopping the service." 
- } - return -} - -function GetLogsLink { - param ( - [string]$revisionName, - [string]$resourceGroup, - [string]$workspaceName, - [string]$subscriptionId - ) - - $query = "ContainerAppConsoleLogs_CL ` -| where RevisionName_s == '$revisionName' ` -| project TimeGenerated, Log_s" - - $bytes = [System.Text.Encoding]::UTF8.GetBytes($query) - $memoryStream = New-Object System.IO.MemoryStream - $compressedStream = New-Object System.IO.Compression.GZipStream($memoryStream, [System.IO.Compression.CompressionMode]::Compress, $true) - - $compressedStream.Write($bytes, 0, $bytes.Length) - $compressedStream.Close() - $memoryStream.Seek(0, [System.IO.SeekOrigin]::Begin) | Out-Null - $data = $memoryStream.ToArray() - $encodedQuery = [Convert]::ToBase64String($data) - $encodedQuery = [System.Web.HttpUtility]::UrlEncode($encodedQuery) - return "https://ms.portal.azure.com#@72f988bf-86f1-41af-91ab-2d7cd011db47/blade/Microsoft_OperationsManagementSuite_Workspace/Logs.ReactView/" + - "resourceId/%2Fsubscriptions%2F$subscriptionId%2FresourceGroups%2F$resourceGroup%2Fproviders%2FMicrosoft.OperationalInsights%2Fworkspaces%2F" + - "$workspaceName/source/LogsBlade.AnalyticsShareLinkToQuery/q/$encodedQuery/timespan/P1D/limit/1000" -} - -az extension add --name containerapp --upgrade - -Write-Host "Fetching all revisions to determine the active label" -$containerappTraffic = az containerapp ingress traffic show --name $containerappName --resource-group $resourceGroupName | ConvertFrom-Json -# find the currently active revision -$activeRevision = $containerappTraffic | Where-Object { $_.weight -eq 100 } - -Write-Host "Currently active revision: $($activeRevision.revisionName) with label $($activeRevision.label)" - -# detirmine the label of the inactive revision -if ($activeRevision.label -eq "blue") { - $inactiveLabel = "green" -} else { - $inactiveLabel = "blue" -} - -Write-Host "Next revision will be deployed with label $inactiveLabel" -Write-Host "Removing label $inactiveLabel from the 
inactive revision" -# remove the label from the inactive revision -$revisionRemovalOutput = az containerapp revision label remove --label $inactiveLabel --name $containerappName --resource-group $resourceGroupName 2>&1 - -if ($revisionRemovalOutput -match "Please specify a label name with an associated traffic weight") { - Write-Host "Couldn't find a revision with label $inactiveLabel. Skipping deactivation of inactive revision" -} -else -{ - Write-Host "Deactivating inactive revision" - # deactivate the inactive revision - - $inactiveRevision = $containerappTraffic | Where-Object { $_.label -eq $inactiveLabel } - - az containerapp revision deactivate --revision $inactiveRevision.revisionName --name $containerappName --resource-group $resourceGroupName -} - -# Tell the service to stop processing jobs after it finishes the current one -Write-Host "Stopping the service from processing new jobs" -StopAndWait -pcsStatusUrl $pcsStatusUrl -pcsStopUrl $pcsStopUrl -authenticationHeader $authenticationHeader - -$newRevisionName = "$containerappName--$newImageTag" -$newImage = "$containerRegistryName.azurecr.io/$imageName`:$newImageTag" - -# Kick off the deployment of the new image -Write-Host "Deploying container app / $newImageTag" -az containerapp update --name $containerappName --resource-group $resourceGroupName --image $newImage --revision-suffix $newImageTag | Out-Null - -# Deploy jobs -Write-Host "Deploying container jobs / $newImageTag" -foreach ($containerjobName in $containerjobNames.Split(',')) { - Write-Host "Updating job $containerjobName" - az containerapp job update --name $containerjobName --resource-group $resourceGroupName --image $newImage | Out-Null -} - -# Wait for the service to come up -try -{ - # Wait for the new revision to pass health probes and become active - Write-Host "Waiting for new revision $newRevisionName to become active" - do - { - $newRevisionRunningState = az containerapp revision show --name $containerappName --resource-group 
$resourceGroupName --revision $newRevisionName --query "properties.runningState" - Write-Host "New revision running state: $newRevisionRunningState" - } while ($newRevisionRunningState -notmatch "Running" -and $newRevisionRunningState -notmatch "Failed" -and $(Wait)) - - if ($newRevisionRunningState -match "Running") { - Write-Host "Assigning label $inactiveLabel to the new revision" - # assign the label to the new revision - az containerapp revision label add --label $inactiveLabel --name $containerappName --resource-group $resourceGroupName --revision $newRevisionName | Out-Null - - # transfer all traffic to the new revision - az containerapp ingress traffic set --name $containerappName --resource-group $resourceGroupName --label-weight "$inactiveLabel=100" | Out-Null - Write-Host "All traffic has been redirected to label $inactiveLabel" - } - else { - Write-Warning "New revision failed to start. Deactivating the new revision.." - $link = GetLogsLink ` - -revisionName "$newRevisionName" ` - -subscriptionId "$subscriptionId" ` - -resourceGroup "$resourceGroupName" ` - -workspaceName "$workspaceName" - - Write-Host " Check revision $newRevisionName logs in the inactive revision: ` - $link" - - az containerapp revision deactivate --revision $newRevisionName --name $containerappName --resource-group $resourceGroupName - exit 1 - } -} -finally { - # Start the service. 
This either starts the new revision or the old one if the new one failed to start - Write-Host "Starting the product construction service" - Invoke-WebRequest -Uri $pcsStartUrl -Method Put -Headers $authenticationHeader -} diff --git a/src/ProductConstructionService/ProductConstructionService.Client/Generated/Status.cs b/src/ProductConstructionService/ProductConstructionService.Client/Generated/Status.cs index 42d7a5fcfb..7135f8a34d 100644 --- a/src/ProductConstructionService/ProductConstructionService.Client/Generated/Status.cs +++ b/src/ProductConstructionService/ProductConstructionService.Client/Generated/Status.cs @@ -14,15 +14,15 @@ namespace ProductConstructionService.Client { public partial interface IStatus { - Task StopPcsWorkItemProcessorAsync( + Task StopPcsWorkItemProcessorAsync( CancellationToken cancellationToken = default ); - Task StartPcsWorkItemProcessorAsync( + Task StartPcsWorkItemProcessorAsync( CancellationToken cancellationToken = default ); - Task GetPcsWorkItemProcessorStatusAsync( + Task GetPcsWorkItemProcessorStatusAsync( CancellationToken cancellationToken = default ); @@ -41,7 +41,7 @@ public Status(ProductConstructionServiceApi client) partial void HandleFailedStopPcsWorkItemProcessorRequest(RestApiException ex); - public async Task StopPcsWorkItemProcessorAsync( + public async Task StopPcsWorkItemProcessorAsync( CancellationToken cancellationToken = default ) { @@ -70,8 +70,17 @@ public async Task StopPcsWorkItemProcessorAsync( await OnStopPcsWorkItemProcessorFailed(_req, _res).ConfigureAwait(false); } + if (_res.ContentStream == null) + { + await OnStopPcsWorkItemProcessorFailed(_req, _res).ConfigureAwait(false); + } - return; + using (var _reader = new StreamReader(_res.ContentStream)) + { + var _content = await _reader.ReadToEndAsync().ConfigureAwait(false); + var _body = Client.Deserialize(_content); + return _body; + } } } } @@ -101,7 +110,7 @@ internal async Task OnStopPcsWorkItemProcessorFailed(Request req, Response res) 
partial void HandleFailedStartPcsWorkItemProcessorRequest(RestApiException ex); - public async Task StartPcsWorkItemProcessorAsync( + public async Task StartPcsWorkItemProcessorAsync( CancellationToken cancellationToken = default ) { @@ -130,8 +139,17 @@ public async Task StartPcsWorkItemProcessorAsync( await OnStartPcsWorkItemProcessorFailed(_req, _res).ConfigureAwait(false); } + if (_res.ContentStream == null) + { + await OnStartPcsWorkItemProcessorFailed(_req, _res).ConfigureAwait(false); + } - return; + using (var _reader = new StreamReader(_res.ContentStream)) + { + var _content = await _reader.ReadToEndAsync().ConfigureAwait(false); + var _body = Client.Deserialize(_content); + return _body; + } } } } @@ -161,7 +179,7 @@ internal async Task OnStartPcsWorkItemProcessorFailed(Request req, Response res) partial void HandleFailedGetPcsWorkItemProcessorStatusRequest(RestApiException ex); - public async Task GetPcsWorkItemProcessorStatusAsync( + public async Task GetPcsWorkItemProcessorStatusAsync( CancellationToken cancellationToken = default ) { @@ -190,8 +208,17 @@ public async Task GetPcsWorkItemProcessorStatusAsync( await OnGetPcsWorkItemProcessorStatusFailed(_req, _res).ConfigureAwait(false); } + if (_res.ContentStream == null) + { + await OnGetPcsWorkItemProcessorStatusFailed(_req, _res).ConfigureAwait(false); + } - return; + using (var _reader = new StreamReader(_res.ContentStream)) + { + var _content = await _reader.ReadToEndAsync().ConfigureAwait(false); + var _body = Client.Deserialize(_content); + return _body; + } } } } diff --git a/src/ProductConstructionService/ProductConstructionService.Deployment/Deployer.cs b/src/ProductConstructionService/ProductConstructionService.Deployment/Deployer.cs new file mode 100644 index 0000000000..2138589f44 --- /dev/null +++ b/src/ProductConstructionService/ProductConstructionService.Deployment/Deployer.cs @@ -0,0 +1,254 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using System.IO.Compression; +using System.Web; +using Azure; +using Azure.Core; +using Azure.Identity; +using Azure.ResourceManager; +using Azure.ResourceManager.AppContainers; +using Azure.ResourceManager.AppContainers.Models; +using Azure.ResourceManager.Resources; +using Microsoft.DotNet.DarcLib.Helpers; +using ProductConstructionService.Client; + +namespace ProductConstructionService.Deployment; +public class Deployer // Rolls out a new container app revision under the currently unused blue/green label; entry point is DeployAsync +{ + private readonly DeploymentOptions _options; + private readonly ContainerAppResource _containerApp; + private readonly ResourceGroupResource _resourceGroup; + private readonly IProcessManager _processManager; + private readonly IProductConstructionServiceApi _pcsClient; + + private const int SleepTimeSeconds = 20; // Delay between polls of the service status / revision state + + public Deployer( + DeploymentOptions options, + IProcessManager processManager, + IProductConstructionServiceApi pcsClient) + { + _options = options; + _processManager = processManager; + _pcsClient = pcsClient; + + DefaultAzureCredential credential = new(); + ArmClient client = new(credential); + SubscriptionResource subscription = client.GetSubscriptionResource(new ResourceIdentifier($"/subscriptions/{_options.SubscriptionId}")); + // Resolve the targets from the options so the ARM calls and the az CLI calls (DefaultAzCliParameters) always act on the same app + _resourceGroup = subscription.GetResourceGroups().Get(_options.ResourceGroupName); + _containerApp = _resourceGroup.GetContainerApp(_options.ContainerAppName).Value; + } + + private string[] DefaultAzCliParameters => [ + "--name", _options.ContainerAppName, + "--resource-group", _options.ResourceGroupName, + ]; + private readonly RevisionRunningState RunningAtMaxScaleState = new RevisionRunningState("RunningAtMaxScale"); + + public async Task<int> DeployAsync() // Returns 0 when the new revision took over all traffic, -1 otherwise + { + List<ContainerAppRevisionTrafficWeight> trafficWeights = _containerApp.Data.Configuration.Ingress.Traffic.ToList(); + + var activeRevisionTrafficWeight = trafficWeights.FirstOrDefault(weight => weight.Weight == 100) ?? 
+ throw new ArgumentException("Container app has no active revision, please investigate manually"); + + Console.WriteLine($"Currently active revision {activeRevisionTrafficWeight.RevisionName} with label {activeRevisionTrafficWeight.Label}"); + + // Determine the label of the inactive revision + string inactiveRevisionLabel = activeRevisionTrafficWeight.Label == "blue" ? "green" : "blue"; + + Console.WriteLine($"Next revision will be deployed with label {inactiveRevisionLabel}"); + Console.WriteLine($"Removing label {inactiveRevisionLabel} from inactive revision"); + + // Cleanup all revisions except the currently active one + await CleanupRevisionsAsync(trafficWeights.Where(weight => weight != activeRevisionTrafficWeight)); + + // Tell the active revision to finish current work items and stop processing new ones + await StopProcessingNewJobs(); + + var newRevisionName = $"{_options.ContainerAppName}--{_options.NewImageTag}"; + var newImageFullUrl = $"{_options.ContainerRegistryName}.azurecr.io/{_options.ImageName}:{_options.NewImageTag}"; + try + { + // Kick off the deployment of the new image + await DeployContainerApp(newImageFullUrl); + + // While we're waiting for the new revision to become active, deploy container jobs + await DeployContainerJobs(newImageFullUrl); + + // Wait for the new app revision to become active + bool newRevisionActive = await WaitForRevisionToBecomeActive(newRevisionName); + + // If the new revision is active, the rollout succeeded, assign a label, and transfer all traffic to it + if (newRevisionActive) + { + await AssignLabelAndTransferTraffic(newRevisionName, inactiveRevisionLabel); + } + // If the new revision is not active, deactivate it and print a link to its logs + else + { + await DeactivateFailedRevisionAndGetLogs(newRevisionName); + return -1; + } + return 0; + } + catch (Exception ex) + { + Console.WriteLine($"An error occurred: {ex}"); + return -1; + } + finally + { + // Start the service again. 
If the deployment failed, we'll activate the old revision, otherwise, we'll activate the new one + Console.WriteLine("Starting the service again"); + await _pcsClient.Status.StartPcsWorkItemProcessorAsync(); + } + } + + private async Task CleanupRevisionsAsync(IEnumerable<ContainerAppRevisionTrafficWeight> revisionsTrafficWeight) + { + // Cleanup all revision labels (labels have to go first; the deactivation pass below runs only after every label was removed) + foreach (var revisionTrafficWeight in revisionsTrafficWeight) + { + if (!string.IsNullOrEmpty(revisionTrafficWeight.Label)) + { + var result = await InvokeAzCLI([ + "containerapp", "revision", "label", "remove", + "--label", revisionTrafficWeight.Label + ]); + result.ThrowIfFailed($"Failed to remove label {revisionTrafficWeight.Label} from revision {revisionTrafficWeight.RevisionName}. Stderr: {result.StandardError}"); + } + } + + // Now deactivate all revisions in the list + foreach (var revisionTrafficWeight in revisionsTrafficWeight) + { + ContainerAppRevisionResource revision = (await _containerApp.GetContainerAppRevisionAsync(revisionTrafficWeight.RevisionName)).Value; + + await revision.DeactivateRevisionAsync(); + } + } + + private async Task DeployContainerApp(string imageUrl) + { + Console.WriteLine("Deploying container app"); + _containerApp.Data.Template.Containers[0].Image = imageUrl; + _containerApp.Data.Template.RevisionSuffix = _options.NewImageTag; // DeployAsync derives the new revision name from this same tag + await _containerApp.UpdateAsync(WaitUntil.Completed, _containerApp.Data); + } + + private async Task DeployContainerJobs(string imageUrl) + { + foreach (var jobName in _options.ContainerJobNames.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)) // tolerate "a, b" and trailing commas + { + Console.WriteLine($"Deploying container job {jobName}"); + var containerJob = (await _resourceGroup.GetContainerAppJobAsync(jobName)).Value; + containerJob.Data.Template.Containers[0].Image = imageUrl; + + ContainerAppJobPatch jobPatch = new() + { + Properties = new ContainerAppJobPatchProperties() + { + Template = containerJob.Data.Template + } + }; + + await containerJob.UpdateAsync(WaitUntil.Completed, jobPatch); + } + } + + private async Task<bool> 
WaitForRevisionToBecomeActive(string revisionName) + { + Console.WriteLine($"Waiting for revision {revisionName} to become active"); + RevisionRunningState status; + do + { + var revision = (await _containerApp.GetContainerAppRevisionAsync(revisionName)).Value; + status = revision.Data.RunningState ?? RevisionRunningState.Unknown; + } + while (await Utility.SleepIfTrue( + () => status != RunningAtMaxScaleState && status != RevisionRunningState.Failed, + SleepTimeSeconds)); + + return status == RunningAtMaxScaleState; + } + + private async Task AssignLabelAndTransferTraffic(string revisionName, string label) + { + Console.WriteLine($"Assigning label {label} to the new revision"); + + var result = await InvokeAzCLI([ + "containerapp", "revision", "label", "add", + "--label", label, + "--revision", revisionName + ]); + result.ThrowIfFailed($"Failed to assign label {label} to revision {revisionName}. Stderr: {result.StandardError}"); + + Console.WriteLine("Transferring all traffic to the new revision"); + result = await InvokeAzCLI([ + "containerapp", "ingress", "traffic", "set", + "--label-weight", $"{label}=100" + ]); + result.ThrowIfFailed($"Failed to transfer all traffic to revision {revisionName}. Stderr: {result.StandardError}"); + + Console.WriteLine($"New revision {revisionName} is now active with label {label} and all traffic is transferred to it."); + } + + private async Task DeactivateFailedRevisionAndGetLogs(string revisionName) + { + var revision = (await _containerApp.GetContainerAppRevisionAsync(revisionName)).Value; + await revision.DeactivateRevisionAsync(); + Console.WriteLine($"Deactivated revision {revisionName}"); + + Console.WriteLine($"Check revision logs to see failure reason: {GetLogsLink(revisionName)}"); + } + + private string GetLogsLink(string revisionName) // Builds a portal link that opens the workspace logs filtered to the given revision + { + string query = $""" + ContainerAppConsoleLogs_CL + | where RevisionName_s == '{revisionName}' + | project TimeGenerated, Log_s + """; + + var encodedQuery = 
Utility.ConvertStringToCompressedBase64EncodedQuery(query); + + return "https://ms.portal.azure.com#@72f988bf-86f1-41af-91ab-2d7cd011db47/blade/Microsoft_OperationsManagementSuite_Workspace/Logs.ReactView/" + + $"resourceId/%2Fsubscriptions%2F{_options.SubscriptionId}%2FresourceGroups%2F{_options.ResourceGroupName}%2Fproviders%2FMicrosoft.OperationalInsights%2Fworkspaces%2F" + + $"{_options.WorkspaceName}/source/LogsBlade.AnalyticsShareLinkToQuery/q/{encodedQuery}/timespan/P1D/limit/1000"; + } + + private async Task<ProcessExecutionResult> InvokeAzCLI(string[] command) // Runs az with the given arguments followed by DefaultAzCliParameters + { + return await _processManager.Execute( + Path.GetFileName(_options.AzCliPath), + [ + .. command, + .. DefaultAzCliParameters + ], + workingDir: Path.GetDirectoryName(_options.AzCliPath)); + } + + private async Task StopProcessingNewJobs() + { + Console.WriteLine("Stopping the service from processing new jobs"); + string status; + try + { + // The stop request is best-effort as well: if it fails we still deploy, just without draining the current work items + await _pcsClient.Status.StopPcsWorkItemProcessorAsync(); + do + { + status = await _pcsClient.Status.GetPcsWorkItemProcessorStatusAsync(); + + Console.WriteLine($"Current status: {status}"); + } while (await Utility.SleepIfTrue(() => status != "Stopped", SleepTimeSeconds)); + } + catch (Exception ex) + { + Console.WriteLine($"An error occurred: {ex}. Deploying the new revision without stopping the service"); + } + } +} diff --git a/src/ProductConstructionService/ProductConstructionService.Deployment/DeploymentOptions.cs b/src/ProductConstructionService/ProductConstructionService.Deployment/DeploymentOptions.cs new file mode 100644 index 0000000000..7bfbfb83cc --- /dev/null +++ b/src/ProductConstructionService/ProductConstructionService.Deployment/DeploymentOptions.cs @@ -0,0 +1,31 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+using CommandLine;
+
+namespace ProductConstructionService.Deployment;
+// Command-line options of the deployment tool. Each property is bound from the argument named in its
+// [Option] attribute, and every option is marked Required.
+public class DeploymentOptions
+{
+    // Azure subscription ID; also embedded in the logs link the deployer prints.
+    [Option("subscriptionId", Required = true, HelpText = "Azure subscription ID")]
+    public required string SubscriptionId { get; init; }
+    // Resource group name; also embedded in the logs link the deployer prints.
+    [Option("resourceGroupName", Required = true, HelpText = "Resource group name")]
+    public required string ResourceGroupName { get; init; }
+    // Name of the container app.
+    [Option("containerAppName", Required = true, HelpText = "Container app name")]
+    public required string ContainerAppName { get; init; }
+    // Tag of the new image; the deployer also uses it as the revision suffix.
+    [Option("newImageTag", Required = true, HelpText = "New image tag")]
+    public required string NewImageTag { get; init; }
+    // Name of the container registry.
+    [Option("containerRegistryName", Required = true, HelpText = "Container registry name")]
+    public required string ContainerRegistryName { get; init; }
+    // Workspace name; used in the logs link as the Microsoft.OperationalInsights workspace.
+    [Option("workspaceName", Required = true, HelpText = "Workspace name")]
+    public required string WorkspaceName { get; init; }
+    // Name of the image.
+    [Option("imageName", Required = true, HelpText = "Image name")]
+    public required string ImageName { get; init; }
+    // Container job names in a single string; the deployer splits it on ','.
+    [Option("containerJobNames", Required = true, HelpText = "Container job names")]
+    public required string ContainerJobNames { get; init; }
+    // Path to az.cmd; the deployer runs its file name with the containing directory as working directory.
+    [Option("azCliPath", Required = true, HelpText = "Path to az.cmd")]
+    public required string AzCliPath { get; init; }
+    // Whether the tool runs in CI; Program.cs passes it as disableInteractiveAuth to the PCS client factory.
+    [Option("isCi", Required = true, HelpText = "Is running in CI")]
+    public required bool IsCi { get; init; }
+    // Entra app ID.
+    [Option("entraAppId", Required = true, HelpText = "Entra app ID")]
+    public required string EntraAppId { get; init; }
+}
diff --git a/src/ProductConstructionService/ProductConstructionService.Deployment/ProductConstructionService.Deployment.csproj b/src/ProductConstructionService/ProductConstructionService.Deployment/ProductConstructionService.Deployment.csproj
new file mode 100644
index 0000000000..9cceadf9c3
--- /dev/null
+++ b/src/ProductConstructionService/ProductConstructionService.Deployment/ProductConstructionService.Deployment.csproj
@@ -0,0 +1,24 @@
+
+
+
+ Exe
+
net8.0
+ enable
+ enable
+ False
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/ProductConstructionService/ProductConstructionService.Deployment/Program.cs b/src/ProductConstructionService/ProductConstructionService.Deployment/Program.cs
new file mode 100644
index 0000000000..68528ae07e
--- /dev/null
+++ b/src/ProductConstructionService/ProductConstructionService.Deployment/Program.cs
@@ -0,0 +1,25 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using CommandLine;
+using Microsoft.DotNet.DarcLib.Helpers;
+using Microsoft.Extensions.Logging;
+using ProductConstructionService.Deployment;
+
+// Entry point of the deployment tool: parses the command line into DeploymentOptions, runs the
+// deployer and returns its result as the process exit code (-1 when the arguments cannot be parsed).
+return await Parser.Default.ParseArguments<DeploymentOptions>(args)
+    .MapResult(
+        async options =>
+        {
+            // Dispose the factory before exiting so the console logger can flush what it has queued.
+            using var loggerFactory = LoggerFactory.Create(builder => builder.AddConsole());
+            ProcessManager processManager = new ProcessManager(loggerFactory.CreateLogger(string.Empty), string.Empty);
+
+            var pcsClient = ProductConstructionService.Client.PcsApiFactory.GetAuthenticated(
+                accessToken: null,
+                managedIdentityId: null,
+                disableInteractiveAuth: options.IsCi);
+
+            var deployer = new Deployer(options, processManager, pcsClient);
+
+            // Awaited rather than blocked on with GetAwaiter().GetResult().
+            return await deployer.DeployAsync();
+        },
+        _ => Task.FromResult(-1));
+
+
diff --git a/src/ProductConstructionService/ProductConstructionService.Deployment/Utility.cs b/src/ProductConstructionService/ProductConstructionService.Deployment/Utility.cs
new file mode 100644
index 0000000000..7861d23052
--- /dev/null
+++ b/src/ProductConstructionService/ProductConstructionService.Deployment/Utility.cs
@@ -0,0 +1,35 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.IO.Compression;
+using System.Web;
+
+namespace ProductConstructionService.Deployment;
+
+/// <summary>
+/// Small helpers shared by the deployment tool.
+/// </summary>
+internal static class Utility
+{
+    /// <summary>
+    /// Evaluates <paramref name="condition"/> once; when it is true, waits
+    /// <paramref name="durationSeconds"/> seconds before returning.
+    /// </summary>
+    /// <param name="condition">Predicate deciding whether to wait.</param>
+    /// <param name="durationSeconds">Delay in seconds applied when the condition holds.</param>
+    /// <returns>The value the condition evaluated to, so the call can drive a polling loop.</returns>
+    public static async Task<bool> SleepIfTrue(Func<bool> condition, int durationSeconds)
+    {
+        if (!condition())
+        {
+            return false;
+        }
+
+        await Task.Delay(TimeSpan.FromSeconds(durationSeconds));
+        return true;
+    }
+
+    /// <summary>
+    /// Encodes <paramref name="query"/> as UTF-8, gzip-compresses it, base64-encodes the compressed
+    /// bytes and URL-encodes the result.
+    /// </summary>
+    /// <param name="query">Query text to encode.</param>
+    /// <returns>The URL-encoded base64 string of the gzip-compressed query.</returns>
+    public static string ConvertStringToCompressedBase64EncodedQuery(string query)
+    {
+        var bytes = System.Text.Encoding.UTF8.GetBytes(query);
+
+        using MemoryStream memoryStream = new();
+
+        // The gzip stream has to be disposed before the buffer is read, otherwise the trailing
+        // compressed data is not flushed. leaveOpen keeps the buffer's lifetime with this method.
+        using (GZipStream compressedStream = new(memoryStream, CompressionMode.Compress, leaveOpen: true))
+        {
+            compressedStream.Write(bytes, 0, bytes.Length);
+        }
+
+        // ToArray returns the whole buffer regardless of the current position, so no Seek is needed.
+        var base64query = Convert.ToBase64String(memoryStream.ToArray());
+        return HttpUtility.UrlEncode(base64query);
+    }
+}
diff --git a/src/ProductConstructionService/Readme.md b/src/ProductConstructionService/Readme.md
index c7431e9be7..d89c71c019 100644
--- a/src/ProductConstructionService/Readme.md
+++ b/src/ProductConstructionService/Readme.md
@@ -148,7 +148,7 @@ When creating a Container App with a bicep template, we have to give it some kin
 
 # General deployment notes
 
-The Product Construction Service uses the [Blue-Green](https://learn.microsoft.com/en-us/azure/container-apps/blue-green-deployment?pivots=bicep) deployment approach, implemented in the [product-construction-service-deploy.ps1](https://github.com/dotnet/arcade-services/blob/main/eng/deployment/product-construction-service-deploy.ps1) script. The script does the following:
+The Product Construction Service uses the [Blue-Green](https://learn.microsoft.com/en-us/azure/container-apps/blue-green-deployment?pivots=bicep) deployment approach, implemented in the [ProductConstructionService.Deployment](https://github.com/dotnet/arcade-services/tree/main/src/ProductConstructionService/ProductConstructionService.Deployment) script.
The script does the following: - Figures out the label that should be assigned to the new revision and removes it from the old, inactive revision. - Tells the currently active revision to stop processing new jobs and waits for the new one to finish. - Deploys the new revision.