Skip to content

Commit

Permalink
fix: correctly calculate maxReplicas for rollingUpdate deployments & …
Browse files Browse the repository at this point in the history
…rolling DeploymentConfigs

Correctly calculate the maxReplicas as `replicas + maxSurge` instead of `replicas + maxSurge - maxUnavailable`

Refs: postfinance#20
  • Loading branch information
druppelt committed Sep 14, 2024
1 parent 1495ba0 commit 86c2709
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 52 deletions.
39 changes: 23 additions & 16 deletions internal/calc/deployment.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package calc

import (
"errors"
"fmt"
"math"

Expand All @@ -10,10 +11,10 @@ import (

// calculates the cpu/memory resources a single deployment needs. Replicas and the deployment
// strategy are taken into account.
func deployment(deployment appsv1.Deployment) (*ResourceUsage, error) {
func deployment(deployment appsv1.Deployment) (*ResourceUsage, error) { //nolint:funlen // disable function length linting
var (
resourceOverhead float64 // max overhead compute resources (percent)
podOverhead int32 // max overhead pods during deployment
maxUnavailable int32 // max amount of unavailable pods during a deployment
maxSurge int32 // max amount of pods that are allowed in addition to replicas during deployment
)

replicas := deployment.Spec.Replicas
Expand All @@ -35,9 +36,9 @@ func deployment(deployment appsv1.Deployment) (*ResourceUsage, error) {

switch strategy.Type {
case appsv1.RecreateDeploymentStrategyType:
// no overhead on recreate
resourceOverhead = 1
podOverhead = 0
// kill all existing pods, then recreate new ones at once -> no overhead on recreate
maxUnavailable = *replicas
maxSurge = 0
case "":
// RollingUpdate is the default and can be an empty string. If so, set the defaults
// (https://pkg.go.dev/k8s.io/api/apps/v1?tab=doc#RollingUpdateDeployment) and continue calculation.
Expand Down Expand Up @@ -69,31 +70,37 @@ func deployment(deployment appsv1.Deployment) (*ResourceUsage, error) {
}

// docs say, that the absolute number is calculated by rounding down.
maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(&maxUnavailableValue, int(*replicas), false)
maxUnavailableInt, err := intstr.GetScaledValueFromIntOrPercent(&maxUnavailableValue, int(*replicas), false)
if err != nil {
return nil, err
}

if maxUnavailableInt < math.MinInt32 || maxUnavailableInt > math.MaxInt32 {
return nil, errors.New("maxUnavailableInt out of int32 boundaries")
}

maxUnavailable = int32(maxUnavailableInt)

Check failure on line 82 in internal/calc/deployment.go

View workflow job for this annotation

GitHub Actions / lint

G115: integer overflow conversion int -> int32 (gosec)

Check failure on line 82 in internal/calc/deployment.go

View workflow job for this annotation

GitHub Actions / lint

G115: integer overflow conversion int -> int32 (gosec)

// docs say, absolute number is calculated by rounding up.
maxSurge, err := intstr.GetScaledValueFromIntOrPercent(&maxSurgeValue, int(*replicas), true)
maxSurgeInt, err := intstr.GetScaledValueFromIntOrPercent(&maxSurgeValue, int(*replicas), true)
if err != nil {
return nil, err
}

// podOverhead is the number of pods which can run more during a deployment
podOverheadInt := maxSurge - maxUnavailable
if podOverheadInt > math.MaxInt32 || podOverheadInt < math.MinInt32 {
return nil, fmt.Errorf("deployment: %s maxSurge - maxUnavailable (%d-%d) was out of bounds for int32", deployment.Name, maxSurge, maxUnavailable)
if maxSurgeInt < math.MinInt32 || maxSurgeInt > math.MaxInt32 {
return nil, errors.New("maxSurgeInt out of int32 boundaries")
}
podOverhead = int32(podOverheadInt) //nolint:gosec,wsl // gosec doesn't understand that the int conversion is already guarded, wsl wants to group the assignment with the next block

resourceOverhead = (float64(podOverhead) / float64(*replicas)) + 1
maxSurge = int32(maxSurgeInt)

Check failure on line 94 in internal/calc/deployment.go

View workflow job for this annotation

GitHub Actions / lint

G115: integer overflow conversion int -> int32 (gosec)

Check failure on line 94 in internal/calc/deployment.go

View workflow job for this annotation

GitHub Actions / lint

G115: integer overflow conversion int -> int32 (gosec)

default:
return nil, fmt.Errorf("deployment: %s deployment strategy %q is unknown", deployment.Name, strategy.Type)
}

_ = maxUnavailable // fix complaining compiler. will need this field in the future

podResources := podResources(&deployment.Spec.Template.Spec)
newResources := podResources.Mul(float64(*replicas)).Mul(resourceOverhead)
newResources := podResources.Mul(float64(*replicas + maxSurge))

resourceUsage := ResourceUsage{
Resources: newResources,
Expand All @@ -103,7 +110,7 @@ func deployment(deployment appsv1.Deployment) (*ResourceUsage, error) {
Name: deployment.Name,
Replicas: *replicas,
Strategy: string(strategy.Type),
MaxReplicas: *replicas + podOverhead,
MaxReplicas: *replicas + maxSurge,
},
}

Expand Down
38 changes: 22 additions & 16 deletions internal/calc/deploymentConfig.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package calc

import (
"errors"
"fmt"
"math"

Expand All @@ -10,10 +11,10 @@ import (

// calculates the cpu/memory resources a single deployment needs. Replicas and the deployment
// strategy are taken into account.
func deploymentConfig(deploymentConfig openshiftAppsV1.DeploymentConfig) (*ResourceUsage, error) {
func deploymentConfig(deploymentConfig openshiftAppsV1.DeploymentConfig) (*ResourceUsage, error) { //nolint:funlen // disable function length linting
var (
resourceOverhead float64 // max overhead compute resources (percent)
podOverhead int32 // max overhead pods during deploymentConfig
maxUnavailable int32 // max amount of unavailable pods during a deployment
maxSurge int32 // max amount of pods that are allowed in addition to replicas during deployment
)

replicas := deploymentConfig.Spec.Replicas
Expand All @@ -35,9 +36,9 @@ func deploymentConfig(deploymentConfig openshiftAppsV1.DeploymentConfig) (*Resou
// TODO lookup default values, these are copied from kubernetes Deployment
switch strategy.Type {
case openshiftAppsV1.DeploymentStrategyTypeRecreate:
// no overhead on recreate
resourceOverhead = 1
podOverhead = 0
// kill all existing pods, then recreate new ones at once -> no overhead on recreate
maxUnavailable = replicas
maxSurge = 0
case "":
// Rolling is the default and can be an empty string. If so, set the defaults
// (https://pkg.go.dev/k8s.io/api/apps/v1?tab=doc#RollingUpdateDeployment) and continue calculation.
Expand Down Expand Up @@ -69,32 +70,37 @@ func deploymentConfig(deploymentConfig openshiftAppsV1.DeploymentConfig) (*Resou
}

// docs say, that the absolute number is calculated by rounding down.
maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(&maxUnavailableValue, int(replicas), false)
maxUnavailableInt, err := intstr.GetScaledValueFromIntOrPercent(&maxUnavailableValue, int(replicas), false)
if err != nil {
return nil, err
}

if maxUnavailableInt < math.MinInt32 || maxUnavailableInt > math.MaxInt32 {
return nil, errors.New("maxUnavailableInt out of int32 boundaries")
}

maxUnavailable = int32(maxUnavailableInt)

Check failure on line 82 in internal/calc/deploymentConfig.go

View workflow job for this annotation

GitHub Actions / lint

G115: integer overflow conversion int -> int32 (gosec)

Check failure on line 82 in internal/calc/deploymentConfig.go

View workflow job for this annotation

GitHub Actions / lint

G115: integer overflow conversion int -> int32 (gosec)

// docs say, absolute number is calculated by rounding up.
maxSurge, err := intstr.GetScaledValueFromIntOrPercent(&maxSurgeValue, int(replicas), true)
maxSurgeInt, err := intstr.GetScaledValueFromIntOrPercent(&maxSurgeValue, int(replicas), true)
if err != nil {
return nil, err
}

// podOverhead is the number of pods which can run more during a deployment
podOverheadInt := maxSurge - maxUnavailable
if podOverheadInt > math.MaxInt32 || podOverheadInt < math.MinInt32 {
return nil, fmt.Errorf("deploymentConfig: %s maxSurge - maxUnavailable (%d-%d) was out of bounds for int32", deploymentConfig.Name, maxSurge, maxUnavailable)
if maxSurgeInt < math.MinInt32 || maxSurgeInt > math.MaxInt32 {
return nil, errors.New("maxSurgeInt out of int32 boundaries")
}
podOverhead = int32(podOverheadInt) //nolint:gosec,wsl // gosec doesn't understand that the int conversion is already guarded, wsl wants to group the assignment with the next block

resourceOverhead = (float64(podOverhead) / float64(replicas)) + 1
maxSurge = int32(maxSurgeInt)

Check failure on line 94 in internal/calc/deploymentConfig.go

View workflow job for this annotation

GitHub Actions / lint

G115: integer overflow conversion int -> int32 (gosec)

Check failure on line 94 in internal/calc/deploymentConfig.go

View workflow job for this annotation

GitHub Actions / lint

G115: integer overflow conversion int -> int32 (gosec)
default:
return nil, fmt.Errorf("deploymentConfig: %s deploymentConfig strategy %q is unknown", deploymentConfig.Name, strategy.Type)
}

_ = maxUnavailable // fix complaining compiler. will need this field in the future

podResources := podResources(&deploymentConfig.Spec.Template.Spec)
strategyResources := ConvertToResources(&deploymentConfig.Spec.Strategy.Resources)
newResources := podResources.Mul(float64(replicas)).Mul(resourceOverhead).Add(strategyResources)
newResources := podResources.Mul(float64(replicas + maxSurge)).Add(strategyResources)

resourceUsage := ResourceUsage{
Resources: newResources,
Expand All @@ -104,7 +110,7 @@ func deploymentConfig(deploymentConfig openshiftAppsV1.DeploymentConfig) (*Resou
Name: deploymentConfig.Name,
Replicas: replicas,
Strategy: string(strategy.Type),
MaxReplicas: replicas + podOverhead,
MaxReplicas: replicas + maxSurge,
},
}

Expand Down
10 changes: 5 additions & 5 deletions internal/calc/deploymentConfig_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ func TestDeploymentConfig(t *testing.T) {
{
name: "normal deploymentConfig",
deploymentConfig: normalDeploymentConfig,
cpuMin: resource.MustParse("2750m"),
cpuMax: resource.MustParse("5500m"),
memoryMin: resource.MustParse("22Gi"),
memoryMax: resource.MustParse("44Gi"),
cpuMin: resource.MustParse("3250m"),
cpuMax: resource.MustParse("6500m"),
memoryMin: resource.MustParse("26Gi"),
memoryMax: resource.MustParse("52Gi"),
replicas: 10,
maxReplicas: 11,
maxReplicas: 13,
strategy: openshiftAppsV1.DeploymentStrategyTypeRolling,
},
//TODO add more tests
Expand Down
30 changes: 15 additions & 15 deletions internal/calc/deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,23 @@ func TestDeployment(t *testing.T) {
{
name: "normal deployment",
deployment: normalDeployment,
cpuMin: resource.MustParse("2750m"),
cpuMax: resource.MustParse("5500m"),
memoryMin: resource.MustParse("22Gi"),
memoryMax: resource.MustParse("44Gi"),
cpuMin: resource.MustParse("3250m"),
cpuMax: resource.MustParse("6500m"),
memoryMin: resource.MustParse("26Gi"),
memoryMax: resource.MustParse("52Gi"),
replicas: 10,
maxReplicas: 11,
maxReplicas: 13,
strategy: appsv1.RollingUpdateDeploymentStrategyType,
},
{
name: "deployment without strategy",
deployment: deploymentWithoutStrategy,
cpuMin: resource.MustParse("2750m"),
cpuMax: resource.MustParse("11"),
memoryMin: resource.MustParse("22Gi"),
memoryMax: resource.MustParse("44Gi"),
cpuMin: resource.MustParse("3250m"),
cpuMax: resource.MustParse("13"),
memoryMin: resource.MustParse("26Gi"),
memoryMax: resource.MustParse("52Gi"),
replicas: 10,
maxReplicas: 11,
maxReplicas: 13,
strategy: appsv1.RollingUpdateDeploymentStrategyType,
},
{
Expand Down Expand Up @@ -78,12 +78,12 @@ func TestDeployment(t *testing.T) {
{
name: "deployment without max unavailable/surge values",
deployment: deploymentWithoutValues,
cpuMin: resource.MustParse("2750m"),
cpuMax: resource.MustParse("11"),
memoryMin: resource.MustParse("22Gi"),
memoryMax: resource.MustParse("44Gi"),
cpuMin: resource.MustParse("3250m"),
cpuMax: resource.MustParse("13"),
memoryMin: resource.MustParse("26Gi"),
memoryMax: resource.MustParse("52Gi"),
replicas: 10,
maxReplicas: 11,
maxReplicas: 13,
strategy: appsv1.RollingUpdateDeploymentStrategyType,
},
{
Expand Down

0 comments on commit 86c2709

Please sign in to comment.