fix: only include initContainer for rolling out pods
For normally running pods, init containers can be ignored. During the init phase of a pod, i.e. before it becomes ready, either the init containers or the normal containers are running, so the highest value of the two is used for each resource quantity. These `MaxResources` are used for all non-ready pods.
Refs: postfinance#17
druppelt committed Sep 14, 2024
1 parent 7b56953 commit 9496499
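
In formula form, the model this commit moves to (per resource quantity, for workloads with a rolling-update budget; see deployment.go and deploymentConfig.go below) is roughly:

    quota = Containers * (replicas - maxUnavailable) + max(Containers, InitContainers) * (maxSurge + maxUnavailable)

where Containers and InitContainers are the per-pod sums over the respective container lists, and the second term charges every pod that can be non-ready at the same time at its `MaxResources`.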
Showing 11 changed files with 305 additions and 47 deletions.
59 changes: 48 additions & 11 deletions internal/calc/calc.go
@@ -69,6 +69,15 @@ type Resources struct {
MemoryMax resource.Quantity
}

// PodResources contains the summed resources required by the init containers, the summed resources required by
// the normal containers, and the maximum the pod can require at any time for each resource quantity.
// In other words, MaxResources is max(Containers.CPUMin, InitContainers.CPUMin), max(Containers.CPUMax, InitContainers.CPUMax), etc.
type PodResources struct {
Containers Resources
InitContainers Resources
MaxResources Resources
}

// ConvertToResources converts a kubernetes/openshift ResourceRequirements struct to a Resources struct
func ConvertToResources(req *v1.ResourceRequirements) Resources {
return Resources{
@@ -89,6 +98,11 @@ func (r Resources) Add(y Resources) Resources {
return r
}

// MulInt32 multiplies all resource values by the given multiplier.
func (r Resources) MulInt32(y int32) Resources {
return r.Mul(float64(y))
}

// Mul multiplies all resource values by the given multiplier.
func (r Resources) Mul(y float64) Resources {
// TODO check if overflow issues due to milli instead of value are to be expected
@@ -100,33 +114,47 @@ func (r Resources) Mul(y float64) Resources {
return r
}

func podResources(podSpec *v1.PodSpec) (r *Resources) {
r = new(Resources)
func calcPodResources(podSpec *v1.PodSpec) (r *PodResources) {
r = new(PodResources)

for i := range podSpec.Containers {
container := podSpec.Containers[i]

r.CPUMin.Add(*container.Resources.Requests.Cpu())
r.CPUMax.Add(*container.Resources.Limits.Cpu())
r.MemoryMin.Add(*container.Resources.Requests.Memory())
r.MemoryMax.Add(*container.Resources.Limits.Memory())
r.Containers.CPUMin.Add(*container.Resources.Requests.Cpu())
r.Containers.CPUMax.Add(*container.Resources.Limits.Cpu())
r.Containers.MemoryMin.Add(*container.Resources.Requests.Memory())
r.Containers.MemoryMax.Add(*container.Resources.Limits.Memory())
}

for i := range podSpec.InitContainers {
container := podSpec.InitContainers[i]

r.CPUMin.Add(*container.Resources.Requests.Cpu())
r.CPUMax.Add(*container.Resources.Limits.Cpu())
r.MemoryMin.Add(*container.Resources.Requests.Memory())
r.MemoryMax.Add(*container.Resources.Limits.Memory())
r.InitContainers.CPUMin.Add(*container.Resources.Requests.Cpu())
r.InitContainers.CPUMax.Add(*container.Resources.Limits.Cpu())
r.InitContainers.MemoryMin.Add(*container.Resources.Requests.Memory())
r.InitContainers.MemoryMax.Add(*container.Resources.Limits.Memory())
}

r.MaxResources.CPUMin = maxQuantity(r.Containers.CPUMin, r.InitContainers.CPUMin)
r.MaxResources.CPUMax = maxQuantity(r.Containers.CPUMax, r.InitContainers.CPUMax)
r.MaxResources.MemoryMin = maxQuantity(r.Containers.MemoryMin, r.InitContainers.MemoryMin)
r.MaxResources.MemoryMax = maxQuantity(r.Containers.MemoryMax, r.InitContainers.MemoryMax)

return
}

func maxQuantity(q1, q2 resource.Quantity) resource.Quantity {
if q1.MilliValue() > q2.MilliValue() {
return q1
}

return q2
}

// ResourceQuotaFromYaml decodes a single yaml document into a k8s object. Then performs a type assertion
// on the object and calculates the resource needs of it.
// Currently supported:
// * apps.openshift.io/v1 - DeploymentConfig
// * apps/v1 - Deployment
// * apps/v1 - StatefulSet
// * apps/v1 - DaemonSet
@@ -189,7 +217,16 @@ func ResourceQuotaFromYaml(yamlData []byte) (*ResourceUsage, error) {

return usage, nil
case *appsv1.StatefulSet:
return statefulSet(*obj), nil
usage, err := statefulSet(*obj)
if err != nil {
return nil, CalculationError{
Version: gvk.Version,
Kind: gvk.Kind,
err: err,
}
}

return usage, nil
case *appsv1.DaemonSet:
return daemonSet(*obj), nil
case *batchV1.Job:
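To illustrate how the per-quantity maximum behaves, here is a standalone sketch (not part of this diff) that mirrors the maxQuantity helper above using k8s.io/apimachinery quantities. Comparison is done on MilliValue, so mixed formats such as "1" and "750m" compare correctly.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// maxQuantity mirrors the helper added in internal/calc/calc.go:
// it returns the larger of two quantities, compared by milli-value.
func maxQuantity(q1, q2 resource.Quantity) resource.Quantity {
	if q1.MilliValue() > q2.MilliValue() {
		return q1
	}
	return q2
}

func main() {
	// Hypothetical per-pod sums: normal containers vs. init containers.
	containersCPU, initCPU := resource.MustParse("750m"), resource.MustParse("1")
	containersMem, initMem := resource.MustParse("4Gi"), resource.MustParse("1Gi")

	// MaxResources takes the maximum per quantity, so here the init containers
	// win on CPU while the normal containers win on memory.
	fmt.Println(maxQuantity(containersCPU, initCPU).String()) // 1
	fmt.Println(maxQuantity(containersMem, initMem).String()) // 4Gi
}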
135 changes: 134 additions & 1 deletion internal/calc/calc_test.go
@@ -2,9 +2,10 @@ package calc

import (
"errors"
"k8s.io/apimachinery/pkg/api/resource"
"testing"

"k8s.io/apimachinery/pkg/api/resource"

"github.com/stretchr/testify/require"
)

@@ -549,6 +550,138 @@ spec:
memory: 2Gi
terminationGracePeriodSeconds: 30`

var multiContainerPod = `
---
apiVersion: v1
kind: Pod
metadata:
labels:
app: mypod
name: mypod
spec:
containers:
- image: mypod
imagePullPolicy: Always
name: myapp
resources:
limits:
cpu: "1"
memory: 4Gi
requests:
cpu: 250m
memory: 2Gi
- image: mypod2
imagePullPolicy: Always
name: myapp2
resources:
limits:
cpu: "750m"
memory: 3Gi
requests:
cpu: 150m
memory: 1Gi
terminationGracePeriodSeconds: 30`

var initContainerPod = `
---
apiVersion: v1
kind: Pod
metadata:
labels:
app: mypod
name: mypod
spec:
initContainers:
- image: mypod
imagePullPolicy: Always
name: myapp
resources:
limits:
cpu: "500m"
memory: 1Gi
requests:
cpu: 250m
memory: 1Gi
containers:
- image: mypod
imagePullPolicy: Always
name: myapp
resources:
limits:
cpu: "1"
memory: 4Gi
requests:
cpu: 250m
memory: 2Gi
terminationGracePeriodSeconds: 30`

var bigInitContainerPod = `
---
apiVersion: v1
kind: Pod
metadata:
labels:
app: mypod
name: mypod
spec:
initContainers:
- image: mypod
imagePullPolicy: Always
name: myapp
resources:
limits:
cpu: "2"
memory: 5Gi
requests:
cpu: 1
memory: 3Gi
containers:
- image: mypod
imagePullPolicy: Always
name: myapp
resources:
limits:
cpu: "1"
memory: 4Gi
requests:
cpu: 250m
memory: 2Gi
terminationGracePeriodSeconds: 30`

// the idea here is that for some resources the init container is bigger and for others the normal container is bigger
var mediumInitContainerPod = `
---
apiVersion: v1
kind: Pod
metadata:
labels:
app: mypod
name: mypod
spec:
initContainers:
- image: mypod
imagePullPolicy: Always
name: myapp
resources:
limits:
cpu: "2"
memory: 3Gi
requests:
cpu: 100m
memory: 3Gi
containers:
- image: mypod
imagePullPolicy: Always
name: myapp
resources:
limits:
cpu: "1"
memory: 4Gi
requests:
cpu: 250m
memory: 2Gi
terminationGracePeriodSeconds: 30`

var normalDaemonSet = `
apiVersion: apps/v1
kind: DaemonSet
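For the mediumInitContainerPod fixture above, the per-quantity maximum would be expected to come out mixed (init container value listed first, normal containers second), assuming the maxQuantity logic from calc.go:

    CPUMin    = max(100m, 250m) = 250m   (normal container dominates)
    CPUMax    = max(2, 1)       = 2      (init container dominates)
    MemoryMin = max(3Gi, 2Gi)   = 3Gi    (init container dominates)
    MemoryMax = max(3Gi, 4Gi)   = 4Gi    (normal container dominates)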
4 changes: 2 additions & 2 deletions internal/calc/cronjob.go
@@ -3,10 +3,10 @@ package calc
import batchV1 "k8s.io/api/batch/v1"

func cronjob(cronjob batchV1.CronJob) *ResourceUsage {
podResources := podResources(&cronjob.Spec.JobTemplate.Spec.Template.Spec)
podResources := calcPodResources(&cronjob.Spec.JobTemplate.Spec.Template.Spec).MaxResources

resourceUsage := ResourceUsage{
Resources: *podResources,
Resources: podResources,
Details: Details{
Version: cronjob.APIVersion,
Kind: cronjob.Kind,
4 changes: 2 additions & 2 deletions internal/calc/daemonset.go
@@ -5,10 +5,10 @@ import (
)

func daemonSet(dSet appsv1.DaemonSet) *ResourceUsage {
podResources := podResources(&dSet.Spec.Template.Spec)
podResources := calcPodResources(&dSet.Spec.Template.Spec).MaxResources

resourceUsage := ResourceUsage{
Resources: *podResources,
Resources: podResources,
Details: Details{
Version: dSet.APIVersion,
Kind: dSet.Kind,
16 changes: 10 additions & 6 deletions internal/calc/deployment.go
@@ -13,8 +13,11 @@ import (
// strategy are taken into account.
func deployment(deployment appsv1.Deployment) (*ResourceUsage, error) { //nolint:funlen // disable function length linting
var (
maxUnavailable int32 // max amount of unavailable pods during a deployment
maxSurge int32 // max amount of pods that are allowed in addition to replicas during deployment
maxUnavailable int32 // max amount of unavailable pods during a deployment
maxSurge int32 // max amount of pods that are allowed in addition to replicas during deployment
maxNonReadyPodCount int32 // max number of pods that are not ready during a deployment,
// i.e. pods still running init containers or already running normal containers
// whose readiness probes haven't succeeded yet
)

replicas := deployment.Spec.Replicas
@@ -37,6 +40,7 @@ func deployment(deployment appsv1.Deployment) (*ResourceUsage, error) { //nolint
switch strategy.Type {
case appsv1.RecreateDeploymentStrategyType:
// kill all existing pods, then recreate new ones at once -> no overhead on recreate
maxNonReadyPodCount = *replicas
maxUnavailable = *replicas
maxSurge = 0
case "":
@@ -93,14 +97,14 @@ func deployment(deployment appsv1.Deployment) (*ResourceUsage, error) { //nolint

maxSurge = int32(maxSurgeInt)

// maxNonReadyPodCount is the max number of pods potentially not ready during a deployment
maxNonReadyPodCount = maxSurge + maxUnavailable
default:
return nil, fmt.Errorf("deployment: %s deployment strategy %q is unknown", deployment.Name, strategy.Type)
}

_ = maxUnavailable // fix complaining compiler. will need this field in the future

podResources := podResources(&deployment.Spec.Template.Spec)
newResources := podResources.Mul(float64(*replicas + maxSurge))
podResources := calcPodResources(&deployment.Spec.Template.Spec)
newResources := podResources.Containers.MulInt32(*replicas - maxUnavailable).Add(podResources.MaxResources.MulInt32(maxNonReadyPodCount))

resourceUsage := ResourceUsage{
Resources: newResources,
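To make the effect of the new formula concrete, a worked example with hypothetical numbers (not taken from the repository's test fixtures): replicas = 3, maxSurge = 1, maxUnavailable = 1, normal containers requesting 250m CPU per pod, init containers requesting 500m CPU per pod.

    ready pods:      (3 - 1) * 250m             = 500m
    non-ready pods:  (1 + 1) * max(250m, 500m)  = 1000m
    new CPU request total                       = 1500m

    previous behaviour: (3 + 1) * (250m + 500m) = 3000m

Pods that are already ready are no longer charged for their init containers.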
17 changes: 11 additions & 6 deletions internal/calc/deploymentConfig.go
@@ -13,8 +13,11 @@ import (
// strategy are taken into account.
func deploymentConfig(deploymentConfig openshiftAppsV1.DeploymentConfig) (*ResourceUsage, error) { //nolint:funlen // disable function length linting
var (
maxUnavailable int32 // max amount of unavailable pods during a deployment
maxSurge int32 // max amount of pods that are allowed in addition to replicas during deployment
maxUnavailable int32 // max amount of unavailable pods during a deployment
maxSurge int32 // max amount of pods that are allowed in addition to replicas during deployment
maxNonReadyPodCount int32 // max number of pods that are not ready during a deployment,
// i.e. pods still running init containers or already running normal containers
// whose readiness probes haven't succeeded yet
)

replicas := deploymentConfig.Spec.Replicas
@@ -37,6 +40,7 @@ func deploymentConfig(deploymentConfig openshiftAppsV1.DeploymentConfig) (*Resou
switch strategy.Type {
case openshiftAppsV1.DeploymentStrategyTypeRecreate:
// kill all existing pods, then recreate new ones at once -> no overhead on recreate
maxNonReadyPodCount = replicas
maxUnavailable = replicas
maxSurge = 0
case "":
@@ -92,15 +96,16 @@ func deploymentConfig(deploymentConfig openshiftAppsV1.DeploymentConfig) (*Resou
}

maxSurge = int32(maxSurgeInt)

// maxNonReadyPodCount is the max number of pods potentially not ready during a deployment
maxNonReadyPodCount = maxSurge + maxUnavailable
default:
return nil, fmt.Errorf("deploymentConfig: %s deploymentConfig strategy %q is unknown", deploymentConfig.Name, strategy.Type)
}

_ = maxUnavailable // fix complaining compiler. will need this field in the future

podResources := podResources(&deploymentConfig.Spec.Template.Spec)
podResources := calcPodResources(&deploymentConfig.Spec.Template.Spec)
strategyResources := ConvertToResources(&deploymentConfig.Spec.Strategy.Resources)
newResources := podResources.Mul(float64(replicas + maxSurge)).Add(strategyResources)
newResources := podResources.Containers.MulInt32(replicas - maxUnavailable).Add(podResources.MaxResources.MulInt32(maxNonReadyPodCount)).Add(strategyResources)

resourceUsage := ResourceUsage{
Resources: newResources,
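The DeploymentConfig calculation follows the same pattern as the Deployment one above, with one addition: the resources declared on the deployment strategy (`Spec.Strategy.Resources`) are added on top, so roughly

    quota = Containers * (replicas - maxUnavailable) + MaxResources * (maxSurge + maxUnavailable) + strategyResources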
14 changes: 6 additions & 8 deletions internal/calc/deployment_test.go
@@ -87,14 +87,12 @@ func TestDeployment(t *testing.T) {
strategy: appsv1.RollingUpdateDeploymentStrategyType,
},
{
name: "deployment with init container(s)",
deployment: initContainerDeployment,
// TODO the expectation shouldn't be for the init container resources to be considered for all containers,
// just the amount that can be in non-ready state simultaneously
cpuMin: resource.MustParse("1200m"),
cpuMax: resource.MustParse("4400m"),
memoryMin: resource.MustParse("8592Mi"),
memoryMax: resource.MustParse("17184Mi"),
name: "deployment with init container(s)",
deployment: initContainerDeployment,
cpuMin: resource.MustParse("1000m"),
cpuMax: resource.MustParse("4000m"),
memoryMin: resource.MustParse("8Gi"),
memoryMax: resource.MustParse("16Gi"),
replicas: 3,
maxReplicas: 4,
strategy: appsv1.RollingUpdateDeploymentStrategyType,
Expand Down
4 changes: 2 additions & 2 deletions internal/calc/job.go
@@ -3,10 +3,10 @@ package calc
import batchV1 "k8s.io/api/batch/v1"

func job(job batchV1.Job) *ResourceUsage {
podResources := podResources(&job.Spec.Template.Spec)
podResources := calcPodResources(&job.Spec.Template.Spec).MaxResources

resourceUsage := ResourceUsage{
Resources: *podResources,
Resources: podResources,
Details: Details{
Version: job.APIVersion,
Kind: job.Kind,
