Skip to content

Commit

Permalink
ignore high replica number for the first 30 min
Browse files Browse the repository at this point in the history
  • Loading branch information
sanposhiho committed Mar 7, 2024
1 parent 0b7f568 commit 71c8847
Show file tree
Hide file tree
Showing 3 changed files with 186 additions and 23 deletions.
77 changes: 54 additions & 23 deletions pkg/recommender/recommender.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,35 @@ func New(
}

func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3.Tortoise, hpa *v2.HorizontalPodAutoscaler, replicaNum int32, now time.Time) (*v1beta3.Tortoise, error) {
scaledUpBasedOnPreferredMaxReplicas := false
if hasHorizontal(tortoise) {
// Handle TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas condition first.
if replicaNum >= s.preferredMaxReplicas &&
// If the current replica number is equal to the maximumMaxReplica,
// increasing the resource request would not change the situation that the replica number is higher than preferredMaxReplicas.
*hpa.Spec.MinReplicas < replicaNum &&
features.Contains(s.featureFlags, features.VerticalScalingBasedOnPreferredMaxReplicas) &&
allowVerticalScalingBasedOnPreferredMaxReplicas(tortoise, now) {

c := utils.GetTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas)
if c == nil || // no condition yet
c.Status == v1.ConditionFalse {
// It's the first time to notice that the current replica number is bigger than the preferred max replica number.
// First 30min, we don't use VerticalScalingBasedOnPreferredMaxReplicas because this replica increase might be very temporal.
// So, here we just change the condition to True, but doesn't trigger scaledUpBasedOnPreferredMaxReplicas.
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionTrue, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is bigger than the preferred max replica number", now)
} else {
// We keep increasing the size until we hit the maxResourceSize.
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionTrue, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is bigger than the preferred max replica number", now)
scaledUpBasedOnPreferredMaxReplicas = true
}
}
if replicaNum < s.preferredMaxReplicas {
// Change TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas to False.
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionFalse, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is not bigger than the preferred max replica number", now)
}
}

logger := log.FromContext(ctx)
requestMap := map[string]map[corev1.ResourceName]resource.Quantity{}
for _, r := range tortoise.Status.Conditions.ContainerResourceRequests {
Expand Down Expand Up @@ -159,7 +188,7 @@ func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3
var newSize int64
var reason string
var err error
newSize, reason, tortoise, err = s.calculateBestNewSize(ctx, tortoise, p, r.ContainerName, recom, k, hpa, replicaNum, req, minAllocatedResourcesMap[r.ContainerName], now)
newSize, reason, err = s.calculateBestNewSize(ctx, tortoise, p, r.ContainerName, recom, k, hpa, replicaNum, req, minAllocatedResourcesMap[r.ContainerName], scaledUpBasedOnPreferredMaxReplicas, now)
if err != nil {
return tortoise, err
}
Expand All @@ -185,7 +214,7 @@ func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3
func allowVerticalScalingBasedOnPreferredMaxReplicas(tortoise *v1beta3.Tortoise, now time.Time) bool {
for _, c := range tortoise.Status.Conditions.TortoiseConditions {
if c.Type == v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas && c.Status == v1.ConditionTrue {
if c.LastTransitionTime.Add(30*time.Minute).After(now) && !c.LastTransitionTime.Time.Equal(now) {
if c.LastTransitionTime.Add(30 * time.Minute).After(now) {
// If the last transition time is within 30 minutes,
// we don't allow the vertical scaling based on the preferred max replicas.
return false
Expand All @@ -209,11 +238,12 @@ func (s *Service) calculateBestNewSize(
replicaNum int32,
resourceRequest resource.Quantity,
minAllocatedResources corev1.ResourceList,
scaledUpBasedOnPreferredMaxReplicas bool,
now time.Time,
) (int64, string, *v1beta3.Tortoise, error) {
) (int64, string, error) {
if p == v1beta3.AutoscalingTypeOff {
// Just keep the current resource request.
return resourceRequest.MilliValue(), "", tortoise, nil
return resourceRequest.MilliValue(), "The autoscaling policy for this resource is Off", nil
}

if p == v1beta3.AutoscalingTypeVertical {
Expand All @@ -222,7 +252,7 @@ func (s *Service) calculateBestNewSize(
// We always follow the recommendation from VPA.
newSize := float64(recommendedResourceRequest.MilliValue()) * (1 + s.bufferRatioOnVerticalResource)
jastified := s.justifyNewSize(resourceRequest.MilliValue(), int64(newSize), k, minAllocatedResources, containerName)
return jastified, fmt.Sprintf("change %v request (%v) (%v → %v) based on VPA suggestion", k, containerName, resourceRequest.MilliValue(), jastified), tortoise, nil
return jastified, fmt.Sprintf("change %v request (%v) (%v → %v) based on VPA suggestion", k, containerName, resourceRequest.MilliValue(), jastified), nil
}

// p == v1beta3.AutoscalingTypeHorizontal
Expand All @@ -231,22 +261,12 @@ func (s *Service) calculateBestNewSize(
// make the container size bigger (just multiple by 1.3) so that the replica number will be descreased.
//
// Here also covers the scenario where the current replica num hits MaximumMaxReplicas.
if replicaNum >= s.preferredMaxReplicas &&
// If the current replica number is equal to the maximumMaxReplica, increasing the resource request would not change the situation that the replica number is higher than preferredMaxReplicas.
*hpa.Spec.MinReplicas < replicaNum &&
features.Contains(s.featureFlags, features.VerticalScalingBasedOnPreferredMaxReplicas) &&
allowVerticalScalingBasedOnPreferredMaxReplicas(tortoise, now) {
if scaledUpBasedOnPreferredMaxReplicas {
// We keep increasing the size until we hit the maxResourceSize.
newSize := int64(float64(resourceRequest.MilliValue()) * 1.3)
jastifiedNewSize := s.justifyNewSize(resourceRequest.MilliValue(), newSize, k, minAllocatedResources, containerName)
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionTrue, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is bigger than the preferred max replica number", now)
msg := fmt.Sprintf("the current number of replicas is bigger than the preferred max replica number in this cluster (%v), so make %v request (%s) bigger (%v → %v)", s.preferredMaxReplicas, k, containerName, resourceRequest.MilliValue(), jastifiedNewSize)
return jastifiedNewSize, msg, tortoise, nil
}

if replicaNum < s.preferredMaxReplicas {
// Change TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas to False.
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionFalse, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is not bigger than the preferred max replica number", now)
msg := fmt.Sprintf("the current number of replicas (%v) is bigger than the preferred max replica number in this cluster (%v), so make %v request (%s) bigger (%v → %v)", replicaNum, s.preferredMaxReplicas, k, containerName, resourceRequest.MilliValue(), jastifiedNewSize)
return jastifiedNewSize, msg, nil
}

if replicaNum <= s.minimumMinReplicas {
Expand All @@ -263,7 +283,7 @@ func (s *Service) calculateBestNewSize(
}
jastified := s.justifyNewSize(resourceRequest.MilliValue(), newSize, k, minAllocatedResources, containerName)

return jastified, fmt.Sprintf("the current number of replicas is equal or smaller than the minimum min replica number in this cluster (%v), so make %v request (%v) smaller (%v → %v) based on VPA suggestion", s.minimumMinReplicas, k, containerName, resourceRequest.MilliValue(), jastified), tortoise, nil
return jastified, fmt.Sprintf("the current number of replicas is equal or smaller than the minimum min replica number in this cluster (%v), so make %v request (%v) smaller (%v → %v) based on VPA suggestion", s.minimumMinReplicas, k, containerName, resourceRequest.MilliValue(), jastified), nil
}

// The replica number is OK based on minimumMinReplicas and preferredMaxReplicas.
Expand All @@ -273,12 +293,12 @@ func (s *Service) calculateBestNewSize(
// Also, if the current replica number is equal to the minReplicas,
// we don't change the resource request based on the current resource utilization
// because even if the resource utilization is low, it's due to the minReplicas.
return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, containerName), "nothing to do", tortoise, nil
return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, containerName), "nothing to do", nil
}

targetUtilizationValue, err := hpaservice.GetHPATargetValue(ctx, hpa, containerName, k)
if err != nil {
return 0, "", tortoise, fmt.Errorf("get the target value from HPA: %w", err)
return 0, "", fmt.Errorf("get the target value from HPA: %w", err)
}

upperUtilization := (float64(recommendedResourceRequest.MilliValue()) / float64(resourceRequest.MilliValue())) * 100
Expand All @@ -293,12 +313,23 @@ func (s *Service) calculateBestNewSize(
// so that the upper usage will be the target usage.
newSize := int64(float64(recommendedResourceRequest.MilliValue()) * 100.0 / float64(targetUtilizationValue))
jastified := s.justifyNewSize(resourceRequest.MilliValue(), newSize, k, minAllocatedResources, containerName)
return jastified, fmt.Sprintf("the current resource usage (%v, %v%%) is too small and it's due to unbalanced container size, so make %v request (%v) smaller (%v → %v) based on VPA's recommendation and HPA target utilization %v%%", recommendedResourceRequest.MilliValue(), int(upperUtilization), k, containerName, resourceRequest.MilliValue(), jastified, targetUtilizationValue), tortoise, nil
return jastified, fmt.Sprintf("the current resource usage (%v, %v%%) is too small and it's due to unbalanced container size, so make %v request (%v) smaller (%v → %v) based on VPA's recommendation and HPA target utilization %v%%", recommendedResourceRequest.MilliValue(), int(upperUtilization), k, containerName, resourceRequest.MilliValue(), jastified, targetUtilizationValue), nil
}

// Just keep the current resource request.
// Only do justification.
return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, containerName), "nothing to do", tortoise, nil
return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, containerName), "nothing to do", nil
}

func hasHorizontal(tortoise *v1beta3.Tortoise) bool {
for _, r := range tortoise.Status.AutoscalingPolicy {
for _, p := range r.Policy {
if p == v1beta3.AutoscalingTypeHorizontal {
return true
}
}
}
return false
}

func hasMultipleHorizontal(t *v1beta3.Tortoise) bool {
Expand Down
122 changes: 122 additions & 0 deletions pkg/recommender/recommender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1613,6 +1613,14 @@ func TestService_UpdateVPARecommendation(t *testing.T) {
}).AddResourcePolicy(v1beta3.ContainerResourcePolicy{
ContainerName: "test-container",
MinAllocatedResources: createResourceList("100m", "100Mi"),
}).AddTortoiseConditions(v1beta3.TortoiseCondition{
Type: v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas,
Status: corev1.ConditionTrue,
Reason: "ScaledUpBasedOnPreferredMaxReplicas",
Message: "the current number of replicas is bigger than the preferred max replica number",
// not recently updated.
LastTransitionTime: metav1.NewTime(now.Add(-31 * time.Minute)),
LastUpdateTime: metav1.NewTime(now.Add(-31 * time.Minute)),
}).AddContainerRecommendationFromVPA(
v1beta3.ContainerRecommendationFromVPA{
ContainerName: "test-container",
Expand Down Expand Up @@ -1674,6 +1682,112 @@ func TestService_UpdateVPARecommendation(t *testing.T) {
}).Build(),
wantErr: false,
},
{
name: "all horizontal: the temporal replica count being above preferredMaxReplicas doesn't trigger the resource increase of VerticalScalingBasedOnPreferredMaxReplicas",
fields: fields{
preferredMaxReplicas: 3,
maxCPU: "1000m",
maxMemory: "1Gi",
features: []features.FeatureFlag{features.VerticalScalingBasedOnPreferredMaxReplicas},
},
args: args{
hpa: &v2.HorizontalPodAutoscaler{
Spec: v2.HorizontalPodAutoscalerSpec{
MinReplicas: ptr.To[int32](1),
Metrics: []v2.MetricSpec{
{
Type: v2.ContainerResourceMetricSourceType,
ContainerResource: &v2.ContainerResourceMetricSource{
Name: corev1.ResourceCPU,
Target: v2.MetricTarget{
AverageUtilization: ptr.To[int32](80),
},
Container: "test-container",
},
},
{
Type: v2.ContainerResourceMetricSourceType,
ContainerResource: &v2.ContainerResourceMetricSource{
Name: corev1.ResourceMemory,
Target: v2.MetricTarget{
AverageUtilization: ptr.To[int32](80),
},
Container: "test-container",
},
},
},
},
},
tortoise: utils.NewTortoiseBuilder().AddAutoscalingPolicy(v1beta3.ContainerAutoscalingPolicy{
ContainerName: "test-container",
Policy: map[corev1.ResourceName]v1beta3.AutoscalingType{
corev1.ResourceCPU: v1beta3.AutoscalingTypeHorizontal,
corev1.ResourceMemory: v1beta3.AutoscalingTypeHorizontal,
},
}).AddResourcePolicy(v1beta3.ContainerResourcePolicy{
ContainerName: "test-container",
MinAllocatedResources: createResourceList("100m", "100Mi"),
}).AddContainerRecommendationFromVPA(
v1beta3.ContainerRecommendationFromVPA{
ContainerName: "test-container",
Recommendation: map[corev1.ResourceName]v1beta3.ResourceQuantity{
corev1.ResourceCPU: {
Quantity: resource.MustParse("500m"),
},
corev1.ResourceMemory: {
Quantity: resource.MustParse("500Mi"),
},
},
},
).AddContainerResourceRequests(v1beta3.ContainerResourceRequests{
ContainerName: "test-container",
Resource: createResourceList("500m", "500Mi"),
}).Build(),
replicaNum: 4,
},
want: utils.NewTortoiseBuilder().AddAutoscalingPolicy(v1beta3.ContainerAutoscalingPolicy{
ContainerName: "test-container",
Policy: map[corev1.ResourceName]v1beta3.AutoscalingType{
corev1.ResourceCPU: v1beta3.AutoscalingTypeHorizontal,
corev1.ResourceMemory: v1beta3.AutoscalingTypeHorizontal,
},
}).AddResourcePolicy(v1beta3.ContainerResourcePolicy{
ContainerName: "test-container",
MinAllocatedResources: createResourceList("100m", "100Mi"),
}).AddContainerRecommendationFromVPA(
v1beta3.ContainerRecommendationFromVPA{
ContainerName: "test-container",
Recommendation: map[corev1.ResourceName]v1beta3.ResourceQuantity{
corev1.ResourceCPU: {
Quantity: resource.MustParse("500m"),
},
corev1.ResourceMemory: {
Quantity: resource.MustParse("500Mi"),
},
},
},
).AddTortoiseConditions(v1beta3.TortoiseCondition{
Type: v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas,
Status: corev1.ConditionTrue,
Reason: "ScaledUpBasedOnPreferredMaxReplicas",
Message: "the current number of replicas is bigger than the preferred max replica number",
LastTransitionTime: metav1.NewTime(now),
LastUpdateTime: metav1.NewTime(now),
}).AddContainerResourceRequests(v1beta3.ContainerResourceRequests{
ContainerName: "test-container",
Resource: createResourceList("500m", "500Mi"),
}).SetRecommendations(v1beta3.Recommendations{
Vertical: v1beta3.VerticalRecommendations{
ContainerResourceRecommendation: []v1beta3.RecommendedContainerResources{
{
ContainerName: "test-container",
RecommendedResource: createResourceList("500m", "500Mi"), // current
},
},
},
}).Build(),
wantErr: false,
},
{
name: "all horizontal: replica count above preferredMaxReplicas, but we recently increase the resource: don't increase the resources",
fields: fields{
Expand Down Expand Up @@ -2011,6 +2125,14 @@ func TestService_UpdateVPARecommendation(t *testing.T) {
}).AddResourcePolicy(v1beta3.ContainerResourcePolicy{
ContainerName: "test-container",
MinAllocatedResources: createResourceList("100m", "100Mi"),
}).AddTortoiseConditions(v1beta3.TortoiseCondition{
Type: v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas,
Status: corev1.ConditionTrue,
Reason: "ScaledUpBasedOnPreferredMaxReplicas",
Message: "the current number of replicas is bigger than the preferred max replica number",
// not recently updated.
LastTransitionTime: metav1.NewTime(now.Add(-31 * time.Minute)),
LastUpdateTime: metav1.NewTime(now.Add(-31 * time.Minute)),
}).AddContainerRecommendationFromVPA(
v1beta3.ContainerRecommendationFromVPA{
ContainerName: "test-container",
Expand Down
10 changes: 10 additions & 0 deletions pkg/utils/tortoise.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ func ChangeTortoiseCondition(t *v1beta3.Tortoise, conditionType v1beta3.Tortoise
return t
}

func GetTortoiseCondition(t *v1beta3.Tortoise, conditionType v1beta3.TortoiseConditionType) *v1beta3.TortoiseCondition {
for i := range t.Status.Conditions.TortoiseConditions {
if t.Status.Conditions.TortoiseConditions[i].Type == conditionType {
return &t.Status.Conditions.TortoiseConditions[i]
}
}

return nil
}

func ChangeTortoiseContainerResourcePhase(tortoise *v1beta3.Tortoise, containerName string, rn corev1.ResourceName, now time.Time, phase v1beta3.ContainerResourcePhase) *v1beta3.Tortoise {
found := false
for i, p := range tortoise.Status.ContainerResourcePhases {
Expand Down

0 comments on commit 71c8847

Please sign in to comment.