Skip to content

Commit

Permalink
Merge branch 'main' of github.com:mercari/tortoise into scheduledscaling
Browse files Browse the repository at this point in the history
  • Loading branch information
randytqwjp committed Mar 5, 2024
2 parents 29ed82d + 09e8828 commit 57b26db
Show file tree
Hide file tree
Showing 49 changed files with 1,272 additions and 503 deletions.
2 changes: 1 addition & 1 deletion api/autoscaling/v2/webhook_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ var _ = BeforeSuite(func() {
eventRecorder := mgr.GetEventRecorderFor("tortoise-controller")
tortoiseService, err := tortoise.New(mgr.GetClient(), eventRecorder, config.RangeOfMinMaxReplicasRecommendationHours, config.TimeZone, config.TortoiseUpdateInterval, config.GatheringDataPeriodType)
Expect(err).NotTo(HaveOccurred())
hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, 100, time.Hour, 1000, 10000, "")
hpaService, err := hpa.New(mgr.GetClient(), eventRecorder, config.ReplicaReductionFactor, config.MaximumTargetResourceUtilization, 100, time.Hour, 1000, 10000, 3, "")
Expect(err).NotTo(HaveOccurred())

hpaWebhook := New(tortoiseService, hpaService)
Expand Down
16 changes: 15 additions & 1 deletion api/v1beta3/tortoise_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,20 +217,34 @@ type TortoisePhase string
const (
// TortoisePhaseInitializing means tortoise has just been created and is initializing some components (HPA and VPA),
// and is waiting for those components to be ready.
// Possible flow: (none) → Initializing
TortoisePhaseInitializing TortoisePhase = "Initializing"
// TortoisePhaseGatheringData means tortoise is now gathering data for MinReplicas/MaxReplicas
// and cannot make accurate recommendations yet.
// Possible flow: Initializing → GatheringData
TortoisePhaseGatheringData TortoisePhase = "GatheringData"
// TortoisePhaseWorking means tortoise is making the recommendations,
// and is applying the recommendation values.
// Possible flow:
// - GatheringData → Working (when all the data is ready)
// - PartlyWorking → Working (when all the data is ready)
// - BackToNormal → Working (minReplicas goes back to the normal number)
TortoisePhaseWorking TortoisePhase = "Working"
// TortoisePhasePartlyWorking means tortoise has maxReplicas and minReplicas recommendations ready,
// and is applying the recommendation values.
// However, some of the resources are not scaled for some reason (probably because data is still being gathered).
// Possible flow:
// - GatheringData → PartlyWorking (only some of the resources are ready)
// - Working → PartlyWorking (autoscaling policy is changed)
TortoisePhasePartlyWorking TortoisePhase = "PartlyWorking"
// TortoisePhaseEmergency means tortoise is in the emergency mode.
//
// Possible flow:
// - Working → Emergency
TortoisePhaseEmergency TortoisePhase = "Emergency"
// TortoisePhaseBackToNormal means tortoise was in the emergency mode, and now it's coming back to the normal operation.
// During TortoisePhaseBackToNormal, the number of replicas of workloads is gradually reduced to the usual value.
//
// Possible flow:
// - Emergency → BackToNormal
TortoisePhaseBackToNormal TortoisePhase = "BackToNormal"
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
metadata:
annotations:
tortoise.autoscaling.mercari.com/managed-by-tortoise: "true"
name: tortoise-hpa-mercari
namespace: default
spec:
Expand All @@ -18,7 +16,7 @@ spec:
value: 100
selectPolicy: Max
stabilizationWindowSeconds: 0
maxReplicas: 20
maxReplicas: 10000
metrics:
- containerResource:
container: app
Expand All @@ -27,7 +25,7 @@ spec:
averageUtilization: 70
type: Utilization
type: ContainerResource
minReplicas: 5
minReplicas: 3
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,6 @@ status:
reason: ScaledUpBasedOnPreferredMaxReplicas
status: "False"
type: ScaledUpBasedOnPreferredMaxReplicas
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
message: HPA target utilization is updated
reason: HPATargetUtilizationUpdated
status: "True"
type: HPATargetUtilizationUpdated
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
message: The recommendation is provided
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,4 @@ spec:
requests:
cpu: "10"
memory: 10Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ spec:
strategy: {}
template:
metadata:
annotations:
kubectl.kubernetes.io/restartedAt: "2023-01-01T00:00:00Z"
creationTimestamp: null
labels:
app: mercari
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ status:
- containerName: app
resource:
cpu: "4"
memory: 4Gi
memory: 3Gi
tortoiseConditions:
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
Expand All @@ -45,6 +45,11 @@ status:
reason: ScaledUpBasedOnPreferredMaxReplicas
status: "False"
type: ScaledUpBasedOnPreferredMaxReplicas
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
message: The recommendation is provided
status: "True"
type: VerticalRecommendationUpdated
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
status: "False"
Expand Down Expand Up @@ -90,4 +95,4 @@ status:
verticalPodAutoscalers:
- name: tortoise-monitor-mercari
role: Monitor
tortoisePhase: GatheringData
tortoisePhase: PartlyWorking
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
value: 100
selectPolicy: Max
stabilizationWindowSeconds: 0
maxReplicas: 20
maxReplicas: 100
metrics:
- external:
metric:
Expand All @@ -34,7 +34,7 @@ spec:
averageUtilization: 50
type: Utilization
type: ContainerResource
minReplicas: 5
minReplicas: 1
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,6 @@ status:
reason: ScaledUpBasedOnPreferredMaxReplicas
status: "False"
type: ScaledUpBasedOnPreferredMaxReplicas
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
message: HPA target utilization is updated
reason: HPATargetUtilizationUpdated
status: "True"
type: HPATargetUtilizationUpdated
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
message: The recommendation is provided
Expand Down Expand Up @@ -102,4 +96,4 @@ status:
verticalPodAutoscalers:
- name: tortoise-monitor-mercari
role: Monitor
tortoisePhase: Working
tortoisePhase: PartlyWorking
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
metadata:
annotations:
tortoise.autoscaling.mercari.com/managed-by-tortoise: "true"
name: tortoise-monitor-mercari
namespace: default
spec:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
value: 100
selectPolicy: Max
stabilizationWindowSeconds: 0
maxReplicas: 20
maxReplicas: 100
metrics:
- containerResource:
container: app
Expand All @@ -27,7 +27,7 @@ spec:
averageUtilization: 50
type: Utilization
type: ContainerResource
minReplicas: 5
minReplicas: 1
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ status:
reason: ScaledUpBasedOnPreferredMaxReplicas
status: "False"
type: ScaledUpBasedOnPreferredMaxReplicas
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
message: HPA target utilization is updated
reason: HPATargetUtilizationUpdated
status: "True"
type: HPATargetUtilizationUpdated
- lastTransitionTime: "2023-01-01T00:00:00Z"
lastUpdateTime: "2023-01-01T00:00:00Z"
message: The recommendation is provided
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ spec:
requests:
cpu: "4"
memory: 4Gi
status:
replicas: 10
16 changes: 11 additions & 5 deletions controllers/tortoise_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,13 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_
logger.Error(err, "failed to get deployment", "tortoise", req.NamespacedName)
return ctrl.Result{}, err
}
currentReplicaNum := dm.Status.Replicas
if dm.Spec.Replicas == nil {
logger.Error(nil, "the deployment doesn't have the number of replicas and tortoise cannot calculate the recommendation", "tortoise", req.NamespacedName, "deployment", klog.KObj(dm))
return ctrl.Result{}, nil

}

currentDesiredReplicaNum := *dm.Spec.Replicas // Use the desired replica number.

if tortoise.Spec.UpdateMode == autoscalingv1beta3.UpdateModeOff /* When Off, ContainerResourceRequests should be reset */ ||
tortoise.Status.Conditions.ContainerResourceRequests == nil /* The first reconciliation */ {
Expand All @@ -191,7 +197,7 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_
if tortoise.Status.TortoisePhase == autoscalingv1beta3.TortoisePhaseInitializing {
logger.Info("initializing tortoise", "tortoise", req.NamespacedName)
// need to initialize HPA and VPA.
if err := r.initializeVPAAndHPA(ctx, tortoise, currentReplicaNum, now); err != nil {
if err := r.initializeVPAAndHPA(ctx, tortoise, currentDesiredReplicaNum, now); err != nil {
return ctrl.Result{}, fmt.Errorf("initialize VPA and HPA: %w", err)
}

Expand All @@ -204,7 +210,7 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_
return ctrl.Result{}, fmt.Errorf("add finalizer: %w", err)
}

tortoise, err = r.HpaService.UpdateHPASpecFromTortoiseAutoscalingPolicy(ctx, tortoise, hpa, currentReplicaNum, now)
tortoise, err = r.HpaService.UpdateHPASpecFromTortoiseAutoscalingPolicy(ctx, tortoise, hpa, currentDesiredReplicaNum, now)
if err != nil {
logger.Error(err, "update HPA spec from Tortoise autoscaling policy", "tortoise", req.NamespacedName)
return ctrl.Result{}, err
Expand Down Expand Up @@ -243,7 +249,7 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_

tortoise = r.TortoiseService.UpdateContainerRecommendationFromVPA(tortoise, monitorvpa, now)

tortoise, err = r.RecommenderService.UpdateRecommendations(ctx, tortoise, hpa, currentReplicaNum, now)
tortoise, err = r.RecommenderService.UpdateRecommendations(ctx, tortoise, hpa, currentDesiredReplicaNum, now)
if err != nil {
logger.Error(err, "update recommendation in tortoise", "tortoise", req.NamespacedName)
return ctrl.Result{}, err
Expand All @@ -266,7 +272,7 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_
return ctrl.Result{}, err
}

tortoise, err = r.TortoiseService.UpdateResourceRequest(ctx, tortoise, currentReplicaNum, now)
tortoise, err = r.TortoiseService.UpdateResourceRequest(ctx, tortoise, currentDesiredReplicaNum, now)
if err != nil {
logger.Error(err, "update VPA based on the recommendation in tortoise", "tortoise", req.NamespacedName)
return ctrl.Result{}, err
Expand Down
2 changes: 1 addition & 1 deletion controllers/tortoise_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func startController(ctx context.Context) func() {
Expect(err).ShouldNot(HaveOccurred())
cli, err := vpa.New(mgr.GetConfig(), recorder)
Expect(err).ShouldNot(HaveOccurred())
hpaS, err := hpa.New(mgr.GetClient(), recorder, 0.95, 90, 25, time.Hour, 1000, 10000, ".*-exclude-metric")
hpaS, err := hpa.New(mgr.GetClient(), recorder, 0.95, 90, 25, time.Hour, 1000, 10000, 3, ".*-exclude-metric")
Expect(err).ShouldNot(HaveOccurred())
reconciler := &TortoiseReconciler{
Scheme: scheme,
Expand Down
Loading

0 comments on commit 57b26db

Please sign in to comment.