Skip to content

Commit

Permalink
add(observability): emit events from the controller
Browse files Browse the repository at this point in the history
  • Loading branch information
sanposhiho committed Oct 2, 2023
1 parent 29c7517 commit 2997fa0
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 31 deletions.
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,11 @@ rules:
- get
- patch
- update
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- update
23 changes: 15 additions & 8 deletions controllers/tortoise_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
v1 "k8s.io/api/apps/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/log"

Expand All @@ -55,17 +56,25 @@ type TortoiseReconciler struct {
DeploymentService *deployment.Service
TortoiseService *tortoise.Service
RecommenderService *recommender.Service
EventRecorder record.EventRecorder
}

//+kubebuilder:rbac:groups=autoscaling.mercari.com,resources=tortoises,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=autoscaling.mercari.com,resources=tortoises/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=autoscaling.mercari.com,resources=tortoises/finalizers,verbs=update
//+kubebuilder:rbac:groups=core,resources=events,verbs=create;update;patch

//+kubebuilder:rbac:groups=autoscaling.k8s.io,resources=verticalpodautoscalers,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch;create;update;patch;delete

func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
defer func() {
if reterr != nil {
r.EventRecorder.Event(&autoscalingv1alpha1.Tortoise{}, "Warning", "ReconcileError", reterr.Error())
}
}()

logger := log.FromContext(ctx)
now := time.Now()

Expand All @@ -90,7 +99,7 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
if err := r.TortoiseService.RemoveFinalizer(ctx, tortoise); err != nil {
return ctrl.Result{}, fmt.Errorf("remove finalizer: %w", err)
}
return ctrl.Result{}, nil
return ctrl.Result{RequeueAfter: r.Interval}, nil
}

// tortoise is not deleted. Make sure finalizer is added to tortoise.
Expand All @@ -117,9 +126,7 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
return ctrl.Result{}, fmt.Errorf("initialize VPAs and HPA: %w", err)
}

// VPA and HPA are just created, and they won't start working soon.
// So, return here and wait a few min for them to start to work.
return ctrl.Result{RequeueAfter: 2 * time.Minute}, nil
return ctrl.Result{RequeueAfter: r.Interval}, nil
}

vpa, ready, err := r.VpaService.GetTortoiseMonitorVPA(ctx, tortoise)
Expand All @@ -135,7 +142,7 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
logger.Error(err, "update Tortoise status", "tortoise", req.NamespacedName)
return ctrl.Result{}, err
}
return ctrl.Result{RequeueAfter: 2 * time.Minute}, nil
return ctrl.Result{RequeueAfter: r.Interval}, nil
}

logger.V(4).Info("VPA created by tortoise is ready, proceeding to generate the recommendation", "tortoise", req.NamespacedName)
Expand All @@ -161,7 +168,7 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c

if tortoise.Status.TortoisePhase == autoscalingv1alpha1.TortoisePhaseGatheringData {
logger.V(4).Info("tortoise is GatheringData phase; skip applying the recommendation to HPA or VPAs")
return ctrl.Result{}, nil
return ctrl.Result{RequeueAfter: r.Interval}, nil
}

_, tortoise, err = r.HpaService.UpdateHPAFromTortoiseRecommendation(ctx, tortoise, now)
Expand All @@ -182,7 +189,7 @@ func (r *TortoiseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
return ctrl.Result{}, err
}

return ctrl.Result{}, nil
return ctrl.Result{RequeueAfter: r.Interval}, nil
}

func (r *TortoiseReconciler) deleteVPAAndHPA(ctx context.Context, tortoise *autoscalingv1alpha1.Tortoise, now time.Time) error {
Expand Down
12 changes: 7 additions & 5 deletions controllers/tortoise_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
autoscalingv1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/retry"
"k8s.io/utils/pointer"
ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -67,13 +68,14 @@ var _ = Describe("Test TortoiseController", func() {
})
Expect(err).ShouldNot(HaveOccurred())

tortoiseService, err := tortoise.New(mgr.GetClient(), 1, "Asia/Tokyo", 1000*time.Minute, "weekly")
tortoiseService, err := tortoise.New(mgr.GetClient(), record.NewFakeRecorder(10), 1, "Asia/Tokyo", 1000*time.Minute, "weekly")
Expect(err).ShouldNot(HaveOccurred())
cli, err := vpa.New(mgr.GetConfig())
cli, err := vpa.New(mgr.GetConfig(), record.NewFakeRecorder(10))
Expect(err).ShouldNot(HaveOccurred())
reconciler := &TortoiseReconciler{
Scheme: scheme,
HpaService: hpa.New(mgr.GetClient(), 0.95, 90),
HpaService: hpa.New(mgr.GetClient(), record.NewFakeRecorder(10), 0.95, 90),
EventRecorder: record.NewFakeRecorder(10),
VpaService: cli,
DeploymentService: deployment.New(mgr.GetClient()),
TortoiseService: tortoiseService,
Expand Down Expand Up @@ -1632,7 +1634,7 @@ func (t *testCase) initializeResources(ctx context.Context, k8sClient client.Cli
}
if t.before.hpa == nil {
// create default HPA.
HpaClient := hpa.New(k8sClient, 0.95, 90)
HpaClient := hpa.New(k8sClient, record.NewFakeRecorder(10), 0.95, 90)
t.before.hpa, t.before.tortoise, err = HpaClient.CreateHPA(ctx, t.before.tortoise, t.before.deployment)
if err != nil {
return err
Expand All @@ -1641,7 +1643,7 @@ func (t *testCase) initializeResources(ctx context.Context, k8sClient client.Cli
if t.before.vpa == nil {
// create default VPAs.
t.before.vpa = map[v1alpha1.VerticalPodAutoscalerRole]*autoscalingv1.VerticalPodAutoscaler{}
VpaClient, err := vpa.New(config)
VpaClient, err := vpa.New(config, record.NewFakeRecorder(10))
if err != nil {
return err
}
Expand Down
10 changes: 6 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,19 +144,20 @@ func main() {
setupLog.Error(err, "unable to start manager")
os.Exit(1)
}
tortoiseService, err := tortoise.New(mgr.GetClient(), rangeOfMinMaxReplicasRecommendationHours, timeZone, tortoiseUpdateInterval, minMaxReplicasRoutine)
eventRecorder := mgr.GetEventRecorderFor("tortoise-controller")
tortoiseService, err := tortoise.New(mgr.GetClient(), eventRecorder, rangeOfMinMaxReplicasRecommendationHours, timeZone, tortoiseUpdateInterval, minMaxReplicasRoutine)
if err != nil {
setupLog.Error(err, "unable to start tortoise service")
os.Exit(1)
}

vpaClient, err := vpa.New(mgr.GetConfig())
vpaClient, err := vpa.New(mgr.GetConfig(), eventRecorder)
if err != nil {
setupLog.Error(err, "unable to start vpa client")
os.Exit(1)
}

hpaService := hpa.New(mgr.GetClient(), replicaReductionFactor, upperTargetResourceUtilization)
hpaService := hpa.New(mgr.GetClient(), eventRecorder, replicaReductionFactor, upperTargetResourceUtilization)

if err = (&controllers.TortoiseReconciler{
Scheme: mgr.GetScheme(),
Expand All @@ -165,7 +166,8 @@ func main() {
DeploymentService: deployment.New(mgr.GetClient()),
RecommenderService: recommender.New(tTLHoursOfMinMaxReplicasRecommendation, maxReplicasFactor, minReplicasFactor, upperTargetResourceUtilization, minimumMinReplicas, preferredReplicaNumUpperLimit, maxCPUPerContainer, maxMemoryPerContainer),
TortoiseService: tortoiseService,
Interval: 30 * time.Second,
Interval: tortoiseUpdateInterval,
EventRecorder: eventRecorder,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Tortoise")
os.Exit(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,11 @@ rules:
- get
- patch
- update
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- update
20 changes: 18 additions & 2 deletions pkg/hpa/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/retry"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -27,13 +28,15 @@ type Service struct {

replicaReductionFactor float64
upperTargetResourceUtilization int32
recorder record.EventRecorder
}

func New(c client.Client, replicaReductionFactor float64, upperTargetResourceUtilization int) *Service {
func New(c client.Client, recorder record.EventRecorder, replicaReductionFactor float64, upperTargetResourceUtilization int) *Service {
return &Service{
c: c,
replicaReductionFactor: replicaReductionFactor,
upperTargetResourceUtilization: int32(upperTargetResourceUtilization),
recorder: recorder,
}
}

Expand All @@ -44,6 +47,9 @@ func (c *Service) InitializeHPA(ctx context.Context, tortoise *autoscalingv1alph
if err != nil {
return tortoise, fmt.Errorf("give annotations on a hpa specified in targetrefs: %w", err)
}

c.recorder.Event(tortoise, corev1.EventTypeNormal, "HPAUpdated", fmt.Sprintf("Updated HPA %s/%s", tortoise.Namespace, tortoise.Status.Targets.HorizontalPodAutoscaler))

return tortoise, nil
}

Expand All @@ -53,6 +59,8 @@ func (c *Service) InitializeHPA(ctx context.Context, tortoise *autoscalingv1alph
return tortoise, fmt.Errorf("create hpa: %w", err)
}

c.recorder.Event(tortoise, corev1.EventTypeNormal, "HPACreated", fmt.Sprintf("Initialized a HPA %s/%s", tortoise.Namespace, tortoise.Status.Targets.HorizontalPodAutoscaler))

return tortoise, nil
}

Expand Down Expand Up @@ -271,7 +279,15 @@ func (c *Service) UpdateHPAFromTortoiseRecommendation(ctx context.Context, torto
return c.c.Update(ctx, hpa)
}

return retHPA, retTortoise, retry.RetryOnConflict(retry.DefaultRetry, updateFn)
if err := retry.RetryOnConflict(retry.DefaultRetry, updateFn); err != nil {
return nil, nil, err
}

if tortoise.Spec.UpdateMode != autoscalingv1alpha1.UpdateModeOff {
c.recorder.Event(tortoise, corev1.EventTypeNormal, "HPAUpdated", fmt.Sprintf("HPA %s/%s is updated by the recommendation", retHPA.Namespace, retHPA.Name))
}

return retHPA, retTortoise, nil
}

// GetReplicasRecommendation finds the corresponding recommendations.
Expand Down
7 changes: 4 additions & 3 deletions pkg/hpa/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
v2 "k8s.io/api/autoscaling/v2"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
Expand Down Expand Up @@ -694,7 +695,7 @@ func TestClient_UpdateHPAFromTortoiseRecommendation(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), 0.95, 90)
c := New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90)
got, tortoise, err := c.UpdateHPAFromTortoiseRecommendation(tt.args.ctx, tt.args.tortoise, tt.args.now)
if (err != nil) != tt.wantErr {
t.Errorf("UpdateHPAFromTortoiseRecommendation() error = %v, wantErr %v", err, tt.wantErr)
Expand Down Expand Up @@ -902,9 +903,9 @@ func TestService_InitializeHPA(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := New(fake.NewClientBuilder().Build(), 0.95, 90)
c := New(fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), 0.95, 90)
if tt.initialHPA != nil {
c = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), 0.95, 90)
c = New(fake.NewClientBuilder().WithRuntimeObjects(tt.initialHPA).Build(), record.NewFakeRecorder(10), 0.95, 90)
}
_, err := c.InitializeHPA(context.Background(), tt.args.tortoise, tt.args.dm)
if (err != nil) != tt.wantErr {
Expand Down
31 changes: 28 additions & 3 deletions pkg/tortoise/tortoise.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
v1 "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
"k8s.io/utils/pointer"
Expand All @@ -25,7 +26,9 @@ import (
const tortoiseFinalizer = "tortoise.autoscaling.mercari.com/finalizer"

type Service struct {
c client.Client
c client.Client
recorder record.EventRecorder

rangeOfMinMaxReplicasRecommendationHour int
timeZone *time.Location
tortoiseUpdateInterval time.Duration
Expand All @@ -38,7 +41,7 @@ type Service struct {
lastTimeUpdateTortoise map[client.ObjectKey]time.Time
}

func New(c client.Client, rangeOfMinMaxReplicasRecommendationHour int, timeZone string, tortoiseUpdateInterval time.Duration, minMaxReplicasRoutine string) (*Service, error) {
func New(c client.Client, recorder record.EventRecorder, rangeOfMinMaxReplicasRecommendationHour int, timeZone string, tortoiseUpdateInterval time.Duration, minMaxReplicasRoutine string) (*Service, error) {
jst, err := time.LoadLocation(timeZone)
if err != nil {
return nil, fmt.Errorf("load location: %w", err)
Expand All @@ -47,6 +50,7 @@ func New(c client.Client, rangeOfMinMaxReplicasRecommendationHour int, timeZone
return &Service{
c: c,

recorder: recorder,
rangeOfMinMaxReplicasRecommendationHour: rangeOfMinMaxReplicasRecommendationHour,
minMaxReplicasRoutine: minMaxReplicasRoutine,
timeZone: jst,
Expand Down Expand Up @@ -75,16 +79,35 @@ func (s *Service) UpdateTortoisePhase(tortoise *v1alpha1.Tortoise, dm *appv1.Dep
switch tortoise.Status.TortoisePhase {
case "":
tortoise = s.initializeTortoise(tortoise, dm)
r := "1 week"
if s.minMaxReplicasRoutine == "daily" {
r = "1 day"
}
s.recorder.Event(tortoise, corev1.EventTypeNormal, "Initialized", fmt.Sprintf("Tortoise is initialized and starts to gather data to make recommendations. It will take %s to finish gathering data and then tortoise starts to work actually", r))

case v1alpha1.TortoisePhaseInitializing:
// change it to GatheringData anyway. Later the controller may change it back to initialize if VPA isn't ready.
tortoise.Status.TortoisePhase = v1alpha1.TortoisePhaseGatheringData
case v1alpha1.TortoisePhaseGatheringData:
tortoise = s.checkIfTortoiseFinishedGatheringData(tortoise)
if tortoise.Status.TortoisePhase == v1alpha1.TortoisePhaseWorking {
s.recorder.Event(tortoise, corev1.EventTypeNormal, "Working", "Tortoise finishes gathering data and it starts to work on autoscaling")
}
case v1alpha1.TortoisePhaseEmergency:
if tortoise.Spec.UpdateMode != v1alpha1.UpdateModeEmergency {
// Emergency mode is turned off.
s.recorder.Event(tortoise, corev1.EventTypeNormal, "Working", "Emergency mode is turned off. Tortoise starts to work on autoscaling normally")
tortoise.Status.TortoisePhase = v1alpha1.TortoisePhaseEmergency
}
}

if tortoise.Spec.UpdateMode == v1alpha1.UpdateModeEmergency {
tortoise.Status.TortoisePhase = v1alpha1.TortoisePhaseEmergency
if tortoise.Status.TortoisePhase != v1alpha1.TortoisePhaseEmergency {
s.recorder.Event(tortoise, corev1.EventTypeNormal, "Emergency", "Tortoise is in Emergency mode")
tortoise.Status.TortoisePhase = v1alpha1.TortoisePhaseEmergency
}
}

return tortoise
}

Expand Down Expand Up @@ -287,6 +310,8 @@ func (s *Service) UpdateTortoiseStatus(ctx context.Context, originalTortoise *v1
return originalTortoise, err
}

s.recorder.Event(originalTortoise, corev1.EventTypeNormal, "RecommendationUpdated", "The recommendation on Tortoise status is updated")

s.updateLastTimeUpdateTortoise(originalTortoise, now)

return originalTortoise, nil
Expand Down
Loading

0 comments on commit 2997fa0

Please sign in to comment.