Skip to content

Commit

Permalink
Include inform policies in configuration timeout calculation (#320)
Browse files Browse the repository at this point in the history
Signed-off-by: Angie Wang <[email protected]>
  • Loading branch information
Missxiaoguo authored Nov 24, 2024
1 parent 56c7a88 commit d6395c8
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 126 deletions.
36 changes: 18 additions & 18 deletions internal/controllers/provisioningrequest_clusterconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func (t *provisioningRequestReconcilerTask) handleClusterPolicyConfiguration(ctx
}

allPoliciesCompliant := true
nonCompliantPolicyInEnforce := false
allPoliciesInInform := true
var targetPolicies []provisioningv1alpha1.PolicyDetails
// Go through all the policies and get those that are matched with the managed cluster created
// by the current provisioning request.
Expand All @@ -57,13 +57,13 @@ func (t *provisioningRequestReconcilerTask) handleClusterPolicyConfiguration(ctx

if policy.Status.ComplianceState != policiesv1.Compliant {
allPoliciesCompliant = false
if strings.EqualFold(string(policy.Spec.RemediationAction), string(policiesv1.Enforce)) {
nonCompliantPolicyInEnforce = true
}
}
if !strings.EqualFold(string(policy.Spec.RemediationAction), string(policiesv1.Inform)) {
allPoliciesInInform = false
}
}
policyConfigTimedOut, err := t.updateConfigurationAppliedStatus(
ctx, targetPolicies, allPoliciesCompliant, nonCompliantPolicyInEnforce)
ctx, targetPolicies, allPoliciesCompliant, allPoliciesInInform)
if err != nil {
return false, err
}
Expand All @@ -78,14 +78,14 @@ func (t *provisioningRequestReconcilerTask) handleClusterPolicyConfiguration(ctx

// If there are policies that are not Compliant and the configuration has not timed out,
// we need to requeue and see if the timeout is reached.
return nonCompliantPolicyInEnforce && !policyConfigTimedOut, nil
return (!allPoliciesCompliant && !allPoliciesInInform) && !policyConfigTimedOut, nil
}

// updateConfigurationAppliedStatus updates the ProvisioningRequest ConfigurationApplied condition
// based on the state of the policies matched with the managed cluster.
func (t *provisioningRequestReconcilerTask) updateConfigurationAppliedStatus(
ctx context.Context, targetPolicies []provisioningv1alpha1.PolicyDetails, allPoliciesCompliant bool,
nonCompliantPolicyInEnforce bool) (policyConfigTimedOut bool, err error) {
allPoliciesInInform bool) (policyConfigTimedOut bool, err error) {
err = nil
policyConfigTimedOut = false

Expand Down Expand Up @@ -146,14 +146,23 @@ func (t *provisioningRequestReconcilerTask) updateConfigurationAppliedStatus(
"The Cluster is not yet ready",
)
if utils.IsClusterProvisionCompleted(t.object) &&
nonCompliantPolicyInEnforce {
!allPoliciesInInform {
utils.SetProvisioningStateInProgress(t.object,
"Waiting for cluster to be ready for policy configuration")
}
return
}

if nonCompliantPolicyInEnforce {
if allPoliciesInInform {
// No timeout is computed if all policies are in inform, just out of date.
t.object.Status.Extensions.ClusterDetails.NonCompliantAt = metav1.Time{}
utils.SetStatusCondition(&t.object.Status.Conditions,
utils.PRconditionTypes.ConfigurationApplied,
utils.CRconditionReasons.OutOfDate,
metav1.ConditionFalse,
"The configuration is out of date",
)
} else {
policyConfigTimedOut = t.hasPolicyConfigurationTimedOut(ctx)

message := "The configuration is still being applied"
Expand All @@ -172,15 +181,6 @@ func (t *provisioningRequestReconcilerTask) updateConfigurationAppliedStatus(
metav1.ConditionFalse,
message,
)
} else {
// No timeout is reported if all policies are in inform, just out of date.
t.object.Status.Extensions.ClusterDetails.NonCompliantAt = metav1.Time{}
utils.SetStatusCondition(&t.object.Status.Conditions,
utils.PRconditionTypes.ConfigurationApplied,
utils.CRconditionReasons.OutOfDate,
metav1.ConditionFalse,
"The configuration is out of date",
)
}

return
Expand Down
265 changes: 157 additions & 108 deletions internal/controllers/provisioningrequest_clusterconfig_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1032,7 +1032,7 @@ defaultHugepagesSize: "1G"`,
requeue, err = CRTask.handleClusterPolicyConfiguration(context.Background())
Expect(requeue).To(BeFalse())
Expect(err).ToNot(HaveOccurred())
// NonCompliantAt should still be zero since we don't consider inform policies in the timeout.
// NonCompliantAt should still be zero since we don't consider inform policies in the timeout if all policies are inform.
Expect(CRTask.object.Status.Extensions.ClusterDetails.NonCompliantAt).To(BeZero())
Expect(CRTask.object.Status.Extensions.Policies).To(ConsistOf(
[]provisioningv1alpha1.PolicyDetails{
Expand Down Expand Up @@ -1152,6 +1152,159 @@ defaultHugepagesSize: "1G"`,
Equal("The configuration is still being applied, but it timed out"))
})

It("Updates ProvisioningRequest ConfigurationApplied condition to InProgress when the enforce policy is "+
"Compliant but the inform policy is still NonCompliant and times out", func() {
req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "cluster-1"}}

result, err := CRReconciler.Reconcile(ctx, req)
Expect(err).ToNot(HaveOccurred())
// Expect to not requeue on valid provisioning request.
Expect(result.Requeue).To(BeFalse())

// Create policies, one Compliant enforce policy and one NonCompliant inform policy.
newPolicies := []client.Object{
&policiesv1.Policy{
ObjectMeta: metav1.ObjectMeta{
Name: "ztp-clustertemplate-a-v4-16.v1-validator-policy",
Namespace: "cluster-1",
Labels: map[string]string{
utils.ChildPolicyRootPolicyLabel: "ztp-clustertemplate-a-v4-16.v1-validator-policy",
utils.ChildPolicyClusterNameLabel: "cluster-1",
utils.ChildPolicyClusterNamespaceLabel: "cluster-1",
},
},
Spec: policiesv1.PolicySpec{
RemediationAction: "inform",
},
Status: policiesv1.PolicyStatus{
ComplianceState: "NonCompliant",
},
},
&policiesv1.Policy{
ObjectMeta: metav1.ObjectMeta{
Name: "ztp-clustertemplate-a-v4-16.v1-sriov-configuration-policy",
Namespace: "cluster-1",
Labels: map[string]string{
utils.ChildPolicyRootPolicyLabel: "ztp-clustertemplate-a-v4-16.v1-sriov-configuration-policy",
utils.ChildPolicyClusterNameLabel: "cluster-1",
utils.ChildPolicyClusterNamespaceLabel: "cluster-1",
},
},
Spec: policiesv1.PolicySpec{
RemediationAction: "enforce",
},
Status: policiesv1.PolicyStatus{
ComplianceState: "Compliant",
},
},
}
for _, newPolicy := range newPolicies {
Expect(c.Create(ctx, newPolicy)).To(Succeed())
}

provisioningRequest := &provisioningv1alpha1.ProvisioningRequest{}

// Create the ProvisioningRequest reconciliation task.
err = CRReconciler.Client.Get(
context.TODO(),
types.NamespacedName{
Name: "cluster-1",
},
provisioningRequest)
Expect(err).ToNot(HaveOccurred())

CRTask = &provisioningRequestReconcilerTask{
logger: CRReconciler.Logger,
client: CRReconciler.Client,
object: provisioningRequest, // cluster-1 request
ctDetails: &clusterTemplateDetails{
namespace: ctNamespace,
},
timeouts: &timeouts{
hardwareProvisioning: utils.DefaultHardwareProvisioningTimeout,
clusterProvisioning: utils.DefaultClusterInstallationTimeout,
clusterConfiguration: 1 * time.Minute,
},
}

// Call the handleClusterPolicyConfiguration function.
requeue, err := CRTask.handleClusterPolicyConfiguration(context.Background())
// There are NonCompliant policies in inform and the configuration has not timed out,
// so we need to requeue to re-evaluate the timeout.
Expect(requeue).To(BeTrue())
Expect(err).ToNot(HaveOccurred())
Expect(CRTask.object.Status.Extensions.ClusterDetails.NonCompliantAt).ToNot(BeZero())
Expect(CRTask.object.Status.Extensions.Policies).To(ConsistOf(
[]provisioningv1alpha1.PolicyDetails{
{
Compliant: "Compliant",
PolicyName: "v1-sriov-configuration-policy",
PolicyNamespace: "ztp-clustertemplate-a-v4-16",
RemediationAction: "enforce",
},
{
Compliant: "NonCompliant",
PolicyName: "v1-validator-policy",
PolicyNamespace: "ztp-clustertemplate-a-v4-16",
RemediationAction: "inform",
},
},
))

// Verify that the ConfigurationApplied condition is set to InProgress.
conditions := CRTask.object.Status.Conditions
configAppliedCond := meta.FindStatusCondition(
conditions, string(utils.PRconditionTypes.ConfigurationApplied))
Expect(configAppliedCond).ToNot(BeNil())
Expect(configAppliedCond.Type).To(Equal(string(utils.PRconditionTypes.ConfigurationApplied)))
Expect(configAppliedCond.Status).To(Equal(metav1.ConditionFalse))
Expect(configAppliedCond.Reason).To(Equal(string(utils.CRconditionReasons.InProgress)))
Expect(configAppliedCond.Message).To(Equal("The configuration is still being applied"))

// Take 2 minutes to the NonCompliantAt timestamp to mock timeout.
CRTask.object.Status.Extensions.ClusterDetails.NonCompliantAt.Time =
CRTask.object.Status.Extensions.ClusterDetails.NonCompliantAt.Add(-2 * time.Minute)
Expect(c.Status().Update(ctx, CRTask.object)).To(Succeed())

// Call the handleClusterPolicyConfiguration function.
requeue, err = CRTask.handleClusterPolicyConfiguration(context.Background())
// There are NonCompliant policies in inform, but the configuration has timed out,
// so we do not need to requeue.
Expect(requeue).To(BeFalse())
Expect(err).ToNot(HaveOccurred())
Expect(CRTask.object.Status.Extensions.ClusterDetails.NonCompliantAt).ToNot(BeZero())

// Verify that the ConfigurationApplied condition is set to TimedOut.
conditions = CRTask.object.Status.Conditions
configAppliedCond = meta.FindStatusCondition(
conditions, string(utils.PRconditionTypes.ConfigurationApplied))
Expect(configAppliedCond).ToNot(BeNil())
Expect(configAppliedCond.Type).To(Equal(string(utils.PRconditionTypes.ConfigurationApplied)))
Expect(configAppliedCond.Status).To(Equal(metav1.ConditionFalse))
Expect(configAppliedCond.Reason).To(Equal(string(utils.CRconditionReasons.TimedOut)))
Expect(configAppliedCond.Message).To(
Equal("The configuration is still being applied, but it timed out"))

// Check that another handleClusterPolicyConfiguration call doesn't change the status if
// the policies are the same.
requeue, err = CRTask.handleClusterPolicyConfiguration(context.Background())
// There are NonCompliant policies in inform, but the configuration has timed out,
// so we do not need to requeue.
Expect(requeue).To(BeFalse())
Expect(err).ToNot(HaveOccurred())

// Check the status conditions.
conditions = CRTask.object.Status.Conditions
configAppliedCond = meta.FindStatusCondition(
conditions, string(utils.PRconditionTypes.ConfigurationApplied))
Expect(configAppliedCond).ToNot(BeNil())
Expect(configAppliedCond.Type).To(Equal(string(utils.PRconditionTypes.ConfigurationApplied)))
Expect(configAppliedCond.Status).To(Equal(metav1.ConditionFalse))
Expect(configAppliedCond.Reason).To(Equal(string(utils.CRconditionReasons.TimedOut)))
Expect(configAppliedCond.Message).To(
Equal("The configuration is still being applied, but it timed out"))
})

It("Updates ProvisioningRequest ConfigurationApplied condition to Missing if there are no policies", func() {
req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "cluster-1"}}

Expand Down Expand Up @@ -1317,113 +1470,6 @@ defaultHugepagesSize: "1G"`,
Expect(len(res)).To(Equal(0))
})

It("Updates ProvisioningRequest ConfigurationApplied condition to OutOfDate when the cluster is "+
"NonCompliant with at least one matched policies and the policy is not in enforce", func() {
req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "cluster-1"}}

result, err := CRReconciler.Reconcile(ctx, req)
Expect(err).ToNot(HaveOccurred())
// Expect to not requeue on valid provisioning request.
Expect(result.Requeue).To(BeFalse())

newPolicies := []client.Object{
&policiesv1.Policy{
ObjectMeta: metav1.ObjectMeta{
Name: "ztp-clustertemplate-a-v4-16.v1-subscriptions-policy",
Namespace: "cluster-1",
Labels: map[string]string{
utils.ChildPolicyRootPolicyLabel: "ztp-clustertemplate-a-v4-16.v1-subscriptions-policy",
utils.ChildPolicyClusterNameLabel: "cluster-1",
utils.ChildPolicyClusterNamespaceLabel: "cluster-1",
},
},
Spec: policiesv1.PolicySpec{
RemediationAction: "inform",
},
Status: policiesv1.PolicyStatus{
ComplianceState: "NonCompliant",
},
},
&policiesv1.Policy{
ObjectMeta: metav1.ObjectMeta{
Name: "ztp-clustertemplate-a-v4-16.v1-sriov-configuration-policy",
Namespace: "cluster-1",
Labels: map[string]string{
utils.ChildPolicyRootPolicyLabel: "ztp-clustertemplate-a-v4-16.v1-sriov-configuration-policy",
utils.ChildPolicyClusterNameLabel: "cluster-1",
utils.ChildPolicyClusterNamespaceLabel: "cluster-1",
},
},
Spec: policiesv1.PolicySpec{
RemediationAction: "enforce",
},
Status: policiesv1.PolicyStatus{
ComplianceState: "Compliant",
},
},
}
// Create all the ACM policies.
for _, newPolicy := range newPolicies {
Expect(c.Create(ctx, newPolicy)).To(Succeed())
}
provisioningRequest := &provisioningv1alpha1.ProvisioningRequest{}

// Create the ProvisioningRequest reconciliation task.
err = CRReconciler.Client.Get(
context.TODO(),
types.NamespacedName{
Name: "cluster-1",
},
provisioningRequest)
Expect(err).ToNot(HaveOccurred())

CRTask = &provisioningRequestReconcilerTask{
logger: CRReconciler.Logger,
client: CRReconciler.Client,
object: provisioningRequest, // cluster-1 request
clusterInput: &clusterInput{},
ctDetails: &clusterTemplateDetails{
namespace: ctNamespace,
},
timeouts: &timeouts{
hardwareProvisioning: utils.DefaultHardwareProvisioningTimeout,
clusterProvisioning: utils.DefaultClusterInstallationTimeout,
clusterConfiguration: utils.DefaultClusterConfigurationTimeout,
},
}

// Call the handleClusterPolicyConfiguration function.
requeue, err := CRTask.handleClusterPolicyConfiguration(context.Background())
Expect(requeue).To(BeFalse())
Expect(err).ToNot(HaveOccurred())
Expect(CRTask.object.Status.Extensions.Policies).To(ConsistOf(
[]provisioningv1alpha1.PolicyDetails{
{
Compliant: "Compliant",
PolicyName: "v1-sriov-configuration-policy",
PolicyNamespace: "ztp-clustertemplate-a-v4-16",
RemediationAction: "enforce",
},
{
Compliant: "NonCompliant",
PolicyName: "v1-subscriptions-policy",
PolicyNamespace: "ztp-clustertemplate-a-v4-16",
RemediationAction: "inform",
},
},
))

// Check the status conditions.
conditions := CRTask.object.Status.Conditions
configAppliedCond := meta.FindStatusCondition(
conditions, string(utils.PRconditionTypes.ConfigurationApplied))
Expect(configAppliedCond).ToNot(BeNil())
Expect(configAppliedCond.Type).To(Equal(string(utils.PRconditionTypes.ConfigurationApplied)))
Expect(configAppliedCond.Status).To(Equal(metav1.ConditionFalse))
Expect(configAppliedCond.Reason).To(Equal(string(utils.CRconditionReasons.OutOfDate)))
Expect(configAppliedCond.Message).To(Equal("The configuration is out of date"))
})

It("Updates ProvisioningRequest ConfigurationApplied condition to Completed when the cluster is "+
"Compliant with all the matched policies", func() {
req := reconcile.Request{NamespacedName: types.NamespacedName{Name: "cluster-1"}}
Expand Down Expand Up @@ -1503,6 +1549,7 @@ defaultHugepagesSize: "1G"`,
requeue, err := CRTask.handleClusterPolicyConfiguration(context.Background())
Expect(requeue).To(BeFalse())
Expect(err).ToNot(HaveOccurred())
Expect(CRTask.object.Status.Extensions.ClusterDetails.NonCompliantAt).To(BeZero())
Expect(CRTask.object.Status.Extensions.Policies).To(ConsistOf(
[]provisioningv1alpha1.PolicyDetails{
{
Expand Down Expand Up @@ -1609,6 +1656,7 @@ defaultHugepagesSize: "1G"`,
requeue, err := CRTask.handleClusterPolicyConfiguration(context.Background())
Expect(requeue).To(BeTrue())
Expect(err).ToNot(HaveOccurred())
Expect(CRTask.object.Status.Extensions.ClusterDetails.NonCompliantAt).ToNot(BeZero())
Expect(CRTask.object.Status.Extensions.Policies).To(ConsistOf(
[]provisioningv1alpha1.PolicyDetails{
{
Expand Down Expand Up @@ -1715,6 +1763,7 @@ defaultHugepagesSize: "1G"`,
requeue, err := CRTask.handleClusterPolicyConfiguration(context.Background())
Expect(requeue).To(BeTrue())
Expect(err).ToNot(HaveOccurred())
Expect(CRTask.object.Status.Extensions.ClusterDetails.NonCompliantAt).ToNot(BeZero())
Expect(CRTask.object.Status.Extensions.Policies).To(ConsistOf(
[]provisioningv1alpha1.PolicyDetails{
{
Expand Down

0 comments on commit d6395c8

Please sign in to comment.