Skip to content

Commit

Permalink
reconcile premature on probes nil -> healthy
Browse files Browse the repository at this point in the history
Signed-off-by: Maskym Vavilov <[email protected]>
  • Loading branch information
maksymvavilov committed Nov 26, 2024
1 parent bee684b commit 5969746
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 109 deletions.
2 changes: 1 addition & 1 deletion api/v1alpha1/dnshealthcheckprobe_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ type DNSHealthCheckProbeStatus struct {
ConsecutiveFailures int `json:"consecutiveFailures,omitempty"`
Reason string `json:"reason,omitempty"`
Status int `json:"status,omitempty"`
Healthy *bool `json:"healthy"`
Healthy *bool `json:"healthy,omitempty"`
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
}

Expand Down
2 changes: 1 addition & 1 deletion bundle/manifests/dns-operator.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ metadata:
capabilities: Basic Install
categories: Integration & Delivery
containerImage: quay.io/kuadrant/dns-operator:latest
createdAt: "2024-11-15T11:06:08Z"
createdAt: "2024-11-26T11:14:48Z"
description: A Kubernetes Operator to manage the lifecycle of DNS resources
operators.operatorframework.io/builder: operator-sdk-v1.33.0
operators.operatorframework.io/project_layout: go.kubebuilder.io/v4
Expand Down
2 changes: 0 additions & 2 deletions bundle/manifests/kuadrant.io_dnshealthcheckprobes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,6 @@ spec:
type: string
status:
type: integer
required:
- healthy
type: object
type: object
served: true
Expand Down
2 changes: 0 additions & 2 deletions charts/dns-operator/templates/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@ spec:
type: string
status:
type: integer
required:
- healthy
type: object
type: object
served: true
Expand Down
2 changes: 0 additions & 2 deletions config/crd/bases/kuadrant.io_dnshealthcheckprobes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,6 @@ spec:
type: string
status:
type: integer
required:
- healthy
type: object
type: object
served: true
Expand Down
42 changes: 22 additions & 20 deletions internal/controller/dnsrecord_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -389,36 +389,39 @@ func (r *DNSRecordReconciler) SetupWithManager(mgr ctrl.Manager, maxRequeue, val
logger.V(1).Info("unexpected object type", "error", fmt.Sprintf("%T is not a *v1alpha1.DNSHealthCheckProbe", o))
return []reconcile.Request{}
}
onCluster := &v1alpha1.DNSHealthCheckProbe{}
if err := mgr.GetClient().Get(ctx, client.ObjectKeyFromObject(probe), onCluster); client.IgnoreNotFound(err) != nil {
logger.Error(err, "failed to get probe")

// haven't probed yet or deleting - nothing to do
if probe.Status.Healthy == nil {
return []reconcile.Request{}
}

probeOwner := &v1alpha1.DNSRecord{
ObjectMeta: metav1.ObjectMeta{},
}
record := &v1alpha1.DNSRecord{ObjectMeta: metav1.ObjectMeta{}}
for _, ro := range probe.GetOwnerReferences() {
if ro.Kind == "DNSRecord" {
probeOwner.Name = ro.Name
probeOwner.Namespace = probe.Namespace
record.Name = ro.Name
record.Namespace = probe.Namespace
break
}
}

// haven't probed yet or deleting - nothing to do
if probe.Status.Healthy == nil {
if err := mgr.GetClient().Get(ctx, client.ObjectKeyFromObject(record), record); client.IgnoreNotFound(err) != nil {
logger.Error(err, "failed to get record")
return []reconcile.Request{}
}

// probe.Status.Healthy is not nil here
// we probed for the first time - reconcile
// or none are nil and status changed - reconcile
if onCluster.Status.Healthy == nil || *probe.Status.Healthy != *onCluster.Status.Healthy {
return []reconcile.Request{{NamespacedName: client.ObjectKeyFromObject(probeOwner)}}
condition := meta.FindStatusCondition(record.Status.Conditions, string(v1alpha1.ConditionTypeHealthy))
// no condition - record is not precessed yet
if condition == nil {
return []reconcile.Request{}
}

// none are nil and status is the same - nothing to do
isHealthy := condition.Status == metav1.ConditionTrue

// record and probe disagree on health - requeue
if *probe.Status.Healthy != isHealthy {
return []reconcile.Request{{NamespacedName: client.ObjectKeyFromObject(record)}}
}
// nothing to do
return []reconcile.Request{}
})).
Complete(r)
Expand Down Expand Up @@ -490,14 +493,13 @@ func recordReceivedPrematurely(record *v1alpha1.DNSRecord, probes *v1alpha1.DNSH
// healthy is true only if we have probes and they are all healthy
isHealthy := healthyCond.Status == metav1.ConditionTrue

// if at least one not healthy - this will lock in false
allProbesHealthy := true
// if at least one is healthy - this will lock in true
allProbesHealthy := false
for _, probe := range probes.Items {
if probe.Status.Healthy != nil {
allProbesHealthy = allProbesHealthy && *probe.Status.Healthy
allProbesHealthy = allProbesHealthy || *probe.Status.Healthy
}
}

// prematurely is true here. return false in case we need full reconcile
return isHealthy == allProbesHealthy, requeueIn
}
Expand Down
81 changes: 0 additions & 81 deletions internal/controller/dnsrecord_healthchecks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,87 +265,6 @@ var _ = Describe("DNSRecordReconciler_HealthChecks", func() {
}, TestTimeoutMedium, time.Second).Should(Succeed())
})

It("Should reconcile prematurely on healthcheck change", func() {
// Create a default record
Expect(k8sClient.Create(ctx, dnsRecord)).To(Succeed())

By("Making probes healthy")
Eventually(func(g Gomega) {
probes := &v1alpha1.DNSHealthCheckProbeList{}

g.Expect(k8sClient.List(ctx, probes, &client.ListOptions{
LabelSelector: labels.SelectorFromSet(map[string]string{
ProbeOwnerLabel: BuildOwnerLabelValue(dnsRecord),
}),
Namespace: dnsRecord.Namespace,
})).To(Succeed())
g.Expect(len(probes.Items)).To(Equal(2))

// make probes healthy
for _, probe := range probes.Items {
probe.Status.Healthy = ptr.To(true)
probe.Status.LastCheckedAt = metav1.Now()
probe.Status.ConsecutiveFailures = 0
g.Expect(k8sClient.Status().Update(ctx, &probe)).To(Succeed())
}
}, TestTimeoutMedium, time.Second).Should(Succeed())

// Override valid for to 1 hour to make sure we are always premature
Eventually(func(g Gomega) {
g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord), dnsRecord)).To(Succeed())
// update only after we are ready
g.Expect(dnsRecord.Status.Conditions).To(
ContainElement(MatchFields(IgnoreExtras, Fields{
"Type": Equal(string(v1alpha1.ConditionTypeReady)),
"Status": Equal(metav1.ConditionTrue),
})),
)
dnsRecord.Status.ValidFor = "1h"
g.Expect(k8sClient.Status().Update(ctx, dnsRecord)).To(Succeed())
}, TestTimeoutMedium, time.Second).Should(Succeed())

// validate that it is still 1 hour - we hit the premature loop
// we purposefully are not updating anything on premature loops
// so can't be sure if we have had a short loop or not yet
By("Verify premature loop did not occur")
Eventually(func(g Gomega) {
newRecord := &v1alpha1.DNSRecord{}
g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord), newRecord)).To(Succeed())
g.Expect(newRecord.Status.ValidFor).To(Equal("1h"))
}, TestTimeoutMedium, time.Second).Should(Succeed())

By("Making probes unhealthy")
Eventually(func(g Gomega) {
probes := &v1alpha1.DNSHealthCheckProbeList{}

g.Expect(k8sClient.List(ctx, probes, &client.ListOptions{
LabelSelector: labels.SelectorFromSet(map[string]string{
ProbeOwnerLabel: BuildOwnerLabelValue(dnsRecord),
}),
Namespace: dnsRecord.Namespace,
})).To(Succeed())
g.Expect(len(probes.Items)).To(Equal(2))

// make probes healthy
for _, probe := range probes.Items {
probe.Status.Healthy = ptr.To(false)
probe.Status.LastCheckedAt = metav1.Now()
probe.Status.ConsecutiveFailures = 5
g.Expect(k8sClient.Status().Update(ctx, &probe)).To(Succeed())
}
}, TestTimeoutMedium, time.Second).Should(Succeed())

// check that record has valid for not an hour - we did the full reconcile and updated valid for
By("Validate premature loop occurred")
Eventually(func(g Gomega) {
g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsRecord), dnsRecord)).To(Succeed())
// on the full loop we will always update ValidFor unless it is max duration.
// since we had 1 hour the logic should set it to the max.
// meaning that changing probes triggered dnsrecornd reconciler
// and we had a fool loop before we were supposed to
g.Expect(dnsRecord.Status.ValidFor).To(Equal(RequeueDuration.String()))
}, TestTimeoutMedium, time.Second).Should(Succeed())
})
It("Should not publish unhealthy enpoints", func() {
//Create default test dnsrecord
Expect(k8sClient.Create(ctx, dnsRecord)).To(Succeed())
Expand Down

0 comments on commit 5969746

Please sign in to comment.