diff --git a/api/v1alpha1/ctlog_types.go b/api/v1alpha1/ctlog_types.go index 9870d8345..f0ba70a40 100644 --- a/api/v1alpha1/ctlog_types.go +++ b/api/v1alpha1/ctlog_types.go @@ -48,6 +48,8 @@ type CTlogStatus struct { RootCertificates []SecretKeySelector `json:"rootCertificates,omitempty"` // The ID of a Trillian tree that stores the log data. TreeID *int64 `json:"treeID,omitempty"` + // Number of component restarts. + Restarts int `json:"restarts"` // +listType=map // +listMapKey=type // +patchStrategy=merge diff --git a/bundle/manifests/rhtas.redhat.com_ctlogs.yaml b/bundle/manifests/rhtas.redhat.com_ctlogs.yaml index deece3f75..d730f03ca 100644 --- a/bundle/manifests/rhtas.redhat.com_ctlogs.yaml +++ b/bundle/manifests/rhtas.redhat.com_ctlogs.yaml @@ -278,6 +278,9 @@ spec: - name type: object x-kubernetes-map-type: atomic + restarts: + description: Number of component restarts. + type: integer rootCertificates: items: description: SecretKeySelector selects a key of a Secret. @@ -316,6 +319,8 @@ spec: description: The ID of a Trillian tree that stores the log data. format: int64 type: integer + required: + - restarts type: object type: object served: true diff --git a/config/crd/bases/rhtas.redhat.com_ctlogs.yaml b/config/crd/bases/rhtas.redhat.com_ctlogs.yaml index ca762bb52..6326de55d 100644 --- a/config/crd/bases/rhtas.redhat.com_ctlogs.yaml +++ b/config/crd/bases/rhtas.redhat.com_ctlogs.yaml @@ -278,6 +278,9 @@ spec: - name type: object x-kubernetes-map-type: atomic + restarts: + description: Number of component restarts. + type: integer rootCertificates: items: description: SecretKeySelector selects a key of a Secret. @@ -316,6 +319,8 @@ spec: description: The ID of a Trillian tree that stores the log data. 
format: int64 type: integer + required: + - restarts type: object type: object served: true
diff --git a/internal/controller/ctlog/actions/error.go b/internal/controller/ctlog/actions/error.go
new file mode 100644
index 000000000..e07d4d37b
--- /dev/null
+++ b/internal/controller/ctlog/actions/error.go
@@ -0,0 +1,94 @@
+package actions
+
+import (
+	"context"
+
+	rhtasv1alpha1 "github.com/securesign/operator/api/v1alpha1"
+	"github.com/securesign/operator/internal/controller/common/action"
+	"github.com/securesign/operator/internal/controller/constants"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// NewHandleErrorAction returns the action that restarts a failed CTlog
+// reconciliation until the allowed number of restarts is used up.
+func NewHandleErrorAction() action.Action[rhtasv1alpha1.CTlog] {
+	return &handleErrorAction{}
+}
+
+// handleErrorAction resets the status of a CTlog whose Ready condition reports
+// a failure so that the remaining actions can run again.
+type handleErrorAction struct {
+	action.BaseAction
+}
+
+// Name returns the human-readable name of the action.
+func (i handleErrorAction) Name() string {
+	return "error handler"
+}
+
+// CanHandle reports whether the Ready condition has the Failure reason and the
+// restart counter is still below constants.AllowedRestarts.
+func (i handleErrorAction) CanHandle(_ context.Context, instance *rhtasv1alpha1.CTlog) bool {
+	c := meta.FindStatusCondition(instance.Status.Conditions, constants.Ready)
+	if c == nil {
+		return false
+	}
+	return c.Reason == constants.Failure && instance.Status.Restarts < constants.AllowedRestarts
+}
+
+// Handle records a warning event, increments the restart counter and replaces
+// the status with a fresh one. When the counter reaches
+// constants.AllowedRestarts the new status only carries the counter and a
+// Ready=False/Failure condition. Otherwise the tree ID and key references are
+// carried over, root certificates and the server config reference are carried
+// over when their conditions are true, and Ready is set to Pending.
+func (i handleErrorAction) Handle(ctx context.Context, instance *rhtasv1alpha1.CTlog) *action.Result {
+	i.Recorder.Event(instance, v1.EventTypeWarning, constants.Failure, "Restarted by error handler")
+
+	newStatus := rhtasv1alpha1.CTlogStatus{}
+
+	newStatus.Restarts = instance.Status.Restarts + 1
+	if newStatus.Restarts == constants.AllowedRestarts {
+		meta.SetStatusCondition(&newStatus.Conditions, metav1.Condition{
+			Type:    constants.Ready,
+			Status:  metav1.ConditionFalse,
+			Reason:  constants.Failure,
+			Message: "Restart threshold reached",
+		})
+		instance.Status = newStatus
+		return i.StatusUpdate(ctx, instance)
+	}
+
+	// - keep the status.treeId if not nil
+	newStatus.TreeID = instance.Status.TreeID
+
+	newStatus.PrivateKeyRef = instance.Status.PrivateKeyRef.DeepCopy()
+
+	newStatus.PublicKeyRef = instance.Status.PublicKeyRef.DeepCopy()
+
+	newStatus.PrivateKeyPasswordRef = instance.Status.PrivateKeyPasswordRef.DeepCopy()
+
+	if meta.IsStatusConditionTrue(instance.Status.Conditions, CertCondition) {
+		// copy() into the nil destination slice would copy nothing; append
+		// allocates a new backing array holding all certificates.
+		newStatus.RootCertificates = append(newStatus.RootCertificates, instance.Status.RootCertificates...)
+	}
+
+	if meta.IsStatusConditionTrue(instance.Status.Conditions, ServerCondition) {
+		// DeepCopyInto would write through the nil newStatus.ServerConfigRef;
+		// DeepCopy allocates the destination instead.
+		newStatus.ServerConfigRef = instance.Status.ServerConfigRef.DeepCopy()
+		// do not append server condition - let controller to redeploy
+	}
+
+	meta.SetStatusCondition(&newStatus.Conditions, metav1.Condition{
+		Type:    constants.Ready,
+		Status:  metav1.ConditionFalse,
+		Reason:  constants.Pending,
+		Message: "Restarted by error handler",
+	})
+	instance.Status = newStatus
+	return i.StatusUpdate(ctx, instance)
+}
diff --git a/internal/controller/ctlog/ctlog_controller.go b/internal/controller/ctlog/ctlog_controller.go index d407bb4a3..521468d37 100644 --- a/internal/controller/ctlog/ctlog_controller.go +++ b/internal/controller/ctlog/ctlog_controller.go @@ -21,7 +21,10 @@ import ( olpredicate "github.com/operator-framework/operator-lib/predicate" "github.com/securesign/operator/internal/controller/annotations" + "github.com/securesign/operator/internal/controller/constants" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/event" "github.com/securesign/operator/internal/controller/ctlog/actions" actions2 "github.com/securesign/operator/internal/controller/fulcio/actions" @@ -83,6 +86,9 @@ func (r *CTlogReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl } target := instance.DeepCopy() acs := []action.Action[rhtasv1alpha1.CTlog]{ + // register error handler + actions.NewHandleErrorAction(), + + actions.NewPendingAction(), actions.NewHandleFulcioCertAction(), @@ -144,7 +150,17 @@ func (r *CTlogReconciler) SetupWithManager(mgr ctrl.Manager) error { return
ctrl.NewControllerManagedBy(mgr). WithEventFilter(pause). - For(&rhtasv1alpha1.CTlog{}). + For(&rhtasv1alpha1.CTlog{}, builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, predicate.Funcs{UpdateFunc: func(event event.UpdateEvent) bool { + // do not requeue failed object updates + instance, ok := event.ObjectNew.(*rhtasv1alpha1.CTlog) + if !ok { + return false + } + if c := meta.FindStatusCondition(instance.Status.Conditions, constants.Ready); c != nil { + return c.Reason != constants.Failure + } + return true + }}))). Owns(&v1.Deployment{}). Owns(&v12.Service{}). WatchesMetadata(partialSecret, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, object client.Object) []reconcile.Request { diff --git a/internal/controller/ctlog/ctlog_controller_test.go b/internal/controller/ctlog/ctlog_controller_test.go index d45967785..bc0222937 100644 --- a/internal/controller/ctlog/ctlog_controller_test.go +++ b/internal/controller/ctlog/ctlog_controller_test.go @@ -42,15 +42,14 @@ var _ = Describe("CTlog controller", func() { const ( Name = "test" - Namespace = "default" + Namespace = "test" ) ctx := context.Background() namespace := &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: Name, - Namespace: Namespace, + Name: Namespace, }, }
diff --git a/internal/controller/ctlog/ctlog_error_handler_test.go b/internal/controller/ctlog/ctlog_error_handler_test.go
new file mode 100644
index 000000000..75da9c3de
--- /dev/null
+++ b/internal/controller/ctlog/ctlog_error_handler_test.go
@@ -0,0 +1,179 @@
+/*
+Copyright 2023.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package ctlog
+
+import (
+	"context"
+	"os"
+	"time"
+
+	"github.com/securesign/operator/internal/controller/common/utils"
+	"github.com/securesign/operator/internal/controller/common/utils/kubernetes"
+	fulcio "github.com/securesign/operator/internal/controller/fulcio/actions"
+	trillian "github.com/securesign/operator/internal/controller/trillian/actions"
+	appsv1 "k8s.io/api/apps/v1"
+	runtimeClient "sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/securesign/operator/api/v1alpha1"
+	"github.com/securesign/operator/internal/controller/constants"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/meta"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+// This spec waits for a CTlog instance to report the Failure reason, checks
+// that the reason leaves and re-enters Failure, then sets spec.treeID and
+// verifies that the private key reference is unchanged once it is Ready.
+var _ = Describe("CTlog ErrorHandler", func() {
+	Context("CTlog ErrorHandler test", func() {
+
+		const (
+			Name      = "test"
+			Namespace = "errorhandler"
+		)
+
+		ctx := context.Background()
+
+		namespace := &corev1.Namespace{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: Namespace,
+			},
+		}
+
+		typeNamespaceName := types.NamespacedName{Name: Name, Namespace: Namespace}
+		instance := &v1alpha1.CTlog{}
+
+		BeforeEach(func() {
+			// workaround - disable "host" mode in CreateTrillianTree function
+			Expect(os.Setenv("CONTAINER_MODE", "true")).To(Not(HaveOccurred()))
+
+			By("Creating the Namespace to perform the tests")
+			err := k8sClient.Create(ctx, namespace)
+			Expect(err).To(Not(HaveOccurred()))
+		})
+
+		AfterEach(func() {
+			By("removing the custom resource for the Kind CTlog")
+			found := &v1alpha1.CTlog{}
+			err := k8sClient.Get(ctx, typeNamespaceName, found)
+			Expect(err).To(Not(HaveOccurred()))
+
+			Eventually(func() error {
+				return k8sClient.Delete(ctx, found)
+			}, 2*time.Minute, time.Second).Should(Succeed())
+
+			// TODO(user): Attention if you improve this code by adding other context test you MUST
+			// be aware of the current delete namespace limitations.
+			// More info: https://book.kubebuilder.io/reference/envtest.html#testing-considerations
+			By("Deleting the Namespace to perform the tests")
+			_ = k8sClient.Delete(ctx, namespace)
+		})
+
+		It("should successfully reconcile a custom resource for CTlog", func() {
+			By("creating the custom resource for the Kind CTlog")
+			err := k8sClient.Get(ctx, typeNamespaceName, instance)
+			if err != nil && errors.IsNotFound(err) {
+				// Let's mock our custom resource at the same way that we would
+				// apply on the cluster the manifest under config/samples
+				// Assign to the shared instance (no shadowing) so that instance.Name
+				// is populated when the labels are built below.
+				instance = &v1alpha1.CTlog{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      Name,
+						Namespace: Namespace,
+					},
+					Spec: v1alpha1.CTlogSpec{},
+				}
+				err = k8sClient.Create(ctx, instance)
+				Expect(err).To(Not(HaveOccurred()))
+			}
+
+			Expect(k8sClient.Create(ctx, kubernetes.CreateSecret("test", Namespace,
+				map[string][]byte{"cert": []byte("fakeCert")},
+				map[string]string{fulcio.FulcioCALabel: "cert"},
+			))).To(Succeed())
+
+			Expect(k8sClient.Create(ctx, kubernetes.CreateService(Namespace, trillian.LogserverDeploymentName, trillian.ServerPortName, trillian.ServerPort, constants.LabelsForComponent(trillian.LogServerComponentName, instance.Name)))).To(Succeed())
+
+			found := &v1alpha1.CTlog{}
+
+			By("Deployment should fail")
+			Eventually(func() string {
+
+				Expect(k8sClient.Get(ctx, typeNamespaceName, found)).Should(Succeed())
+				condition := meta.FindStatusCondition(found.Status.Conditions, constants.Ready)
+				if condition == nil {
+					return ""
+				}
+				return condition.Reason
+			}).Should(Equal(constants.Failure))
+
+			key := found.Status.PrivateKeyRef.Name
+			Expect(key).To(Not(BeEmpty()))
+
+			// readyReason fetches a fresh copy of the instance and returns the
+			// reason of its Ready condition, or "" when the condition is missing,
+			// so a missing condition is retried instead of causing a nil dereference.
+			readyReason := func() string {
+				current := &v1alpha1.CTlog{}
+				Expect(k8sClient.Get(ctx, typeNamespaceName, current)).Should(Succeed())
+				if c := meta.FindStatusCondition(current.Status.Conditions, constants.Ready); c != nil {
+					return c.Reason
+				}
+				return ""
+			}
+
+			By("Periodically trying to restart deployment")
+			Eventually(readyReason).Should(Not(Equal(constants.Failure)))
+			Eventually(readyReason).Should(Equal(constants.Failure))
+
+			By("After fixing the problem the CTlog instance is Ready")
+			Eventually(func() error {
+				Expect(k8sClient.Get(ctx, typeNamespaceName, found)).Should(Succeed())
+				found.Spec.TreeID = utils.Pointer(int64(1))
+				return k8sClient.Update(ctx, found)
+			}).Should(Succeed())
+
+			By("Waiting until CTlog instance is Initialization")
+			Eventually(readyReason).Should(Equal(constants.Initialize))
+
+			deployments := &appsv1.DeploymentList{}
+			Expect(k8sClient.List(ctx, deployments, runtimeClient.InNamespace(Namespace))).To(Succeed())
+			By("Move to Ready phase")
+			for _, d := range deployments.Items {
+				d.Status.Conditions = []appsv1.DeploymentCondition{
+					{Status: corev1.ConditionTrue, Type: appsv1.DeploymentAvailable, Reason: constants.Ready}}
+				Expect(k8sClient.Status().Update(ctx, &d)).Should(Succeed())
+			}
+			// Workaround to succeed condition for Ready phase
+
+			// Refresh the outer found so the key comparison below uses the Ready state.
+			Eventually(func() bool {
+				Expect(k8sClient.Get(ctx, typeNamespaceName, found)).Should(Succeed())
+				return meta.IsStatusConditionTrue(found.Status.Conditions, constants.Ready)
+			}).Should(BeTrue())
+
+			By("Pregenerated resources are reused")
+			Expect(key).To(Equal(found.Status.PrivateKeyRef.Name))
+		})
+	})
+})
diff --git a/internal/controller/rekor/rekor_error_handler_test.go b/internal/controller/rekor/rekor_error_handler_test.go index 70d83a0e6..39feb2af7 100644 --- a/internal/controller/rekor/rekor_error_handler_test.go +++ b/internal/controller/rekor/rekor_error_handler_test.go
@@ -112,8 +112,7 @@ var _ = Describe("Rekor ErrorHandler", func() { err = k8sClient.Create(ctx, instance) Expect(err).To(Not(HaveOccurred())) } - err = k8sClient.Create(ctx, kubernetes.CreateService(Namespace, trillian.LogserverDeploymentName, 8090, instance.Labels)) - Expect(err).To(Not(HaveOccurred())) + Expect(k8sClient.Create(ctx, kubernetes.CreateService(Namespace, trillian.LogserverDeploymentName, trillian.ServerPortName, trillian.ServerPort, constants.LabelsForComponent(trillian.LogServerComponentName, instance.Name)))).To(Succeed()) found := &v1alpha1.Rekor{}