Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add prometheus metrics #4056

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 30 additions & 10 deletions controllers/elbv2/targetgroupbinding_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ package controllers
import (
"context"
"fmt"
"time"

discv1 "k8s.io/api/discovery/v1"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/pkg/errors"
Expand All @@ -31,16 +32,19 @@ import (
"k8s.io/client-go/util/workqueue"
"sigs.k8s.io/aws-load-balancer-controller/controllers/elbv2/eventhandlers"
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error"
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
"sigs.k8s.io/aws-load-balancer-controller/pkg/runtime"
"sigs.k8s.io/aws-load-balancer-controller/pkg/targetgroupbinding"
"sigs.k8s.io/controller-runtime/pkg/controller"

"github.com/go-logr/logr"
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util"
)

const (
Expand All @@ -51,7 +55,7 @@ const (
// NewTargetGroupBindingReconciler constructs new targetGroupBindingReconciler
func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder record.EventRecorder, finalizerManager k8s.FinalizerManager,
tgbResourceManager targetgroupbinding.ResourceManager, config config.ControllerConfig, deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler,
logger logr.Logger) *targetGroupBindingReconciler {
logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *targetGroupBindingReconciler {

return &targetGroupBindingReconciler{
k8sClient: k8sClient,
Expand All @@ -60,6 +64,8 @@ func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder reco
tgbResourceManager: tgbResourceManager,
deferredTargetGroupBindingReconciler: deferredTargetGroupBindingReconciler,
logger: logger,
metricsCollector: metricsCollector,
reconcileCounters: reconcileCounters,

maxConcurrentReconciles: config.TargetGroupBindingMaxConcurrentReconciles,
maxExponentialBackoffDelay: config.TargetGroupBindingMaxExponentialBackoffDelay,
Expand All @@ -75,6 +81,8 @@ type targetGroupBindingReconciler struct {
tgbResourceManager targetgroupbinding.ResourceManager
deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler
logger logr.Logger
metricsCollector lbcmetrics.MetricCollector
reconcileCounters *metricsutil.ReconcileCounters

maxConcurrentReconciles int
maxExponentialBackoffDelay time.Duration
Expand All @@ -93,6 +101,7 @@ type targetGroupBindingReconciler struct {
// +kubebuilder:rbac:groups="discovery.k8s.io",resources=endpointslices,verbs=get;list;watch

func (r *targetGroupBindingReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) {
r.reconcileCounters.IncrementTGB(req.NamespacedName)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice!

r.logger.V(1).Info("Reconcile request", "name", req.Name)
return runtime.HandleReconcileError(r.reconcile(ctx, req), r.logger)
}
Expand All @@ -110,25 +119,36 @@ func (r *targetGroupBindingReconciler) reconcile(ctx context.Context, req reconc
}

func (r *targetGroupBindingReconciler) reconcileTargetGroupBinding(ctx context.Context, tgb *elbv2api.TargetGroupBinding) error {
if err := r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer); err != nil {
var err error
finalizerFn := func() {
err = r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer)
}
r.metricsCollector.ObserveControllerReconcileLatency("targetGroupBinding", "add_finalizers", finalizerFn)
if err != nil {
r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err))
return err
return errmetrics.NewErrorWithMetrics("targetGroupBinding", "add_finalizers_error", err, r.metricsCollector)
}

deferred, err := r.tgbResourceManager.Reconcile(ctx, tgb)

var deferred bool
tgbResourceFn := func() {
deferred, err = r.tgbResourceManager.Reconcile(ctx, tgb)
}
r.metricsCollector.ObserveControllerReconcileLatency("targetGroupBinding", "reconcile_targetgroupblinding", tgbResourceFn)
if err != nil {
return err
return errmetrics.NewErrorWithMetrics("targetGroupBinding", "reconcile_targetgroupblinding_error", err, r.metricsCollector)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo : blinding -> binding

}

if deferred {
r.deferredTargetGroupBindingReconciler.Enqueue(tgb)
return nil
}

if err := r.updateTargetGroupBindingStatus(ctx, tgb); err != nil {
r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err))
return err
updateTargetGroupBindingStatusFn := func() {
err = r.updateTargetGroupBindingStatus(ctx, tgb)
}
r.metricsCollector.ObserveControllerReconcileLatency("targetGroupBinding", "update_status", updateTargetGroupBindingStatusFn)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please define the resource as a const, it's re-used for every metric collection step. I think the step names are unique and don't need be consts.

if err != nil {
return errmetrics.NewErrorWithMetrics("targetGroupBinding", "update_status_error", err, r.metricsCollector)
}

r.eventRecorder.Event(tgb, corev1.EventTypeNormal, k8s.TargetGroupBindingEventReasonSuccessfullyReconciled, "Successfully reconciled")
Expand Down
81 changes: 60 additions & 21 deletions controllers/ingress/group_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ingress
import (
"context"
"fmt"

"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/go-logr/logr"
Expand All @@ -21,8 +22,11 @@ import (
"sigs.k8s.io/aws-load-balancer-controller/pkg/deploy"
elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2"
"sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking"
errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error"
"sigs.k8s.io/aws-load-balancer-controller/pkg/ingress"
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util"
"sigs.k8s.io/aws-load-balancer-controller/pkg/model/core"
elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2"
networkingpkg "sigs.k8s.io/aws-load-balancer-controller/pkg/networking"
Expand All @@ -48,7 +52,7 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
finalizerManager k8s.FinalizerManager, networkingSGManager networkingpkg.SecurityGroupManager,
networkingSGReconciler networkingpkg.SecurityGroupReconciler, subnetsResolver networkingpkg.SubnetsResolver,
elbv2TaggingManager elbv2deploy.TaggingManager, controllerConfig config.ControllerConfig, backendSGProvider networkingpkg.BackendSGProvider,
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger) *groupReconciler {
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *groupReconciler {

annotationParser := annotations.NewSuffixAnnotationParser(annotations.AnnotationPrefixIngress)
authConfigBuilder := ingress.NewDefaultAuthConfigBuilder(annotationParser)
Expand All @@ -61,10 +65,10 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
authConfigBuilder, enhancedBackendBuilder, trackingProvider, elbv2TaggingManager, controllerConfig.FeatureGates,
cloud.VpcID(), controllerConfig.ClusterName, controllerConfig.DefaultTags, controllerConfig.ExternalManagedTags,
controllerConfig.DefaultSSLPolicy, controllerConfig.DefaultTargetType, controllerConfig.DefaultLoadBalancerScheme, backendSGProvider, sgResolver,
controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger)
controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger, metricsCollector)
stackMarshaller := deploy.NewDefaultStackMarshaller()
stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingSGManager, networkingSGReconciler, elbv2TaggingManager,
controllerConfig, ingressTagPrefix, logger)
controllerConfig, ingressTagPrefix, logger, metricsCollector, controllerName)
classLoader := ingress.NewDefaultClassLoader(k8sClient, true)
classAnnotationMatcher := ingress.NewDefaultClassAnnotationMatcher(controllerConfig.IngressConfig.IngressClass)
manageIngressesWithoutIngressClass := controllerConfig.IngressConfig.IngressClass == ""
Expand All @@ -83,6 +87,9 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
groupLoader: groupLoader,
groupFinalizerManager: groupFinalizerManager,
logger: logger,
metricsCollector: metricsCollector,
controllerName: controllerName,
reconcileCounters: reconcileCounters,

maxConcurrentReconciles: controllerConfig.IngressConfig.MaxConcurrentReconciles,
}
Expand All @@ -102,6 +109,9 @@ type groupReconciler struct {
groupLoader ingress.GroupLoader
groupFinalizerManager ingress.FinalizerManager
logger logr.Logger
metricsCollector lbcmetrics.MetricCollector
controllerName string
reconcileCounters *metricsutil.ReconcileCounters

maxConcurrentReconciles int
}
Expand All @@ -116,40 +126,57 @@ type groupReconciler struct {
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch

func (r *groupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) {
r.reconcileCounters.IncrementIngress(req.NamespacedName)
return runtime.HandleReconcileError(r.reconcile(ctx, req), r.logger)
}

func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request) error {
ingGroupID := ingress.DecodeGroupIDFromReconcileRequest(req)
ingGroup, err := r.groupLoader.Load(ctx, ingGroupID)
var err error
var ingGroup ingress.Group
loadIngressFn := func() {
ingGroup, err = r.groupLoader.Load(ctx, ingGroupID)
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "fetch_ingress", loadIngressFn)
if err != nil {
return err
return errmetrics.NewErrorWithMetrics("ingress", "fetch_ingress_error", err, r.metricsCollector)
}

if err := r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members); err != nil {
addFinalizerFn := func() {
err = r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members)
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "add_group_finalizer", addFinalizerFn)
if err != nil {
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err))
return err
return errmetrics.NewErrorWithMetrics("ingress", "add_group_finalizer_error", err, r.metricsCollector)
}

_, lb, err := r.buildAndDeployModel(ctx, ingGroup)
if err != nil {
return err
}

if len(ingGroup.Members) > 0 && lb != nil {
lbDNS, err := lb.DNSName().Resolve(ctx)
if err != nil {
return err
}
if err := r.updateIngressGroupStatus(ctx, ingGroup, lbDNS); err != nil {
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err))
return err
dnsResolveAndUpdateStatus := func() {
lbDNS, err := lb.DNSName().Resolve(ctx)
if err != nil {
return
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe you still need to propagate this error to the parent function. Right now, it will silently fail.

}
if err := r.updateIngressGroupStatus(ctx, ingGroup, lbDNS); err != nil {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment, when using this nested function approach we need to propagate the error to the parent function.

r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err))
}
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "dns_resolve_and_update_status", dnsResolveAndUpdateStatus)
}

if len(ingGroup.InactiveMembers) > 0 {
if err := r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers); err != nil {
removeGroupFinalizerFn := func() {
err = r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers)
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "remove_group_finalizer", removeGroupFinalizerFn)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please define ingress as a constant.

if err != nil {
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedRemoveFinalizer, fmt.Sprintf("Failed remove finalizer due to %v", err))
return err
return errmetrics.NewErrorWithMetrics("ingress", "remove_group_finalizer_error", err, r.metricsCollector)
}
}

Expand All @@ -158,10 +185,18 @@ func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request)
}

func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingress.Group) (core.Stack, *elbv2model.LoadBalancer, error) {
stack, lb, secrets, backendSGRequired, err := r.modelBuilder.Build(ctx, ingGroup)
var stack core.Stack
var lb *elbv2model.LoadBalancer
var secrets []types.NamespacedName
var backendSGRequired bool
var err error
buildModelFn := func() {
stack, lb, secrets, backendSGRequired, err = r.modelBuilder.Build(ctx, ingGroup, r.metricsCollector)
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "build_model", buildModelFn)
if err != nil {
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedBuildModel, fmt.Sprintf("Failed build model due to %v", err))
return nil, nil, err
return nil, nil, errmetrics.NewErrorWithMetrics("ingress", "build_model_error", err, r.metricsCollector)
}
stackJSON, err := r.stackMarshaller.Marshal(stack)
if err != nil {
Expand All @@ -170,13 +205,17 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr
}
r.logger.Info("successfully built model", "model", stackJSON)

if err := r.stackDeployer.Deploy(ctx, stack); err != nil {
deployModelFn := func() {
err = r.stackDeployer.Deploy(ctx, stack, r.metricsCollector, "ingress")
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "deploy_model", deployModelFn)
if err != nil {
var requeueNeededAfter *runtime.RequeueNeededAfter
if errors.As(err, &requeueNeededAfter) {
return nil, nil, err
}
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedDeployModel, fmt.Sprintf("Failed deploy model due to %v", err))
return nil, nil, err
return nil, nil, errmetrics.NewErrorWithMetrics("ingress", "deploy_model_error", err, r.metricsCollector)
}
r.logger.Info("successfully deployed model", "ingressGroup", ingGroup.ID)
r.secretsManager.MonitorSecrets(ingGroup.ID.String(), secrets)
Expand All @@ -186,7 +225,7 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr
inactiveResources = append(inactiveResources, k8s.ToSliceOfNamespacedNames(ingGroup.Members)...)
}
if err := r.backendSGProvider.Release(ctx, networkingpkg.ResourceTypeIngress, inactiveResources); err != nil {
return nil, nil, err
return nil, nil, errmetrics.NewErrorWithMetrics("ingress", "release_auto_generated_backend_sg_error", err, r.metricsCollector)
}
return stack, lb, nil
}
Expand Down
Loading
Loading