Skip to content

Commit

Permalink
Add prometheus metrics
Browse files (browse the repository at this point in the history)
  • Loading branch information
wweiwei-li committed Feb 20, 2025
1 parent 526b830 commit 5a7af88
Show file tree
Hide file tree
Showing 28 changed files with 693 additions and 188 deletions.
36 changes: 26 additions & 10 deletions controllers/elbv2/targetgroupbinding_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ package controllers
import (
"context"
"fmt"
"time"

discv1 "k8s.io/api/discovery/v1"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/pkg/errors"
Expand All @@ -31,12 +32,14 @@ import (
"k8s.io/client-go/util/workqueue"
"sigs.k8s.io/aws-load-balancer-controller/controllers/elbv2/eventhandlers"
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error"
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
"sigs.k8s.io/aws-load-balancer-controller/pkg/runtime"
"sigs.k8s.io/aws-load-balancer-controller/pkg/targetgroupbinding"
"sigs.k8s.io/controller-runtime/pkg/controller"

"github.com/go-logr/logr"
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

Expand All @@ -51,7 +54,7 @@ const (
// NewTargetGroupBindingReconciler constructs a new targetGroupBindingReconciler.
func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder record.EventRecorder, finalizerManager k8s.FinalizerManager,
tgbResourceManager targetgroupbinding.ResourceManager, config config.ControllerConfig, deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler,
logger logr.Logger) *targetGroupBindingReconciler {
logger logr.Logger, metricsCollector lbcmetrics.MetricCollector) *targetGroupBindingReconciler {

return &targetGroupBindingReconciler{
k8sClient: k8sClient,
Expand All @@ -60,6 +63,7 @@ func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder reco
tgbResourceManager: tgbResourceManager,
deferredTargetGroupBindingReconciler: deferredTargetGroupBindingReconciler,
logger: logger,
metricsCollector: metricsCollector,

maxConcurrentReconciles: config.TargetGroupBindingMaxConcurrentReconciles,
maxExponentialBackoffDelay: config.TargetGroupBindingMaxExponentialBackoffDelay,
Expand All @@ -75,6 +79,7 @@ type targetGroupBindingReconciler struct {
tgbResourceManager targetgroupbinding.ResourceManager
deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler
logger logr.Logger
metricsCollector lbcmetrics.MetricCollector

maxConcurrentReconciles int
maxExponentialBackoffDelay time.Duration
Expand Down Expand Up @@ -110,25 +115,36 @@ func (r *targetGroupBindingReconciler) reconcile(ctx context.Context, req reconc
}

func (r *targetGroupBindingReconciler) reconcileTargetGroupBinding(ctx context.Context, tgb *elbv2api.TargetGroupBinding) error {
if err := r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer); err != nil {
var err error
finalizerFn := func() {
err = r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer)
}
r.metricsCollector.ObserveControllerReconcileLatency("targetGroupBinding", "add_finalizers", finalizerFn)
if err != nil {
r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err))
return err
return errmetrics.NewErrorWithMetrics("targetGroupBinding", "add_finalizers_error", err, r.metricsCollector)
}

deferred, err := r.tgbResourceManager.Reconcile(ctx, tgb)

var deferred bool
tgbResourceFn := func() {
deferred, err = r.tgbResourceManager.Reconcile(ctx, tgb)
}
r.metricsCollector.ObserveControllerReconcileLatency("targetGroupBinding", "reconcile_targetgroupblinding", tgbResourceFn)
if err != nil {
return err
return errmetrics.NewErrorWithMetrics("targetGroupBinding", "reconcile_targetgroupblinding_error", err, r.metricsCollector)
}

if deferred {
r.deferredTargetGroupBindingReconciler.Enqueue(tgb)
return nil
}

if err := r.updateTargetGroupBindingStatus(ctx, tgb); err != nil {
r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err))
return err
updateTargetGroupBindingStatusFn := func() {
err = r.updateTargetGroupBindingStatus(ctx, tgb)
}
defer r.metricsCollector.ObserveControllerReconcileLatency("targetGroupBinding", "update_status", updateTargetGroupBindingStatusFn)
if err != nil {
return errmetrics.NewErrorWithMetrics("targetGroupBinding", "update_status_error", err, r.metricsCollector)
}

r.eventRecorder.Event(tgb, corev1.EventTypeNormal, k8s.TargetGroupBindingEventReasonSuccessfullyReconciled, "Successfully reconciled")
Expand Down
79 changes: 59 additions & 20 deletions controllers/ingress/group_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ingress
import (
"context"
"fmt"

"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/go-logr/logr"
Expand All @@ -21,8 +22,10 @@ import (
"sigs.k8s.io/aws-load-balancer-controller/pkg/deploy"
elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2"
"sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking"
errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error"
"sigs.k8s.io/aws-load-balancer-controller/pkg/ingress"
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s"
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc"
"sigs.k8s.io/aws-load-balancer-controller/pkg/model/core"
elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2"
networkingpkg "sigs.k8s.io/aws-load-balancer-controller/pkg/networking"
Expand All @@ -48,7 +51,7 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
finalizerManager k8s.FinalizerManager, networkingSGManager networkingpkg.SecurityGroupManager,
networkingSGReconciler networkingpkg.SecurityGroupReconciler, subnetsResolver networkingpkg.SubnetsResolver,
elbv2TaggingManager elbv2deploy.TaggingManager, controllerConfig config.ControllerConfig, backendSGProvider networkingpkg.BackendSGProvider,
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger) *groupReconciler {
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector) *groupReconciler {

annotationParser := annotations.NewSuffixAnnotationParser(annotations.AnnotationPrefixIngress)
authConfigBuilder := ingress.NewDefaultAuthConfigBuilder(annotationParser)
Expand All @@ -61,10 +64,10 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
authConfigBuilder, enhancedBackendBuilder, trackingProvider, elbv2TaggingManager, controllerConfig.FeatureGates,
cloud.VpcID(), controllerConfig.ClusterName, controllerConfig.DefaultTags, controllerConfig.ExternalManagedTags,
controllerConfig.DefaultSSLPolicy, controllerConfig.DefaultTargetType, controllerConfig.DefaultLoadBalancerScheme, backendSGProvider, sgResolver,
controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger)
controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger, metricsCollector)
stackMarshaller := deploy.NewDefaultStackMarshaller()
stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingSGManager, networkingSGReconciler, elbv2TaggingManager,
controllerConfig, ingressTagPrefix, logger)
controllerConfig, ingressTagPrefix, logger, metricsCollector, controllerName)
classLoader := ingress.NewDefaultClassLoader(k8sClient, true)
classAnnotationMatcher := ingress.NewDefaultClassAnnotationMatcher(controllerConfig.IngressConfig.IngressClass)
manageIngressesWithoutIngressClass := controllerConfig.IngressConfig.IngressClass == ""
Expand All @@ -83,6 +86,8 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco
groupLoader: groupLoader,
groupFinalizerManager: groupFinalizerManager,
logger: logger,
metricsCollector: metricsCollector,
controllerName: controllerName,

maxConcurrentReconciles: controllerConfig.IngressConfig.MaxConcurrentReconciles,
}
Expand All @@ -102,6 +107,8 @@ type groupReconciler struct {
groupLoader ingress.GroupLoader
groupFinalizerManager ingress.FinalizerManager
logger logr.Logger
metricsCollector lbcmetrics.MetricCollector
controllerName string

maxConcurrentReconciles int
}
Expand All @@ -121,33 +128,53 @@ func (r *groupReconciler) Reconcile(ctx context.Context, req reconcile.Request)

func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request) error {
ingGroupID := ingress.DecodeGroupIDFromReconcileRequest(req)
ingGroup, err := r.groupLoader.Load(ctx, ingGroupID)
var err error
var ingGroup ingress.Group
loadIngressFn := func() {
ingGroup, err = r.groupLoader.Load(ctx, ingGroupID)
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "fetch_ingress", loadIngressFn)
if err != nil {
return err
return errmetrics.NewErrorWithMetrics("ingress", "fetch_ingress_error", err, r.metricsCollector)
}

if err := r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members); err != nil {
addFinalizerFn := func() {
err = r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members)
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "add_group_finalizer", addFinalizerFn)
if err != nil {
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err))
return err
return errmetrics.NewErrorWithMetrics("ingress", "add_group_finalizer_error", err, r.metricsCollector)
}

var lb *elbv2model.LoadBalancer
buildAndDeployModelFn := func() {
_, lb, err = r.buildAndDeployModel(ctx, ingGroup)
}
_, lb, err := r.buildAndDeployModel(ctx, ingGroup)
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "build_and_deploy_model", buildAndDeployModelFn)
if err != nil {
return err
return errmetrics.NewErrorWithMetrics("ingress", "build_and_deploy_model_error", err, r.metricsCollector)
}

if len(ingGroup.Members) > 0 && lb != nil {
lbDNS, err := lb.DNSName().Resolve(ctx)
if err != nil {
return err
}
if err := r.updateIngressGroupStatus(ctx, ingGroup, lbDNS); err != nil {
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err))
return err
dnsResolveAndUpdateStatus := func() {
lbDNS, err := lb.DNSName().Resolve(ctx)
if err != nil {
return
}
if err := r.updateIngressGroupStatus(ctx, ingGroup, lbDNS); err != nil {
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err))
}
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "dns_resolve_and_update_status", dnsResolveAndUpdateStatus)
}

if len(ingGroup.InactiveMembers) > 0 {
if err := r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers); err != nil {
removeGroupFinalizerFn := func() {
err = r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers)
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "remove_group_finalizer", removeGroupFinalizerFn)
if err != nil {
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedRemoveFinalizer, fmt.Sprintf("Failed remove finalizer due to %v", err))
return err
}
Expand All @@ -158,7 +185,15 @@ func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request)
}

func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingress.Group) (core.Stack, *elbv2model.LoadBalancer, error) {
stack, lb, secrets, backendSGRequired, err := r.modelBuilder.Build(ctx, ingGroup)
var stack core.Stack
var lb *elbv2model.LoadBalancer
var secrets []types.NamespacedName
var backendSGRequired bool
var err error
buildModelFn := func() {
stack, lb, secrets, backendSGRequired, err = r.modelBuilder.Build(ctx, ingGroup, r.metricsCollector)
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "build_model", buildModelFn)
if err != nil {
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedBuildModel, fmt.Sprintf("Failed build model due to %v", err))
return nil, nil, err
Expand All @@ -170,7 +205,11 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr
}
r.logger.Info("successfully built model", "model", stackJSON)

if err := r.stackDeployer.Deploy(ctx, stack); err != nil {
deployModelFn := func() {
err = r.stackDeployer.Deploy(ctx, stack, r.metricsCollector, "ingress")
}
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "deploy_model", deployModelFn)
if err != nil {
var requeueNeededAfter *runtime.RequeueNeededAfter
if errors.As(err, &requeueNeededAfter) {
return nil, nil, err
Expand All @@ -186,7 +225,7 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr
inactiveResources = append(inactiveResources, k8s.ToSliceOfNamespacedNames(ingGroup.Members)...)
}
if err := r.backendSGProvider.Release(ctx, networkingpkg.ResourceTypeIngress, inactiveResources); err != nil {
return nil, nil, err
return nil, nil, errmetrics.NewErrorWithMetrics("ingress", "release_auto_generated_backend_sg_error", err, r.metricsCollector)
}
return stack, lb, nil
}
Expand Down
Loading

0 comments on commit 5a7af88

Please sign in to comment.