-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add prometheus metrics #4056
base: main
Are you sure you want to change the base?
add prometheus metrics #4056
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,10 +19,11 @@ package controllers | |
import ( | ||
"context" | ||
"fmt" | ||
"time" | ||
|
||
discv1 "k8s.io/api/discovery/v1" | ||
"sigs.k8s.io/controller-runtime/pkg/handler" | ||
"sigs.k8s.io/controller-runtime/pkg/reconcile" | ||
"time" | ||
|
||
"github.com/aws/aws-sdk-go-v2/aws" | ||
"github.com/pkg/errors" | ||
|
@@ -31,16 +32,19 @@ import ( | |
"k8s.io/client-go/util/workqueue" | ||
"sigs.k8s.io/aws-load-balancer-controller/controllers/elbv2/eventhandlers" | ||
"sigs.k8s.io/aws-load-balancer-controller/pkg/config" | ||
errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error" | ||
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" | ||
"sigs.k8s.io/aws-load-balancer-controller/pkg/runtime" | ||
"sigs.k8s.io/aws-load-balancer-controller/pkg/targetgroupbinding" | ||
"sigs.k8s.io/controller-runtime/pkg/controller" | ||
|
||
"github.com/go-logr/logr" | ||
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" | ||
ctrl "sigs.k8s.io/controller-runtime" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
|
||
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1" | ||
metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util" | ||
) | ||
|
||
const ( | ||
|
@@ -51,7 +55,7 @@ const ( | |
// NewTargetGroupBindingReconciler constructs new targetGroupBindingReconciler | ||
func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder record.EventRecorder, finalizerManager k8s.FinalizerManager, | ||
tgbResourceManager targetgroupbinding.ResourceManager, config config.ControllerConfig, deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler, | ||
logger logr.Logger) *targetGroupBindingReconciler { | ||
logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *targetGroupBindingReconciler { | ||
|
||
return &targetGroupBindingReconciler{ | ||
k8sClient: k8sClient, | ||
|
@@ -60,6 +64,8 @@ func NewTargetGroupBindingReconciler(k8sClient client.Client, eventRecorder reco | |
tgbResourceManager: tgbResourceManager, | ||
deferredTargetGroupBindingReconciler: deferredTargetGroupBindingReconciler, | ||
logger: logger, | ||
metricsCollector: metricsCollector, | ||
reconcileCounters: reconcileCounters, | ||
|
||
maxConcurrentReconciles: config.TargetGroupBindingMaxConcurrentReconciles, | ||
maxExponentialBackoffDelay: config.TargetGroupBindingMaxExponentialBackoffDelay, | ||
|
@@ -75,6 +81,8 @@ type targetGroupBindingReconciler struct { | |
tgbResourceManager targetgroupbinding.ResourceManager | ||
deferredTargetGroupBindingReconciler DeferredTargetGroupBindingReconciler | ||
logger logr.Logger | ||
metricsCollector lbcmetrics.MetricCollector | ||
reconcileCounters *metricsutil.ReconcileCounters | ||
|
||
maxConcurrentReconciles int | ||
maxExponentialBackoffDelay time.Duration | ||
|
@@ -93,6 +101,7 @@ type targetGroupBindingReconciler struct { | |
// +kubebuilder:rbac:groups="discovery.k8s.io",resources=endpointslices,verbs=get;list;watch | ||
|
||
func (r *targetGroupBindingReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) { | ||
r.reconcileCounters.IncrementTGB(req.NamespacedName) | ||
r.logger.V(1).Info("Reconcile request", "name", req.Name) | ||
return runtime.HandleReconcileError(r.reconcile(ctx, req), r.logger) | ||
} | ||
|
@@ -110,25 +119,36 @@ func (r *targetGroupBindingReconciler) reconcile(ctx context.Context, req reconc | |
} | ||
|
||
func (r *targetGroupBindingReconciler) reconcileTargetGroupBinding(ctx context.Context, tgb *elbv2api.TargetGroupBinding) error { | ||
if err := r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer); err != nil { | ||
var err error | ||
finalizerFn := func() { | ||
err = r.finalizerManager.AddFinalizers(ctx, tgb, targetGroupBindingFinalizer) | ||
} | ||
r.metricsCollector.ObserveControllerReconcileLatency("targetGroupBinding", "add_finalizers", finalizerFn) | ||
if err != nil { | ||
r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err)) | ||
return err | ||
return errmetrics.NewErrorWithMetrics("targetGroupBinding", "add_finalizers_error", err, r.metricsCollector) | ||
} | ||
|
||
deferred, err := r.tgbResourceManager.Reconcile(ctx, tgb) | ||
|
||
var deferred bool | ||
tgbResourceFn := func() { | ||
deferred, err = r.tgbResourceManager.Reconcile(ctx, tgb) | ||
} | ||
r.metricsCollector.ObserveControllerReconcileLatency("targetGroupBinding", "reconcile_targetgroupblinding", tgbResourceFn) | ||
if err != nil { | ||
return err | ||
return errmetrics.NewErrorWithMetrics("targetGroupBinding", "reconcile_targetgroupblinding_error", err, r.metricsCollector) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo : |
||
} | ||
|
||
if deferred { | ||
r.deferredTargetGroupBindingReconciler.Enqueue(tgb) | ||
return nil | ||
} | ||
|
||
if err := r.updateTargetGroupBindingStatus(ctx, tgb); err != nil { | ||
r.eventRecorder.Event(tgb, corev1.EventTypeWarning, k8s.TargetGroupBindingEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err)) | ||
return err | ||
updateTargetGroupBindingStatusFn := func() { | ||
err = r.updateTargetGroupBindingStatus(ctx, tgb) | ||
} | ||
r.metricsCollector.ObserveControllerReconcileLatency("targetGroupBinding", "update_status", updateTargetGroupBindingStatusFn) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please define the resource as a const, it's re-used for every metric collection step. I think the step names are unique and don't need be consts. |
||
if err != nil { | ||
return errmetrics.NewErrorWithMetrics("targetGroupBinding", "update_status_error", err, r.metricsCollector) | ||
} | ||
|
||
r.eventRecorder.Event(tgb, corev1.EventTypeNormal, k8s.TargetGroupBindingEventReasonSuccessfullyReconciled, "Successfully reconciled") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ package ingress | |
import ( | ||
"context" | ||
"fmt" | ||
|
||
"sigs.k8s.io/controller-runtime/pkg/reconcile" | ||
|
||
"github.com/go-logr/logr" | ||
|
@@ -21,8 +22,11 @@ import ( | |
"sigs.k8s.io/aws-load-balancer-controller/pkg/deploy" | ||
elbv2deploy "sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/elbv2" | ||
"sigs.k8s.io/aws-load-balancer-controller/pkg/deploy/tracking" | ||
errmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/error" | ||
"sigs.k8s.io/aws-load-balancer-controller/pkg/ingress" | ||
"sigs.k8s.io/aws-load-balancer-controller/pkg/k8s" | ||
lbcmetrics "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/lbc" | ||
metricsutil "sigs.k8s.io/aws-load-balancer-controller/pkg/metrics/util" | ||
"sigs.k8s.io/aws-load-balancer-controller/pkg/model/core" | ||
elbv2model "sigs.k8s.io/aws-load-balancer-controller/pkg/model/elbv2" | ||
networkingpkg "sigs.k8s.io/aws-load-balancer-controller/pkg/networking" | ||
|
@@ -48,7 +52,7 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco | |
finalizerManager k8s.FinalizerManager, networkingSGManager networkingpkg.SecurityGroupManager, | ||
networkingSGReconciler networkingpkg.SecurityGroupReconciler, subnetsResolver networkingpkg.SubnetsResolver, | ||
elbv2TaggingManager elbv2deploy.TaggingManager, controllerConfig config.ControllerConfig, backendSGProvider networkingpkg.BackendSGProvider, | ||
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger) *groupReconciler { | ||
sgResolver networkingpkg.SecurityGroupResolver, logger logr.Logger, metricsCollector lbcmetrics.MetricCollector, reconcileCounters *metricsutil.ReconcileCounters) *groupReconciler { | ||
|
||
annotationParser := annotations.NewSuffixAnnotationParser(annotations.AnnotationPrefixIngress) | ||
authConfigBuilder := ingress.NewDefaultAuthConfigBuilder(annotationParser) | ||
|
@@ -61,10 +65,10 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco | |
authConfigBuilder, enhancedBackendBuilder, trackingProvider, elbv2TaggingManager, controllerConfig.FeatureGates, | ||
cloud.VpcID(), controllerConfig.ClusterName, controllerConfig.DefaultTags, controllerConfig.ExternalManagedTags, | ||
controllerConfig.DefaultSSLPolicy, controllerConfig.DefaultTargetType, controllerConfig.DefaultLoadBalancerScheme, backendSGProvider, sgResolver, | ||
controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger) | ||
controllerConfig.EnableBackendSecurityGroup, controllerConfig.DisableRestrictedSGRules, controllerConfig.IngressConfig.AllowedCertificateAuthorityARNs, controllerConfig.FeatureGates.Enabled(config.EnableIPTargetType), logger, metricsCollector) | ||
stackMarshaller := deploy.NewDefaultStackMarshaller() | ||
stackDeployer := deploy.NewDefaultStackDeployer(cloud, k8sClient, networkingSGManager, networkingSGReconciler, elbv2TaggingManager, | ||
controllerConfig, ingressTagPrefix, logger) | ||
controllerConfig, ingressTagPrefix, logger, metricsCollector, controllerName) | ||
classLoader := ingress.NewDefaultClassLoader(k8sClient, true) | ||
classAnnotationMatcher := ingress.NewDefaultClassAnnotationMatcher(controllerConfig.IngressConfig.IngressClass) | ||
manageIngressesWithoutIngressClass := controllerConfig.IngressConfig.IngressClass == "" | ||
|
@@ -83,6 +87,9 @@ func NewGroupReconciler(cloud services.Cloud, k8sClient client.Client, eventReco | |
groupLoader: groupLoader, | ||
groupFinalizerManager: groupFinalizerManager, | ||
logger: logger, | ||
metricsCollector: metricsCollector, | ||
controllerName: controllerName, | ||
reconcileCounters: reconcileCounters, | ||
|
||
maxConcurrentReconciles: controllerConfig.IngressConfig.MaxConcurrentReconciles, | ||
} | ||
|
@@ -102,6 +109,9 @@ type groupReconciler struct { | |
groupLoader ingress.GroupLoader | ||
groupFinalizerManager ingress.FinalizerManager | ||
logger logr.Logger | ||
metricsCollector lbcmetrics.MetricCollector | ||
controllerName string | ||
reconcileCounters *metricsutil.ReconcileCounters | ||
|
||
maxConcurrentReconciles int | ||
} | ||
|
@@ -116,40 +126,57 @@ type groupReconciler struct { | |
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch | ||
|
||
func (r *groupReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) { | ||
r.reconcileCounters.IncrementIngress(req.NamespacedName) | ||
return runtime.HandleReconcileError(r.reconcile(ctx, req), r.logger) | ||
} | ||
|
||
func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request) error { | ||
ingGroupID := ingress.DecodeGroupIDFromReconcileRequest(req) | ||
ingGroup, err := r.groupLoader.Load(ctx, ingGroupID) | ||
var err error | ||
var ingGroup ingress.Group | ||
loadIngressFn := func() { | ||
ingGroup, err = r.groupLoader.Load(ctx, ingGroupID) | ||
} | ||
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "fetch_ingress", loadIngressFn) | ||
if err != nil { | ||
return err | ||
return errmetrics.NewErrorWithMetrics("ingress", "fetch_ingress_error", err, r.metricsCollector) | ||
} | ||
|
||
if err := r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members); err != nil { | ||
addFinalizerFn := func() { | ||
err = r.groupFinalizerManager.AddGroupFinalizer(ctx, ingGroupID, ingGroup.Members) | ||
} | ||
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "add_group_finalizer", addFinalizerFn) | ||
if err != nil { | ||
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedAddFinalizer, fmt.Sprintf("Failed add finalizer due to %v", err)) | ||
return err | ||
return errmetrics.NewErrorWithMetrics("ingress", "add_group_finalizer_error", err, r.metricsCollector) | ||
} | ||
|
||
_, lb, err := r.buildAndDeployModel(ctx, ingGroup) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if len(ingGroup.Members) > 0 && lb != nil { | ||
lbDNS, err := lb.DNSName().Resolve(ctx) | ||
if err != nil { | ||
return err | ||
} | ||
if err := r.updateIngressGroupStatus(ctx, ingGroup, lbDNS); err != nil { | ||
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err)) | ||
return err | ||
dnsResolveAndUpdateStatus := func() { | ||
lbDNS, err := lb.DNSName().Resolve(ctx) | ||
if err != nil { | ||
return | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe you still need to propagate this error to the parent function. Right now, it will silently fail. |
||
} | ||
if err := r.updateIngressGroupStatus(ctx, ingGroup, lbDNS); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment, when using this nested function approach we need to propagate the error to the parent function. |
||
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedUpdateStatus, fmt.Sprintf("Failed update status due to %v", err)) | ||
} | ||
} | ||
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "dns_resolve_and_update_status", dnsResolveAndUpdateStatus) | ||
} | ||
|
||
if len(ingGroup.InactiveMembers) > 0 { | ||
if err := r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers); err != nil { | ||
removeGroupFinalizerFn := func() { | ||
err = r.groupFinalizerManager.RemoveGroupFinalizer(ctx, ingGroupID, ingGroup.InactiveMembers) | ||
} | ||
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "remove_group_finalizer", removeGroupFinalizerFn) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please define |
||
if err != nil { | ||
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedRemoveFinalizer, fmt.Sprintf("Failed remove finalizer due to %v", err)) | ||
return err | ||
return errmetrics.NewErrorWithMetrics("ingress", "remove_group_finalizer_error", err, r.metricsCollector) | ||
} | ||
} | ||
|
||
|
@@ -158,10 +185,18 @@ func (r *groupReconciler) reconcile(ctx context.Context, req reconcile.Request) | |
} | ||
|
||
func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingress.Group) (core.Stack, *elbv2model.LoadBalancer, error) { | ||
stack, lb, secrets, backendSGRequired, err := r.modelBuilder.Build(ctx, ingGroup) | ||
var stack core.Stack | ||
var lb *elbv2model.LoadBalancer | ||
var secrets []types.NamespacedName | ||
var backendSGRequired bool | ||
var err error | ||
buildModelFn := func() { | ||
stack, lb, secrets, backendSGRequired, err = r.modelBuilder.Build(ctx, ingGroup, r.metricsCollector) | ||
} | ||
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "build_model", buildModelFn) | ||
if err != nil { | ||
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedBuildModel, fmt.Sprintf("Failed build model due to %v", err)) | ||
return nil, nil, err | ||
return nil, nil, errmetrics.NewErrorWithMetrics("ingress", "build_model_error", err, r.metricsCollector) | ||
} | ||
stackJSON, err := r.stackMarshaller.Marshal(stack) | ||
if err != nil { | ||
|
@@ -170,13 +205,17 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr | |
} | ||
r.logger.Info("successfully built model", "model", stackJSON) | ||
|
||
if err := r.stackDeployer.Deploy(ctx, stack); err != nil { | ||
deployModelFn := func() { | ||
err = r.stackDeployer.Deploy(ctx, stack, r.metricsCollector, "ingress") | ||
} | ||
r.metricsCollector.ObserveControllerReconcileLatency("ingress", "deploy_model", deployModelFn) | ||
if err != nil { | ||
var requeueNeededAfter *runtime.RequeueNeededAfter | ||
if errors.As(err, &requeueNeededAfter) { | ||
return nil, nil, err | ||
} | ||
r.recordIngressGroupEvent(ctx, ingGroup, corev1.EventTypeWarning, k8s.IngressEventReasonFailedDeployModel, fmt.Sprintf("Failed deploy model due to %v", err)) | ||
return nil, nil, err | ||
return nil, nil, errmetrics.NewErrorWithMetrics("ingress", "deploy_model_error", err, r.metricsCollector) | ||
} | ||
r.logger.Info("successfully deployed model", "ingressGroup", ingGroup.ID) | ||
r.secretsManager.MonitorSecrets(ingGroup.ID.String(), secrets) | ||
|
@@ -186,7 +225,7 @@ func (r *groupReconciler) buildAndDeployModel(ctx context.Context, ingGroup ingr | |
inactiveResources = append(inactiveResources, k8s.ToSliceOfNamespacedNames(ingGroup.Members)...) | ||
} | ||
if err := r.backendSGProvider.Release(ctx, networkingpkg.ResourceTypeIngress, inactiveResources); err != nil { | ||
return nil, nil, err | ||
return nil, nil, errmetrics.NewErrorWithMetrics("ingress", "release_auto_generated_backend_sg_error", err, r.metricsCollector) | ||
} | ||
return stack, lb, nil | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nice!