Offload evaluable configs from Incident to a common place

yhabteab · yhabteab · commit 4d3bf720ba6e · 2025-11-03T16:54:10.000+01:00
diff --git a/internal/config/rule.go b/internal/config/rule.go
@@ -3,6 +3,7 @@ package config
 import (
 	"fmt"
 	"github.com/icinga/icinga-notifications/internal/rule"
+	"go.uber.org/zap"
 	"slices"
 	"time"
 )
@@ -205,3 +206,107 @@ func (r *RuntimeConfig) applyPendingRules() {
 			return nil
 		})
 }
+
+// EvalOptions specifies optional callbacks that are executed upon certain filter evaluation events.
+//
+// The EvalOptions type is used to configure the behaviour of the evaluation process when evaluating
+// filter expressions against a set of [rule.Escalation] entries. It allows you to hook into specific
+// events during the evaluation process and perform custom actions based on your requirements.
+type EvalOptions struct {
+	// OnPreEvaluate can be used to perform some actions before evaluating the filter for the current entry.
+	//
+	// This callback receives the current [rule.Escalation] entry as an argument, which is about to be
+	// evaluated. If this callback returns "false", the filter evaluation for the current entry is skipped,
+	// and the evaluation continues with the next one. If it returns "true" or is nil, the filter evaluation
+	// proceeds as normal.
+	//
+	// Note that if you skip the evaluation of an entry using this callback, the OnFilterMatch callback
+	// will not be triggered for that entry, even if its filter would have matched on the filterable object.
+	OnPreEvaluate func(*rule.Escalation) bool
+
+	// OnError is called when an error occurs during the filter evaluation.
+	//
+	// This callback receives the current [rule.Escalation] entry and the error that occurred as arguments.
+	// By default, the evaluation continues even if some entries fail, but you can override this behaviour
+	// by returning "false" in your handler, which aborts the evaluation prematurely. If you return "true"
+	// or if this callback is nil, the evaluation continues with the remaining entries.
+	//
+	// Note that if you choose to abort the evaluation by returning "false", the OnAllConfigEvaluated callback
+	// will not be triggered, as the evaluation did not complete successfully.
+	OnError func(*rule.Escalation, error) bool
+
+	// OnFilterMatch is called when the filter for an entry matches successfully.
+	//
+	// This callback receives the current [rule.Escalation] entry as an argument and runs before that entry
+	// is stored in the [RuleEntries] map. If it returns an error, the entry is not stored, the evaluation is
+	// aborted prematurely, and the error is returned. Otherwise, it continues with the remaining entries.
+	//
+	// Note that if you return an error from this callback, the OnAllConfigEvaluated callback will not be triggered,
+	// as the evaluation did not complete successfully.
+	OnFilterMatch func(*rule.Escalation) error
+
+	// OnAllConfigEvaluated is called after all configured entries have been evaluated.
+	//
+	// This callback receives a value of type [time.Duration] derived from the evaluation process as an argument.
+	// This callback is guaranteed to be called if none of the individual evaluation callbacks return prematurely
+	// with an error. If any of the callbacks return prematurely, this callback will not be triggered.
+	//
+	// The argument is the smallest [rule.EscalationFilter.ReevaluateAfter] result among the entries whose filter
+	// did not match, or [rule.RetryNever] if there was none; it hints at when a re-evaluation might be necessary.
+	OnAllConfigEvaluated func(time.Duration)
+}
+
+// RuleEntries is a map of rule.Escalation entries, keyed by their ID.
+//
+// This type is used to store the results of evaluating rule.Escalation entries against a filterable object.
+// Keying by ID keeps each entry unique. Evaluate only adds or overwrites entries and never removes any.
+type RuleEntries map[int64]*rule.Escalation
+
+// Evaluate runs the filter of every escalation of the given rules against filterable.
+//
+// Rule IDs in rules that are missing from the runtime config of res are logged at debug level and
+// skipped. Each entry whose filter matches is stored in re under its ID, and the callbacks set in
+// opts are triggered along the way as documented on EvalOptions.
+//
+// It returns the evaluation error of an entry if OnError is set and returns "false" for it, or the
+// error returned by OnFilterMatch; both abort the evaluation. In every other case, nil is returned.
+func (re RuleEntries) Evaluate(res Resources, filterable *rule.EscalationFilter, rules map[int64]struct{}, opts EvalOptions) error {
+	nextRetry := rule.RetryNever
+	for id := range rules {
+		current := res.RuntimeConfig.Rules[id]
+		if current == nil {
+			res.Logger.Debugw("Referenced rule does not exist", zap.Int64("rule_id", id))
+			continue
+		}
+
+		for _, esc := range current.Escalations {
+			if opts.OnPreEvaluate != nil && !opts.OnPreEvaluate(esc) {
+				continue // The caller opted out of evaluating this entry.
+			}
+			matched, err := esc.Eval(filterable)
+			switch {
+			case err != nil:
+				// Without a handler, or if the handler returns true, the failed entry is simply ignored.
+				if opts.OnError != nil && !opts.OnError(esc, err) {
+					return err
+				}
+			case !matched:
+				// Keep the smallest ReevaluateAfter result seen so far.
+				nextRetry = min(nextRetry, filterable.ReevaluateAfter(esc.Condition))
+			default:
+				if opts.OnFilterMatch != nil {
+					if err := opts.OnFilterMatch(esc); err != nil {
+						return err
+					}
+				}
+				re[esc.ID] = esc
+			}
+		}
+	}
+
+	if opts.OnAllConfigEvaluated != nil {
+		opts.OnAllConfigEvaluated(nextRetry)
+	}
+
+	return nil
+}
diff --git a/internal/config/rule_test.go b/internal/config/rule_test.go
@@ -0,0 +1,146 @@
+package config
+
+import (
+	"fmt"
+	"maps"
+	"testing"
+	"time"
+
+	"github.com/icinga/icinga-go-library/logging"
+	"github.com/icinga/icinga-go-library/notifications/event"
+	"github.com/icinga/icinga-notifications/internal/filter"
+	"github.com/icinga/icinga-notifications/internal/rule"
+	"github.com/icinga/icinga-notifications/internal/testutils"
+	"github.com/stretchr/testify/require"
+	"go.uber.org/zap"
+)
+
+const defaultDivisor = 3 // makeRule gives every defaultDivisor-th (i.e. third) rule a valid escalation condition.
+
+// TestRuleEntries verifies RuleEntries.Evaluate and its EvalOptions callbacks against 50 generated rules.
+func TestRuleEntries(t *testing.T) {
+	t.Parallel()
+
+	logs := logging.NewLoggingWithFactory("rule-entries-test", zap.DebugLevel, time.Hour, testutils.NewTestLoggerFactory(t))
+	runtimeConfig := NewRuntimeConfig(logs, nil)
+	runtimeConfig.Rules = make(map[int64]*rule.Rule)
+	for i := 1; i <= 50; i++ {
+		runtimeConfig.Rules[int64(i)] = makeRule(t, i)
+	}
+
+	t.Run("Evaluate", func(t *testing.T) {
+		t.Parallel()
+
+		res := MakeResources(runtimeConfig, "test-evaluate-rule-entries")
+		ruleEntries := make(RuleEntries)
+
+		filterContext := &rule.EscalationFilter{IncidentSeverity: event.SeverityEmerg}
+		assertEntries := func(rules map[int64]struct{}, expectedLen *int, expectError bool, opts EvalOptions) {
+			if expectError {
+				require.Error(t, ruleEntries.Evaluate(res, filterContext, rules, opts))
+			} else {
+				require.NoError(t, ruleEntries.Evaluate(res, filterContext, rules, opts))
+			}
+			require.Len(t, ruleEntries, *expectedLen)
+			clear(ruleEntries) // Clear the entries for the next run.
+		}
+
+		// Collect the rule IDs before deleting any rule, so that Evaluate also has to cope with dangling IDs.
+		rules := make(map[int64]struct{}, len(runtimeConfig.Rules))
+		for id := range runtimeConfig.Rules {
+			rules[id] = struct{}{}
+		}
+		// 16 of the 50 rules carry a valid entry; the 5 of them with an ID above 35 are deleted below.
+		expectedLen := len(runtimeConfig.Rules)/defaultDivisor - 5
+		// Drop some rules from the runtime config to simulate a runtime config deletion!
+		maps.DeleteFunc(runtimeConfig.Rules, func(ruleID int64, _ *rule.Rule) bool { return ruleID > 35 && ruleID%defaultDivisor == 0 })
+
+		opts := EvalOptions{
+			OnPreEvaluate: func(re *rule.Escalation) bool {
+				if re.RuleID > 35 && re.RuleID%defaultDivisor == 0 { // Those rules are deleted from our runtime config.
+					require.Failf(t, "OnPreEvaluate() shouldn't have been called", "rule %d was deleted from runtime config", re.RuleID)
+				}
+				require.Nilf(t, ruleEntries[re.ID], "Evaluate() shouldn't evaluate entry %d twice", re.ID)
+				return true
+			},
+			OnError: func(re *rule.Escalation, err error) bool {
+				require.EqualError(t, err, `unknown severity "evaluable"`)
+				return true
+			},
+			OnFilterMatch: func(re *rule.Escalation) error {
+				require.Nilf(t, ruleEntries[re.ID], "OnFilterMatch() shouldn't be called for entry %d twice", re.ID)
+				return nil
+			},
+		}
+		assertEntries(rules, &expectedLen, false, opts)
+
+		lenBeforeError := new(int)
+		errorCalls := 0 // Counted separately, as the recorded length may legitimately be zero.
+		opts.OnError = func(re *rule.Escalation, err error) bool {
+			errorCalls++
+			require.Equal(t, 1, errorCalls, "OnError() shouldn't have been called again")
+			require.EqualError(t, err, `unknown severity "evaluable"`)
+			*lenBeforeError = len(ruleEntries)
+			return false // This should let the evaluation fail completely!
+		}
+		assertEntries(rules, lenBeforeError, true, opts)
+
+		*lenBeforeError = 0
+		opts.OnError = nil
+		matchCalls := 0 // The first match aborts before anything is stored, so the length can't detect a second call.
+		opts.OnFilterMatch = func(re *rule.Escalation) error {
+			matchCalls++
+			require.Equal(t, 1, matchCalls, "OnFilterMatch() shouldn't have been called again")
+			*lenBeforeError = len(ruleEntries)
+			return fmt.Errorf("OnFilterMatch() failed badly") // This should let the evaluation fail completely!
+		}
+		assertEntries(rules, lenBeforeError, true, opts)
+
+		expectedLen = 0
+		filterContext.IncidentSeverity = event.SeverityOK
+		filterContext.IncidentAge = 5 * time.Minute
+
+		opts.OnFilterMatch = nil
+		opts.OnPreEvaluate = func(re *rule.Escalation) bool { return re.RuleID < 5 }
+		opts.OnAllConfigEvaluated = func(result time.Duration) {
+			// The filter string of the escalation condition is incident_age>=10m and the actual incident age is 5m.
+			require.Equal(t, 5*time.Minute, result)
+		}
+		assertEntries(rules, &expectedLen, false, opts)
+	})
+}
+
+// makeRule creates the rule with the ID i along with its escalation entries.
+//
+// Every rule gets one invalid escalation condition that always fails to evaluate.
+// Additionally, every third (defaultDivisor) rule gets a valid escalation condition that matches
+// on `incident_severity>warning||incident_age>=10m` to simulate some real-world conditions.
+func makeRule(t *testing.T, i int) *rule.Rule {
+	t.Helper() // Report failed requirements at the caller's line.
+
+	r := new(rule.Rule)
+	r.ID = int64(i)
+	r.Name = fmt.Sprintf("rule-%d", i)
+	r.Escalations = make(map[int64]*rule.Escalation)
+
+	invalidSeverity, err := filter.Parse("incident_severity=evaluable")
+	require.NoError(t, err, "parsing incident_severity=evaluable shouldn't fail")
+
+	invalid := new(rule.Escalation)
+	invalid.ID = r.ID * 150 // Valid entries use r.ID * 2, so the IDs can't collide below 75 rules.
+	invalid.RuleID = r.ID
+	invalid.Condition = invalidSeverity
+	r.Escalations[invalid.ID] = invalid
+
+	if i%defaultDivisor == 0 {
+		escalationCond, err := filter.Parse("incident_severity>warning||incident_age>=10m")
+		require.NoError(t, err, "parsing incident_severity>warning||incident_age>=10m shouldn't fail")
+		entry := new(rule.Escalation)
+		entry.ID = r.ID * 2
+		entry.RuleID = r.ID
+		entry.Condition = escalationCond
+		r.Escalations[entry.ID] = entry
+	}
+
+	return r
+}
diff --git a/internal/incident/incident.go b/internal/incident/incident.go