diff --git a/pkg/logs/patterns/eviction/BUILD.bazel b/pkg/logs/patterns/eviction/BUILD.bazel new file mode 100644 index 000000000000..04b23f19b66a --- /dev/null +++ b/pkg/logs/patterns/eviction/BUILD.bazel @@ -0,0 +1,27 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "eviction", + srcs = [ + "eviction.go", + "eviction_manager.go", + "score.go", + "types.go", + ], + importpath = "github.com/DataDog/datadog-agent/pkg/logs/patterns/eviction", + visibility = ["//visibility:public"], +) + +go_test( + name = "eviction_test", + srcs = [ + "eviction_manager_test.go", + "eviction_test.go", + "score_test.go", + ], + embed = [":eviction"], + deps = [ + "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/logs/patterns/eviction/eviction.go b/pkg/logs/patterns/eviction/eviction.go new file mode 100644 index 000000000000..7e7bc9353744 --- /dev/null +++ b/pkg/logs/patterns/eviction/eviction.go @@ -0,0 +1,161 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package eviction provides shared eviction scoring algorithms for patterns and tags. +package eviction + +import ( + "container/heap" + "math" + "time" +) + +// EvictLowestScoring evicts up to numToEvict items with the lowest eviction scores. +// Uses quickselect (partial sort) for O(N) average-case selection of the K lowest items, +// avoiding the O(N + K log N) cost of a full heap build + extraction. +func EvictLowestScoring(collection EvictableCollection, numToEvict int, decayFactor float64, gracePeriod time.Duration) (evicted []Evictable) { + if numToEvict <= 0 { + return nil + } + + now := time.Now() + + allItems := collection.CollectEvictables() + n := len(allItems) + if n == 0 { + return nil + } + + scored := make([]heapItem, n) + for i, item := range allItems { + score := CalculateScore( + item.GetFrequency(), + item.GetCreatedAt(), + item.GetLastAccessAt(), + now, + decayFactor, + ) + // Grace period: newly created items are ineligible for eviction + if gracePeriod > 0 && now.Sub(item.GetCreatedAt()) < gracePeriod { + score = math.MaxFloat64 + } + scored[i] = heapItem{ + item: item, + score: score, + } + } + + k := numToEvict + if k > n { + k = n + } + + // Quickselect partitions so scored[0:k] contains the k lowest-scoring items (unordered). + if k < n { + quickselectLowest(scored, k) + } + + evicted = make([]Evictable, k) + for i := 0; i < k; i++ { + collection.RemoveEvictable(scored[i].item) + evicted[i] = scored[i].item + } + + return evicted +} + +// quickselectLowest rearranges items so that the k items with the smallest scores +// end up at indices [0, k). Iterative with median-of-three pivot selection. +func quickselectLowest(items []heapItem, k int) { + lo, hi := 0, len(items)-1 + target := k - 1 + for lo < hi { + pivotIdx := medianOfThreePivot(items, lo, hi) + pivotIdx = partitionItems(items, lo, hi, pivotIdx) + if pivotIdx == target { + return + } else if pivotIdx < target { + lo = pivotIdx + 1 + } else { + hi = pivotIdx - 1 + } + } +} + +func medianOfThreePivot(items []heapItem, lo, hi int) int { + mid := lo + (hi-lo)/2 + if items[lo].score > items[mid].score { + items[lo], items[mid] = items[mid], items[lo] + } + if items[lo].score > items[hi].score { + items[lo], items[hi] = items[hi], items[lo] + } + if items[mid].score > items[hi].score { + items[mid], items[hi] = items[hi], items[mid] + } + return mid +} + +func partitionItems(items []heapItem, lo, hi, pivotIdx int) int { + pivot := items[pivotIdx].score + items[pivotIdx], items[hi] = items[hi], items[pivotIdx] + storeIdx := lo + for i := lo; i < hi; i++ { + if items[i].score < pivot { + items[i], items[storeIdx] = items[storeIdx], items[i] + storeIdx++ + } + } + items[storeIdx], items[hi] = items[hi], items[storeIdx] + return storeIdx +} + +// EvictToMemoryTarget evicts items until the target memory is freed. +// It uses actual item sizes rather than averages for precision. +func EvictToMemoryTarget(collection EvictableCollection, targetBytesToFree int64, decayFactor float64, gracePeriod time.Duration) (evicted []Evictable) { + if targetBytesToFree <= 0 { + return nil + } + + now := time.Now() + + // Build heap of all items sorted by eviction score + h := &evictionHeap{ + items: make([]heapItem, 0), + } + + for _, item := range collection.CollectEvictables() { + score := CalculateScore( + item.GetFrequency(), + item.GetCreatedAt(), + item.GetLastAccessAt(), + now, + decayFactor, + ) + // Grace period: newly created items are ineligible for eviction + if gracePeriod > 0 && now.Sub(item.GetCreatedAt()) < gracePeriod { + score = math.MaxFloat64 + } + h.items = append(h.items, heapItem{ + item: item, + score: score, + }) + } + + heap.Init(h) + + // Evict items until we've freed enough memory + evicted = make([]Evictable, 0) + bytesFreed := int64(0) + + for h.Len() > 0 && bytesFreed < targetBytesToFree { + item := heap.Pop(h).(heapItem) + collection.RemoveEvictable(item.item) + bytesFreed += item.item.EstimatedBytes() + evicted = append(evicted, item.item) + } + + return evicted +} diff --git a/pkg/logs/patterns/eviction/eviction_manager.go b/pkg/logs/patterns/eviction/eviction_manager.go new file mode 100644 index 000000000000..db63e596fff4 --- /dev/null +++ b/pkg/logs/patterns/eviction/eviction_manager.go @@ -0,0 +1,71 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package eviction provides shared eviction scoring algorithms for patterns and tags. +package eviction + +import "time" + +// Policy is the eviction policy type +type Policy int + +const ( + // PolicyLFUDecay uses LFU with power-law age decay + PolicyLFUDecay Policy = iota +) + +// Strategy indicates which eviction method to use +type Strategy int + +const ( + // StrategyNone indicates no eviction is needed + StrategyNone Strategy = iota + // StrategyByCount evicts a specific number of items + StrategyByCount + // StrategyByBytes evicts items until enough memory is freed + StrategyByBytes +) + +// Manager handles eviction using dual watermark system. +type Manager struct { + MaxItemCount int + MaxMemoryBytes int64 + EvictionHighWatermark float64 // Trigger eviction at this threshold + EvictionLowWatermark float64 // Evict back to this target + AgeDecayFactor float64 + GracePeriod time.Duration // Newly created items are ineligible for eviction during this period +} + +// ShouldEvict checks if eviction should be triggered based on high watermark thresholds. +func (m *Manager) ShouldEvict(itemCount int, estimatedBytes int64) (bool, bool) { + countOverLimit := float64(itemCount) > float64(m.MaxItemCount)*m.EvictionHighWatermark + bytesOverLimit := float64(estimatedBytes) > float64(m.MaxMemoryBytes)*m.EvictionHighWatermark + return countOverLimit, bytesOverLimit +} + +// EvictionTargets calculates how much to evict based on which threshold was exceeded. +func (m *Manager) EvictionTargets(itemCount int, estimatedBytes int64, countOverLimit, bytesOverLimit bool) (itemsToEvict int, bytesToFree int64, strategy Strategy) { + switch { + case bytesOverLimit: + // Memory eviction + targetBytes := m.applyWatermark(m.MaxMemoryBytes) + bytesToFree := estimatedBytes - targetBytes + return 0, bytesToFree, StrategyByBytes + + case countOverLimit: + // Count-based eviction + targetCount := int(m.applyWatermark(int64(m.MaxItemCount))) + itemsToEvict := max(itemCount-targetCount, 1) + return itemsToEvict, 0, StrategyByCount + + default: + return 0, 0, StrategyNone + } +} + +// applyWatermark applies the low watermark to a limit value +func (m *Manager) applyWatermark(limit int64) int64 { + return int64(float64(limit) * m.EvictionLowWatermark) +} diff --git a/pkg/logs/patterns/eviction/eviction_manager_test.go b/pkg/logs/patterns/eviction/eviction_manager_test.go new file mode 100644 index 000000000000..566d0d5f3641 --- /dev/null +++ b/pkg/logs/patterns/eviction/eviction_manager_test.go @@ -0,0 +1,395 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package eviction + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestManager_ShouldEvict_CountThreshold tests count-based threshold detection +func TestManager_ShouldEvict_CountThreshold(t *testing.T) { + manager := &Manager{ + MaxItemCount: 1000, + MaxMemoryBytes: 1_000_000, + EvictionHighWatermark: 0.9, + EvictionLowWatermark: 0.7, + AgeDecayFactor: 0.5, + } + + tests := []struct { + name string + itemCount int + estimatedBytes int64 + expectCountLimit bool + expectBytesLimit bool + }{ + { + name: "below both thresholds", + itemCount: 500, + estimatedBytes: 500_000, + expectCountLimit: false, + expectBytesLimit: false, + }, + { + name: "exactly at count high watermark", + itemCount: 900, // 1000 * 0.9 + estimatedBytes: 500_000, + expectCountLimit: false, // not greater than + expectBytesLimit: false, + }, + { + name: "just above count high watermark", + itemCount: 901, + estimatedBytes: 500_000, + expectCountLimit: true, + expectBytesLimit: false, + }, + { + name: "at max count", + itemCount: 1000, + estimatedBytes: 500_000, + expectCountLimit: true, + expectBytesLimit: false, + }, + { + name: "over max count", + itemCount: 1100, + estimatedBytes: 500_000, + expectCountLimit: true, + expectBytesLimit: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + countLimit, bytesLimit := manager.ShouldEvict(tt.itemCount, tt.estimatedBytes) + assert.Equal(t, tt.expectCountLimit, countLimit, "count limit mismatch") + assert.Equal(t, tt.expectBytesLimit, bytesLimit, "bytes limit mismatch") + }) + } +} + +// TestManager_ShouldEvict_MemoryThreshold tests memory-based threshold detection +func TestManager_ShouldEvict_MemoryThreshold(t *testing.T) { + manager := &Manager{ + MaxItemCount: 1000, + MaxMemoryBytes: 1_000_000, + EvictionHighWatermark: 0.9, + EvictionLowWatermark: 0.7, + AgeDecayFactor: 0.5, + } + + tests := []struct { + name string + itemCount int + estimatedBytes int64 + expectCountLimit bool + expectBytesLimit bool + }{ + { + name: "exactly at memory high watermark", + itemCount: 500, + estimatedBytes: 900_000, // 1_000_000 * 0.9 + expectCountLimit: false, + expectBytesLimit: false, // not greater than + }, + { + name: "just above memory high watermark", + itemCount: 500, + estimatedBytes: 900_001, + expectCountLimit: false, + expectBytesLimit: true, + }, + { + name: "at max memory", + itemCount: 500, + estimatedBytes: 1_000_000, + expectCountLimit: false, + expectBytesLimit: true, + }, + { + name: "over max memory", + itemCount: 500, + estimatedBytes: 1_100_000, + expectCountLimit: false, + expectBytesLimit: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + countLimit, bytesLimit := manager.ShouldEvict(tt.itemCount, tt.estimatedBytes) + assert.Equal(t, tt.expectCountLimit, countLimit, "count limit mismatch") + assert.Equal(t, tt.expectBytesLimit, bytesLimit, "bytes limit mismatch") + }) + } +} + +// TestManager_ShouldEvict_BothThresholds tests when both thresholds are exceeded +func TestManager_ShouldEvict_BothThresholds(t *testing.T) { + manager := &Manager{ + MaxItemCount: 1000, + MaxMemoryBytes: 1_000_000, + EvictionHighWatermark: 0.9, + EvictionLowWatermark: 0.7, + AgeDecayFactor: 0.5, + } + + // Both count and memory over limit + countLimit, bytesLimit := manager.ShouldEvict(950, 950_000) + assert.True(t, countLimit, "count should be over limit") + assert.True(t, bytesLimit, "bytes should be over limit") +} + +// TestManager_EvictionTargets_CountBased tests count-based eviction target calculation +func TestManager_EvictionTargets_CountBased(t *testing.T) { + manager := &Manager{ + MaxItemCount: 1000, + MaxMemoryBytes: 1_000_000, + EvictionHighWatermark: 0.9, + EvictionLowWatermark: 0.7, + AgeDecayFactor: 0.5, + } + + tests := []struct { + name string + itemCount int + estimatedBytes int64 + countOverLimit bool + bytesOverLimit bool + expectItemsToEvict int + expectBytesToFree int64 + expectStrategy Strategy + }{ + { + name: "count over limit - evict to low watermark", + itemCount: 950, + estimatedBytes: 500_000, + countOverLimit: true, + bytesOverLimit: false, + expectItemsToEvict: 250, // 950 - (1000 * 0.7) = 950 - 700 + expectBytesToFree: 0, + expectStrategy: StrategyByCount, + }, + { + name: "count at max - evict to low watermark", + itemCount: 1000, + estimatedBytes: 500_000, + countOverLimit: true, + bytesOverLimit: false, + expectItemsToEvict: 300, // 1000 - 700 + expectBytesToFree: 0, + expectStrategy: StrategyByCount, + }, + { + name: "count barely over - evict at least 1", + itemCount: 701, + estimatedBytes: 500_000, + countOverLimit: true, + bytesOverLimit: false, + expectItemsToEvict: 1, // max(701 - 700, 1) + expectBytesToFree: 0, + expectStrategy: StrategyByCount, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + itemsToEvict, bytesToFree, strategy := manager.EvictionTargets( + tt.itemCount, tt.estimatedBytes, tt.countOverLimit, tt.bytesOverLimit) + + assert.Equal(t, tt.expectItemsToEvict, itemsToEvict, "itemsToEvict mismatch") + assert.Equal(t, tt.expectBytesToFree, bytesToFree, "bytesToFree mismatch") + assert.Equal(t, tt.expectStrategy, strategy, "strategy mismatch") + }) + } +} + +// TestManager_EvictionTargets_MemoryBased tests memory-based eviction target calculation +func TestManager_EvictionTargets_MemoryBased(t *testing.T) { + manager := &Manager{ + MaxItemCount: 1000, + MaxMemoryBytes: 1_000_000, + EvictionHighWatermark: 0.9, + EvictionLowWatermark: 0.7, + AgeDecayFactor: 0.5, + } + + tests := []struct { + name string + itemCount int + estimatedBytes int64 + countOverLimit bool + bytesOverLimit bool + expectItemsToEvict int + expectBytesToFree int64 + expectStrategy Strategy + }{ + { + name: "memory over limit - evict to low watermark", + itemCount: 500, + estimatedBytes: 950_000, + countOverLimit: false, + bytesOverLimit: true, + expectItemsToEvict: 0, + expectBytesToFree: 250_000, // 950_000 - (1_000_000 * 0.7) = 950_000 - 700_000 + expectStrategy: StrategyByBytes, + }, + { + name: "memory at max - evict to low watermark", + itemCount: 500, + estimatedBytes: 1_000_000, + countOverLimit: false, + bytesOverLimit: true, + expectItemsToEvict: 0, + expectBytesToFree: 300_000, // 1_000_000 - 700_000 + expectStrategy: StrategyByBytes, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + itemsToEvict, bytesToFree, strategy := manager.EvictionTargets( + tt.itemCount, tt.estimatedBytes, tt.countOverLimit, tt.bytesOverLimit) + + assert.Equal(t, tt.expectItemsToEvict, itemsToEvict, "itemsToEvict mismatch") + assert.Equal(t, tt.expectBytesToFree, bytesToFree, "bytesToFree mismatch") + assert.Equal(t, tt.expectStrategy, strategy, "strategy mismatch") + }) + } +} + +// TestManager_EvictionTargets_PrioritizeMemory tests that memory eviction takes priority over count +func TestManager_EvictionTargets_PrioritizeMemory(t *testing.T) { + manager := &Manager{ + MaxItemCount: 1000, + MaxMemoryBytes: 1_000_000, + EvictionHighWatermark: 0.9, + EvictionLowWatermark: 0.7, + AgeDecayFactor: 0.5, + } + + // Both limits exceeded - memory should take priority + itemsToEvict, bytesToFree, strategy := manager.EvictionTargets(1000, 1_000_000, true, true) + + assert.Equal(t, 0, itemsToEvict, "itemsToEvict should be 0 when evicting by memory") + assert.Equal(t, int64(300_000), bytesToFree, "should evict to memory low watermark") + assert.Equal(t, StrategyByBytes, strategy, "should prioritize memory-based eviction") +} + +// TestManager_EvictionTargets_NeitherLimit tests when no limits are exceeded +func TestManager_EvictionTargets_NeitherLimit(t *testing.T) { + manager := &Manager{ + MaxItemCount: 1000, + MaxMemoryBytes: 1_000_000, + EvictionHighWatermark: 0.9, + EvictionLowWatermark: 0.7, + AgeDecayFactor: 0.5, + } + + // Neither limit exceeded + itemsToEvict, bytesToFree, strategy := manager.EvictionTargets(500, 500_000, false, false) + + assert.Equal(t, 0, itemsToEvict, "itemsToEvict should be 0") + assert.Equal(t, int64(0), bytesToFree, "bytesToFree should be 0") + assert.Equal(t, StrategyNone, strategy, "strategy should be StrategyNone") +} + +// TestManager_applyWatermark tests watermark calculation +func TestManager_applyWatermark(t *testing.T) { + manager := &Manager{ + MaxItemCount: 1000, + MaxMemoryBytes: 1_000_000, + EvictionHighWatermark: 0.9, + EvictionLowWatermark: 0.7, + AgeDecayFactor: 0.5, + } + + tests := []struct { + name string + limit int64 + expected int64 + }{ + {"watermark on 1000", 1000, 700}, // 1000 * 0.7 + {"watermark on 1000000", 1_000_000, 700_000}, // 1_000_000 * 0.7 + {"watermark on 100", 100, 70}, // 100 * 0.7 + {"watermark on 0", 0, 0}, // 0 * 0.7 + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := manager.applyWatermark(tt.limit) + assert.Equal(t, tt.expected, result) + }) + } +} + +// TestManager_DifferentWatermarks tests manager with different watermark values +func TestManager_DifferentWatermarks(t *testing.T) { + tests := []struct { + name string + highWatermark float64 + lowWatermark float64 + itemCount int + expectCountLimit bool + expectItemsToEvict int + }{ + { + name: "high watermark 0.8, low 0.6", + highWatermark: 0.8, + lowWatermark: 0.6, + itemCount: 850, + expectCountLimit: true, // 850 > 800 + expectItemsToEvict: 250, // 850 - 600 + }, + { + name: "high watermark 0.95, low 0.85", + highWatermark: 0.95, + lowWatermark: 0.85, + itemCount: 960, + expectCountLimit: true, // 960 > 950 + expectItemsToEvict: 110, // 960 - 850 + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + manager := &Manager{ + MaxItemCount: 1000, + MaxMemoryBytes: 1_000_000, + EvictionHighWatermark: tt.highWatermark, + EvictionLowWatermark: tt.lowWatermark, + AgeDecayFactor: 0.5, + } + + countLimit, _ := manager.ShouldEvict(tt.itemCount, 0) + assert.Equal(t, tt.expectCountLimit, countLimit) + + if tt.expectCountLimit { + itemsToEvict, _, _ := manager.EvictionTargets(tt.itemCount, 0, true, false) + assert.Equal(t, tt.expectItemsToEvict, itemsToEvict) + } + }) + } +} + +// TestManager_ZeroLimits tests manager with zero limits +func TestManager_ZeroLimits(t *testing.T) { + manager := &Manager{ + MaxItemCount: 0, + MaxMemoryBytes: 0, + EvictionHighWatermark: 0.9, + EvictionLowWatermark: 0.7, + AgeDecayFactor: 0.5, + } + + // With zero limits, any non-zero usage exceeds the threshold + countLimit, bytesLimit := manager.ShouldEvict(100, 1000) + assert.True(t, countLimit, "any count exceeds 0") + assert.True(t, bytesLimit, "any bytes exceed 0") +} diff --git a/pkg/logs/patterns/eviction/eviction_test.go b/pkg/logs/patterns/eviction/eviction_test.go new file mode 100644 index 000000000000..4f1890a238b6 --- /dev/null +++ b/pkg/logs/patterns/eviction/eviction_test.go @@ -0,0 +1,317 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package eviction + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mockEvictable implements the Evictable interface for testing +type mockEvictable struct { + id int + frequency float64 + createdAt time.Time + lastAccessAt time.Time + bytes int64 +} + +func (m *mockEvictable) GetFrequency() float64 { + return m.frequency +} + +func (m *mockEvictable) GetCreatedAt() time.Time { + return m.createdAt +} + +func (m *mockEvictable) GetLastAccessAt() time.Time { + return m.lastAccessAt +} + +func (m *mockEvictable) EstimatedBytes() int64 { + return m.bytes +} + +// mockCollection implements the EvictableCollection interface for testing +type mockCollection struct { + items []*mockEvictable +} + +func (m *mockCollection) CollectEvictables() []Evictable { + result := make([]Evictable, len(m.items)) + for i, item := range m.items { + result[i] = item + } + return result +} + +func (m *mockCollection) RemoveEvictable(item Evictable) { + mockItem := item.(*mockEvictable) + for i, existing := range m.items { + if existing.id == mockItem.id { + m.items = append(m.items[:i], m.items[i+1:]...) + return + } + } +} + +// TestEvictLowestScoring_Basic tests basic eviction by count +func TestEvictLowestScoring_Basic(t *testing.T) { + now := time.Now() + + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 100, createdAt: now.Add(-10 * time.Minute), lastAccessAt: now, bytes: 100}, + {id: 2, frequency: 50, createdAt: now.Add(-20 * time.Minute), lastAccessAt: now.Add(-15 * time.Minute), bytes: 150}, + {id: 3, frequency: 200, createdAt: now.Add(-5 * time.Minute), lastAccessAt: now, bytes: 200}, + {id: 4, frequency: 10, createdAt: now.Add(-30 * time.Minute), lastAccessAt: now.Add(-25 * time.Minute), bytes: 50}, + {id: 5, frequency: 150, createdAt: now.Add(-15 * time.Minute), lastAccessAt: now.Add(-5 * time.Minute), bytes: 120}, + }, + } + + evicted := EvictLowestScoring(collection, 2, 0.5, 0) + + // Should evict 2 items with lowest scores + require.Len(t, evicted, 2, "should evict exactly 2 items") + assert.Len(t, collection.items, 3, "collection should have 3 items remaining") + + // Item 4 should definitely be evicted (lowest frequency, oldest) + evictedIDs := []int{evicted[0].(*mockEvictable).id, evicted[1].(*mockEvictable).id} + assert.Contains(t, evictedIDs, 4, "item 4 should be evicted (lowest score)") +} + +// TestEvictLowestScoring_EvictAll tests evicting more items than exist +func TestEvictLowestScoring_EvictAll(t *testing.T) { + now := time.Now() + + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 100, createdAt: now, lastAccessAt: now, bytes: 100}, + {id: 2, frequency: 50, createdAt: now, lastAccessAt: now, bytes: 150}, + }, + } + + evicted := EvictLowestScoring(collection, 10, 0.5, 0) + + assert.Len(t, evicted, 2, "should evict only available items") + assert.Empty(t, collection.items, "collection should be empty") +} + +// TestEvictLowestScoring_ZeroOrNegative tests edge cases +func TestEvictLowestScoring_ZeroOrNegative(t *testing.T) { + now := time.Now() + + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 100, createdAt: now, lastAccessAt: now, bytes: 100}, + }, + } + + tests := []struct { + name string + numToEvict int + }{ + {"zero items", 0}, + {"negative items", -5}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + evicted := EvictLowestScoring(collection, tt.numToEvict, 0.5, 0) + assert.Nil(t, evicted, "should return nil") + assert.Len(t, collection.items, 1, "collection should be unchanged") + }) + } +} + +// TestEvictLowestScoring_EmptyCollection tests evicting from empty collection +func TestEvictLowestScoring_EmptyCollection(t *testing.T) { + collection := &mockCollection{items: []*mockEvictable{}} + + evicted := EvictLowestScoring(collection, 5, 0.5, 0) + + assert.Empty(t, evicted, "should return empty slice") +} + +// TestEvictLowestScoring_DecayFactor tests that decay factor affects eviction order +func TestEvictLowestScoring_DecayFactor(t *testing.T) { + now := time.Now() + + // Create items where decay factor matters + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 1000, createdAt: now.Add(-24 * time.Hour), lastAccessAt: now.Add(-24 * time.Hour), bytes: 100}, // Old, high freq + {id: 2, frequency: 50, createdAt: now.Add(-1 * time.Minute), lastAccessAt: now, bytes: 100}, // New, low freq + }, + } + + // With high decay (0.8), older items get penalized more + // Item 2 has low frequency and should still score lower despite being newer + evicted := EvictLowestScoring(collection, 1, 0.8, 0) + + require.Len(t, evicted, 1) + // Item 2 (new but very low freq) should be evicted + assert.Equal(t, 2, evicted[0].(*mockEvictable).id, "low frequency item should be evicted") +} + +// TestEvictToMemoryTarget_Basic tests memory-based eviction +func TestEvictToMemoryTarget_Basic(t *testing.T) { + now := time.Now() + + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 100, createdAt: now, lastAccessAt: now, bytes: 100}, + {id: 2, frequency: 50, createdAt: now.Add(-10 * time.Minute), lastAccessAt: now.Add(-5 * time.Minute), bytes: 150}, + {id: 3, frequency: 200, createdAt: now, lastAccessAt: now, bytes: 200}, + {id: 4, frequency: 10, createdAt: now.Add(-20 * time.Minute), lastAccessAt: now.Add(-15 * time.Minute), bytes: 50}, + }, + } + + // Need to free 200 bytes + evicted := EvictToMemoryTarget(collection, 200, 0.5, 0) + + // Calculate total bytes freed + totalFreed := int64(0) + for _, item := range evicted { + totalFreed += item.EstimatedBytes() + } + + assert.GreaterOrEqual(t, totalFreed, int64(200), "should free at least target bytes") + assert.NotEmpty(t, evicted, "should evict some items") + + // Item 4 should be evicted first (lowest score) + assert.Equal(t, 4, evicted[0].(*mockEvictable).id, "should evict lowest scoring item first") +} + +// TestEvictToMemoryTarget_ExactTarget tests hitting exact memory target +func TestEvictToMemoryTarget_ExactTarget(t *testing.T) { + now := time.Now() + + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 100, createdAt: now, lastAccessAt: now, bytes: 100}, + {id: 2, frequency: 50, createdAt: now.Add(-10 * time.Minute), lastAccessAt: now, bytes: 100}, + {id: 3, frequency: 10, createdAt: now.Add(-20 * time.Minute), lastAccessAt: now, bytes: 100}, + }, + } + + // Need to free exactly 100 bytes + evicted := EvictToMemoryTarget(collection, 100, 0.5, 0) + + totalFreed := int64(0) + for _, item := range evicted { + totalFreed += item.EstimatedBytes() + } + + assert.GreaterOrEqual(t, totalFreed, int64(100), "should free at least 100 bytes") + assert.Len(t, collection.items, 2, "should have 2 items remaining") +} + +// TestEvictToMemoryTarget_EvictAll tests evicting all items for memory +func TestEvictToMemoryTarget_EvictAll(t *testing.T) { + now := time.Now() + + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 100, createdAt: now, lastAccessAt: now, bytes: 100}, + {id: 2, frequency: 50, createdAt: now, lastAccessAt: now, bytes: 150}, + }, + } + + // Need more bytes than available + evicted := EvictToMemoryTarget(collection, 10000, 0.5, 0) + + assert.Len(t, evicted, 2, "should evict all items") + assert.Empty(t, collection.items, "collection should be empty") + + totalFreed := int64(0) + for _, item := range evicted { + totalFreed += item.EstimatedBytes() + } + assert.Equal(t, int64(250), totalFreed, "should free all available bytes") +} + +// TestEvictToMemoryTarget_ZeroOrNegative tests edge cases +func TestEvictToMemoryTarget_ZeroOrNegative(t *testing.T) { + now := time.Now() + + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 100, createdAt: now, lastAccessAt: now, bytes: 100}, + }, + } + + tests := []struct { + name string + targetByte int64 + }{ + {"zero bytes", 0}, + {"negative bytes", -100}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + evicted := EvictToMemoryTarget(collection, tt.targetByte, 0.5, 0) + assert.Nil(t, evicted, "should return nil") + assert.Len(t, collection.items, 1, "collection should be unchanged") + }) + } +} + +// TestEvictToMemoryTarget_EmptyCollection tests evicting from empty collection +func TestEvictToMemoryTarget_EmptyCollection(t *testing.T) { + collection := &mockCollection{items: []*mockEvictable{}} + + evicted := EvictToMemoryTarget(collection, 1000, 0.5, 0) + + assert.Empty(t, evicted, "should return empty slice") +} + +// TestEvictToMemoryTarget_IncrementalEviction tests that items are evicted one by one in score order +func TestEvictToMemoryTarget_IncrementalEviction(t *testing.T) { + now := time.Now() + + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 100, createdAt: now, lastAccessAt: now, bytes: 50}, // High score + {id: 2, frequency: 50, createdAt: now.Add(-10 * time.Minute), lastAccessAt: now.Add(-5 * time.Minute), bytes: 50}, // Medium score + {id: 3, frequency: 10, createdAt: now.Add(-30 * time.Minute), lastAccessAt: now.Add(-25 * time.Minute), bytes: 50}, // Low score + }, + } + + // Need to free 60 bytes (should evict 2 items) + evicted := EvictToMemoryTarget(collection, 60, 0.5, 0) + + require.Len(t, evicted, 2, "should evict 2 items to reach target") + assert.Equal(t, 3, evicted[0].(*mockEvictable).id, "should evict lowest scoring item first") + assert.Equal(t, 2, evicted[1].(*mockEvictable).id, "should evict second lowest scoring item") + assert.Len(t, collection.items, 1, "should have 1 item remaining") + assert.Equal(t, 1, collection.items[0].id, "highest scoring item should remain") +} + +// TestEvictToMemoryTarget_LargeItems tests eviction with varying item sizes +func TestEvictToMemoryTarget_LargeItems(t *testing.T) { + now := time.Now() + + collection := &mockCollection{ + items: []*mockEvictable{ + {id: 1, frequency: 10, createdAt: now.Add(-30 * time.Minute), lastAccessAt: now.Add(-25 * time.Minute), bytes: 1000}, // Low score, large + {id: 2, frequency: 100, createdAt: now, lastAccessAt: now, bytes: 10}, // High score, small + }, + } + + // Need to free 500 bytes - should evict item 1 even though it's large + evicted := EvictToMemoryTarget(collection, 500, 0.5, 0) + + require.Len(t, evicted, 1, "should evict 1 item") + assert.Equal(t, 1, evicted[0].(*mockEvictable).id, "should evict large item with lowest score") + + totalFreed := evicted[0].EstimatedBytes() + assert.Equal(t, int64(1000), totalFreed, "should free 1000 bytes") +} diff --git a/pkg/logs/patterns/eviction/score.go b/pkg/logs/patterns/eviction/score.go new file mode 100644 index 000000000000..d7e5db55600e --- /dev/null +++ b/pkg/logs/patterns/eviction/score.go @@ -0,0 +1,55 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package eviction provides shared eviction scoring algorithms for patterns and tags. +package eviction + +import ( + "math" + "time" +) + +// CalculateScore calculates an eviction score using frequency and temporal metadata. +// Lower scores indicate higher priority for eviction. +// +// The score combines: +// - Frequency (usage count): More frequent items get higher scores +// - Age decay: Older items gradually lose priority +// - Recency boost: Recently accessed items get bonus points +// Formula: score = (frequency / (1 + age)^decayFactor) * (1 + recencyBoost) +// +// This algorithm uses power-law decay to balance frequency with age, ensuring that +// old but still-used items aren't evicted before truly unused ones. +func CalculateScore(frequency float64, createdAt, lastAccessAt, now time.Time, decayFactor float64) float64 { + // Age-based decay (from CreatedAt) + ageDays := now.Sub(createdAt).Hours() / 24.0 + + // Clamp to reasonable range [0, 365] to handle clock skew + if ageDays < 0 { + ageDays = 0 // Clock moved backward + } else if ageDays > 365 { + ageDays = 365 // Clock moved forward or genuinely old item + } + + // Apply power-law decay: score = frequency / (1 + age)^decayFactor + ageDecay := 1.0 / math.Pow(1.0+ageDays, decayFactor) + baseScore := frequency * ageDecay + + // Recency boost (from LastAccessAt) + hoursSinceAccess := now.Sub(lastAccessAt).Hours() + if hoursSinceAccess < 0 { + hoursSinceAccess = 0 // Handle clock skew + } + + // Items accessed recently get a bonus (hyperbolic decay) + // recencyBoost ranges from ~1.0 (just accessed) to ~0.0 (very old access) + recencyBoost := 1.0 / (1.0 + hoursSinceAccess/24.0) + + // Combine base score with recency boost + // Frequency is primary signal, recency is secondary + finalScore := baseScore * (1.0 + recencyBoost) + + return finalScore +} diff --git a/pkg/logs/patterns/eviction/score_test.go b/pkg/logs/patterns/eviction/score_test.go new file mode 100644 index 000000000000..e4c50662f319 --- /dev/null +++ b/pkg/logs/patterns/eviction/score_test.go @@ -0,0 +1,258 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package eviction + +import ( + "math" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +// TestCalculateScore_BasicScoring tests the scoring algorithm with various inputs +func TestCalculateScore_BasicScoring(t *testing.T) { + now := time.Now() + + tests := []struct { + name string + frequency float64 + createdAt time.Time + lastAccessAt time.Time + decayFactor float64 + expectedMin float64 + expectedMax float64 + }{ + { + name: "brand new item high frequency", + frequency: 10000, + createdAt: now, + lastAccessAt: now, + decayFactor: 0.5, + expectedMin: 19000, // ~10000 * 1.0 * 2.0 (high recency boost) + expectedMax: 21000, + }, + { + name: "30 day old item with decay", + frequency: 10000, + createdAt: now.Add(-30 * 24 * time.Hour), + lastAccessAt: now.Add(-1 * time.Hour), + decayFactor: 0.5, + expectedMin: 3000, // ~10000 / (1+30)^0.5 * recency + expectedMax: 4000, + }, + { + name: "low frequency item", + frequency: 100, + createdAt: now.Add(-7 * 24 * time.Hour), + lastAccessAt: now.Add(-6 * 24 * time.Hour), + decayFactor: 0.5, + expectedMin: 30, // Low count, week old, not accessed recently + expectedMax: 50, + }, + { + name: "no decay factor (pure LFU)", + frequency: 5000, + createdAt: now.Add(-60 * 24 * time.Hour), + lastAccessAt: now, + decayFactor: 0.0, + expectedMin: 9000, // 5000 * 1.0 * ~2.0 (no age decay, high recency) + expectedMax: 11000, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + score := CalculateScore(tt.frequency, tt.createdAt, tt.lastAccessAt, now, tt.decayFactor) + + t.Logf("Actual score: %.2f (expected range: %.0f - %.0f)", score, tt.expectedMin, tt.expectedMax) + assert.GreaterOrEqual(t, score, tt.expectedMin, + "Score should be at least %f, got %f", tt.expectedMin, score) + assert.LessOrEqual(t, score, tt.expectedMax, + "Score should be at most %f, got %f", tt.expectedMax, score) + }) + } +} + +// TestCalculateScore_ZeroFrequency tests that zero frequency gives zero score +func TestCalculateScore_ZeroFrequency(t *testing.T) { + now := time.Now() + score := CalculateScore(0, now.Add(-10*24*time.Hour), now, now, 0.5) + assert.Equal(t, 0.0, score, "Zero frequency should give zero score") +} + +// TestCalculateScore_ClockSkewBackward tests negative age handling (clock went backwards) +func TestCalculateScore_ClockSkewBackward(t *testing.T) { + now := time.Now() + // Item created "in the future" (clock went backwards) + score := CalculateScore(1000, now.Add(5*time.Minute), now, now, 0.5) + + // Should treat as brand new item (age = 0) + // Score should be roughly: 1000 * 1.0 * 2.0 = ~2000 + assert.Greater(t, score, 1800.0, "Should not break with negative age") + assert.Less(t, score, 2200.0, "Should treat as brand new item") +} + +// TestCalculateScore_ClockSkewForward tests extreme age capping (clock jumped forward) +func TestCalculateScore_ClockSkewForward(t *testing.T) { + now := time.Now() + + // Item appears 500 days old (clock jumped forward) + score500 := CalculateScore(10000, now.Add(-500*24*time.Hour), now, now, 0.5) + + // Compare with 365 days old - should be similar due to capping + score365 := CalculateScore(10000, now.Add(-365*24*time.Hour), now, now, 0.5) + + // Age should be capped at 365 days + // Expected: (10000 / (1+365)^0.5) * (1 + 1/(1+0)) = (10000 / 19.13) * 2 ≈ 1045 + assert.Greater(t, score500, 900.0, "Should not be excessively penalized by extreme age") + assert.Less(t, score500, 1200.0, "Should be capped at ~365 days worth of decay") + assert.InDelta(t, score500, score365, 50.0, "500-day and 365-day items should score similarly due to capping") +} + +// TestCalculateScore_RecencyBoost tests that recent access increases scores +func TestCalculateScore_RecencyBoost(t *testing.T) { + now := time.Now() + createdAt := now.Add(-30 * 24 * time.Hour) + frequency := 1000.0 + + // Test different recency levels + scoreJustAccessed := CalculateScore(frequency, createdAt, now, now, 0.5) + score1Hour := CalculateScore(frequency, createdAt, now.Add(-1*time.Hour), now, 0.5) + score7Days := CalculateScore(frequency, createdAt, now.Add(-7*24*time.Hour), now, 0.5) + + // More recent access should give higher scores + assert.Greater(t, scoreJustAccessed, score1Hour, + "Just accessed (%.2f) should score higher than 1 hour ago (%.2f)", scoreJustAccessed, score1Hour) + assert.Greater(t, score1Hour, score7Days, + "1 hour ago (%.2f) should score higher than 7 days ago (%.2f)", score1Hour, score7Days) +} + +// TestCalculateScore_AgeDecay tests that scores decrease as age increases +func TestCalculateScore_AgeDecay(t *testing.T) { + now := time.Now() + frequency := 10000.0 + decayFactor := 0.5 + + tests := []struct { + ageDays int + }{ + {1}, + {7}, + {30}, + {90}, + } + + // Track that scores decrease as age increases + previousScore := math.Inf(1) + + for _, tt := range tests { + t.Run(string(rune(tt.ageDays)), func(t *testing.T) { + score := CalculateScore( + frequency, + now.Add(-time.Duration(tt.ageDays)*24*time.Hour), + now, + now, + decayFactor, + ) + + assert.Greater(t, score, 0.0, "Score should be positive") + assert.Less(t, score, previousScore, + "Older items should have lower scores (age decay working)") + + previousScore = score + }) + } +} + +// TestCalculateScore_DecayFactorComparison tests different decay factors +func TestCalculateScore_DecayFactorComparison(t *testing.T) { + now := time.Now() + createdAt := now.Add(-30 * 24 * time.Hour) + frequency := 10000.0 + + // Higher decay factor = more aggressive decay + scoreNoDecay := CalculateScore(frequency, createdAt, now, now, 0.0) + scoreMediumDecay := CalculateScore(frequency, createdAt, now, now, 0.5) + scoreHighDecay := CalculateScore(frequency, createdAt, now, now, 1.0) + + assert.Greater(t, scoreNoDecay, scoreMediumDecay, + "No decay should give higher score than medium decay") + assert.Greater(t, scoreMediumDecay, scoreHighDecay, + "Medium decay should give higher score than high decay") +} + +// TestCalculateScore_FrequencyMatters tests that frequency is the primary signal +func TestCalculateScore_FrequencyMatters(t *testing.T) { + now := time.Now() + createdAt := now.Add(-7 * 24 * time.Hour) + + highScore := CalculateScore(10000, createdAt, now, now, 0.5) + lowScore := CalculateScore(100, createdAt, now, now, 0.5) + + assert.Greater(t, highScore, lowScore, + "High frequency should have much higher score") + assert.Greater(t, highScore/lowScore, 50.0, + "Score ratio should roughly match frequency ratio") +} + +// TestCalculateScore_RealWorldScenario tests realistic competing items +func TestCalculateScore_RealWorldScenario(t *testing.T) { + now := time.Now() + + // Item A: Old but very popular, still active + scoreA := CalculateScore( + 50000, // High frequency + now.Add(-60*24*time.Hour), // 60 days old + now.Add(-1*time.Hour), // Recently accessed + now, + 0.5, + ) + + // Item B: Recent but low frequency, dormant + scoreB := CalculateScore( + 100, // Low frequency + now.Add(-7*24*time.Hour), // 7 days old + now.Add(-6*24*time.Hour), // Not accessed recently + now, + 0.5, + ) + + assert.Greater(t, scoreA, scoreB, + "High frequency active item (A=%.2f) should beat low frequency dormant item (B=%.2f)", scoreA, scoreB) +} + +// TestCalculateScore_EvictionPriority tests realistic eviction ordering +func TestCalculateScore_EvictionPriority(t *testing.T) { + now := time.Now() + + items := []struct { + id int + frequency float64 + createdAt time.Time + lastAccessAt time.Time + description string + }{ + {1, 10000, now.Add(-30 * 24 * time.Hour), now, "high frequency, recent"}, + {2, 50, now.Add(-5 * 24 * time.Hour), now.Add(-4 * 24 * time.Hour), "low frequency, dormant"}, + {3, 5000, now.Add(-15 * 24 * time.Hour), now.Add(-1 * time.Hour), "medium frequency, active"}, + {4, 100, now.Add(-90 * 24 * time.Hour), now.Add(-89 * 24 * time.Hour), "old, low frequency, dormant"}, + } + + // Calculate scores + scores := make(map[int]float64) + for _, item := range items { + scores[item.id] = CalculateScore(item.frequency, item.createdAt, item.lastAccessAt, now, 0.5) + t.Logf("Item %d (%s): score=%.2f", item.id, item.description, scores[item.id]) + } + + // Verify expected ordering: 1 > 3 > 2 > 4 + assert.Greater(t, scores[1], scores[2], "Item 1 should score higher than 2") + assert.Greater(t, scores[3], scores[2], "Item 3 should score higher than 2") + assert.Greater(t, scores[2], scores[4], "Item 2 should score higher than 4") + assert.Less(t, scores[4], scores[3], "Item 4 should be evicted before 3") + assert.Less(t, scores[2], scores[1], "Item 2 should be evicted before 1") +} diff --git a/pkg/logs/patterns/eviction/types.go b/pkg/logs/patterns/eviction/types.go new file mode 100644 index 000000000000..4b294991866a --- /dev/null +++ b/pkg/logs/patterns/eviction/types.go @@ -0,0 +1,74 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package eviction provides shared eviction scoring algorithms for patterns and tags. +package eviction + +import ( + "time" +) + +// Evictable represents any item that can be evicted based on usage patterns. +type Evictable interface { + // GetFrequency returns the usage count/frequency + GetFrequency() float64 + + // GetCreatedAt returns when the item was created + GetCreatedAt() time.Time + + // GetLastAccessAt returns when the item was last accessed + GetLastAccessAt() time.Time + + // EstimatedBytes returns the approximate memory footprint of the item + EstimatedBytes() int64 +} + +// EvictableCollection represents a collection of evictables that can be evicted. +type EvictableCollection interface { + // CollectEvictables returns all evictable items from the collection + CollectEvictables() []Evictable + + // RemoveEvictable removes a specific item from the collection + RemoveEvictable(item Evictable) +} + +// heapItem wraps an Evictable with its cached eviction score for heap operations. +type heapItem struct { + item Evictable + score float64 +} + +// evictionHeap implements heap.Interface for efficient eviction based on scores. +// It's a min-heap: items with the lowest eviction scores bubble to the top. +type evictionHeap struct { + items []heapItem +} + +// Len returns the number of items in the heap +func (h evictionHeap) Len() int { return len(h.items) } + +// Less reports whether item i should sort before item j (min-heap: lower scores first) +func (h evictionHeap) Less(i, j int) bool { + return h.items[i].score < h.items[j].score +} + +// Swap exchanges items i and j +func (h evictionHeap) Swap(i, j int) { + h.items[i], h.items[j] = h.items[j], h.items[i] +} + +// Push adds an item to the heap (required by heap.Interface) +func (h *evictionHeap) Push(x interface{}) { + h.items = append(h.items, x.(heapItem)) +} + +// Pop removes and returns the minimum item (required by heap.Interface) +func (h *evictionHeap) Pop() interface{} { + old := h.items + n := len(old) + item := old[n-1] + h.items = old[0 : n-1] + return item +}