Skip to content

Commit 091d54a

Browse files
committed
CLDSRV-783: Add token reservation system for rate limiting
Implement a token reservation architecture in which workers request capacity in advance from Redis. Workers maintain local token buffers and consume tokens in-memory (no Redis call in the hot path). A background refill job requests tokens every 100ms asynchronously. Redis enforces GCRA atomically at token-grant time, ensuring strict quota enforcement across distributed workers.

Components:
- grantTokens.lua: Redis Lua script for atomic token granting
- tokenBucket.js: WorkerTokenBucket class for local token management
- refillJob.js: Background job for async token replenishment
- client.js: Add grantTokens() method to Redis client
1 parent fef2c04 commit 091d54a

File tree

4 files changed

+530
-4
lines changed

4 files changed

+530
-4
lines changed

lib/api/apiUtils/rateLimit/client.js

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,22 @@ const Redis = require('ioredis');
55
const { config } = require('../../../Config');
66

77
const updateCounterScript = fs.readFileSync(`${__dirname }/updateCounter.lua`).toString();
8+
const reconcileCounterScript = fs.readFileSync(`${__dirname }/reconcileCounter.lua`).toString();
9+
const grantTokensScript = fs.readFileSync(`${__dirname }/grantTokens.lua`).toString();
810

911
// Registry of the Lua scripts shipped with this module. Each entry pairs the
// script source with the number of Redis keys it receives (KEYS[] vs ARGV[]).
// NOTE(review): presumably these are registered on the ioredis client via
// defineCommand (this.redis.grantTokens(...) is called below) — confirm in
// the constructor, which is outside this diff.
const SCRIPTS = {
    updateCounter: {
        numberOfKeys: 1,
        lua: updateCounterScript,
    },
    reconcileCounter: {
        numberOfKeys: 1,
        lua: reconcileCounterScript,
    },
    grantTokens: {
        numberOfKeys: 1,
        lua: grantTokensScript,
    },
};
1525

1626
class RateLimitClient {
@@ -25,13 +35,13 @@ class RateLimitClient {
2535
/**
2636
* @typedef {Object} CounterUpdateBatch
2737
* @property {string} key - counter key
28-
* @property {number} cost - cost to add to counter
38+
* @property {number} cost - per-worker cost to add to counter
2939
*/
3040

3141
/**
3242
* @typedef {Object} CounterUpdateBatchResult
3343
* @property {string} key - counter key
34-
* @property {number} value - current value of counter
44+
* @property {number} value - current value of counter after update
3545
*/
3646

3747
/**
@@ -41,8 +51,9 @@ class RateLimitClient {
4151
*/
4252

4353
/**
44-
* Add cost to the counter at key.
45-
* Returns the new value for the counter
54+
* Update local counter values in Redis by adding per-worker costs.
55+
* Each worker divides its consumption by worker count before syncing.
56+
* Redis sums all workers' costs to get total node consumption.
4657
*
4758
* @param {CounterUpdateBatch[]} batch - batch of counter updates
4859
* @param {RateLimitClient~batchUpdate} cb
@@ -65,6 +76,50 @@ class RateLimitClient {
6576
})));
6677
});
6778
}
79+
80+
/**
81+
* @callback RateLimitClient~grantTokens
82+
* @param {Error|null} err
83+
* @param {number|undefined} granted - Number of tokens granted (0 if denied)
84+
*/
85+
86+
/**
87+
* Request tokens from Redis with atomic GCRA enforcement
88+
*
89+
* This method atomically:
90+
* 1. Evaluates GCRA for N tokens
91+
* 2. Grants tokens if quota available
92+
* 3. Advances Redis counter by granted tokens
93+
*
94+
* Used by token reservation system to request capacity in advance.
95+
*
96+
* @param {string} bucketName - Bucket name
97+
* @param {number} requested - Number of tokens requested
98+
* @param {number} interval - Interval per request in ms
99+
* @param {number} burstCapacity - Burst capacity in ms
100+
* @param {RateLimitClient~grantTokens} cb - Callback
101+
*/
102+
grantTokens(bucketName, requested, interval, burstCapacity, cb) {
103+
const key = `throttling:bucket:${bucketName}:rps`;
104+
const now = Date.now();
105+
106+
this.redis.grantTokens(
107+
key,
108+
requested,
109+
interval,
110+
burstCapacity,
111+
now,
112+
(err, result) => {
113+
if (err) {
114+
return cb(err);
115+
}
116+
117+
// Result is number of tokens granted (0 if denied, partial if limited)
118+
const granted = parseInt(result, 10);
119+
return cb(null, granted);
120+
}
121+
);
122+
}
68123
}
69124

70125
let instance;
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
-- grantTokens.lua
-- Atomically evaluates GCRA and grants tokens for rate limiting.
--
-- This script implements token reservation: workers request capacity
-- in advance, and this script enforces the node-level quota using GCRA
-- (Generic Cell Rate Algorithm).
--
-- KEYS[1]: Counter key (e.g., "throttling:bucket:mybucket:rps")
-- ARGV[1]: Requested token count (number of requests)
-- ARGV[2]: Interval per request in milliseconds
-- ARGV[3]: Burst capacity in milliseconds (bucket size)
-- ARGV[4]: Current timestamp in milliseconds (arrivedAt)
--
-- Returns: Number of tokens granted (0 if quota exhausted, partial if limited)

local key = KEYS[1]
local requested = tonumber(ARGV[1])
local interval = tonumber(ARGV[2])
local burstCapacity = tonumber(ARGV[3])
local arrivedAt = tonumber(ARGV[4])

-- Guard against non-positive requests: the previous version could return
-- a negative "grant" and move the counter backwards for requested < 0,
-- and performed a pointless SET/PEXPIRE for requested == 0.
if requested <= 0 then
    return 0
end

-- The counter stores the "emptyAt" timestamp: the moment at which all
-- previously granted work has drained. A missing key means an empty bucket.
local emptyAt = tonumber(redis.call('GET', key) or 0)

-- GCRA: new work is scheduled from whichever is later — the drain time of
-- prior grants, or now.
local expectedTime = math.max(emptyAt, arrivedAt)

-- Capacity (in milliseconds of work) still available inside the burst
-- window ending at arrivedAt + burstCapacity.
local availableCapacity = (arrivedAt + burstCapacity) - expectedTime
if availableCapacity <= 0 then
    -- Quota exhausted: nothing fits within the burst window.
    return 0
end

-- Grant as many whole tokens as fit, capped at the request. This single
-- expression covers both the full-grant case (available >= requested cost)
-- and the partial-grant case of the previous two-branch version.
local granted = math.min(requested, math.floor(availableCapacity / interval))
if granted <= 0 then
    -- Some capacity remains, but less than one token's worth.
    return 0
end

-- Advance the counter by the granted work and keep the key alive until
-- the bucket would drain, plus a 10s safety buffer.
redis.call('SET', key, expectedTime + granted * interval)
redis.call('PEXPIRE', key, burstCapacity + 10000)

return granted
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
const werelogs = require('werelogs');
2+
3+
const { getAllTokenBuckets, cleanupTokenBuckets } = require('./tokenBucket');
4+
5+
const logger = new werelogs.Logger('S3');
6+
7+
// Timer handle for the periodic refill loop; null whenever the job is
// stopped (see startRefillJob/stopRefillJob).
let refillInterval = null;

// Refill interval in milliseconds (how often to check and refill buckets)
const REFILL_INTERVAL_MS = 100;

// Cleanup interval for expired buckets (every 10 seconds)
const CLEANUP_INTERVAL_MS = 10000;
// Counts refill ticks; a cleanup sweep runs once enough ticks have elapsed
// to cover CLEANUP_INTERVAL_MS, then the counter resets.
let cleanupCounter = 0;
16+
/**
17+
* Background refill job for token buckets
18+
*
19+
* This job runs periodically (default: every 100ms) and:
20+
* 1. Iterates through all active token buckets
21+
* 2. Triggers async refill for buckets below threshold
22+
* 3. Periodically cleans up expired/idle token buckets
23+
*
24+
* The refills are asynchronous and non-blocking, keeping Redis
25+
* out of the hot request path.
26+
*/
27+
async function refillTokenBuckets() {
28+
const tokenBuckets = getAllTokenBuckets();
29+
30+
if (tokenBuckets.size === 0) {
31+
return {
32+
checked: 0,
33+
refilled: 0,
34+
};
35+
}
36+
37+
let checked = 0;
38+
let refilled = 0;
39+
40+
// Trigger refill for all active buckets
41+
const refillPromises = [];
42+
43+
for (const [bucketName, bucket] of tokenBuckets.entries()) {
44+
checked++;
45+
46+
// Trigger async refill if needed (non-blocking)
47+
const promise = bucket.refillIfNeeded().then(() => {
48+
// Check if refill actually happened
49+
if (bucket.refillCount > 0) {
50+
refilled++;
51+
}
52+
}).catch(err => {
53+
logger.error('Token refill error', {
54+
bucketName,
55+
error: err.message,
56+
stack: err.stack,
57+
});
58+
});
59+
60+
refillPromises.push(promise);
61+
}
62+
63+
// Wait for all refills to complete
64+
await Promise.all(refillPromises);
65+
66+
return {
67+
checked,
68+
refilled,
69+
};
70+
}
71+
72+
/**
73+
* Main refill job loop
74+
* Runs periodically to proactively refill token buckets
75+
*/
76+
async function startRefillJob() {
77+
logger.info('Starting token refill job', {
78+
refillIntervalMs: REFILL_INTERVAL_MS,
79+
cleanupIntervalMs: CLEANUP_INTERVAL_MS,
80+
});
81+
82+
const tick = async () => {
83+
try {
84+
const stats = await refillTokenBuckets();
85+
86+
if (stats.refilled > 0) {
87+
logger.debug('Refill tick completed', stats);
88+
}
89+
90+
// Periodic cleanup (every CLEANUP_INTERVAL_MS)
91+
cleanupCounter++;
92+
if (cleanupCounter * REFILL_INTERVAL_MS >= CLEANUP_INTERVAL_MS) {
93+
const removed = cleanupTokenBuckets();
94+
if (removed > 0) {
95+
logger.debug('Cleaned up expired token buckets', {
96+
removed,
97+
});
98+
}
99+
cleanupCounter = 0;
100+
}
101+
102+
} catch (err) {
103+
logger.error('Refill job error', {
104+
error: err.message,
105+
stack: err.stack,
106+
});
107+
}
108+
};
109+
110+
// Start periodic ticks
111+
refillInterval = setInterval(tick, REFILL_INTERVAL_MS);
112+
}
113+
114+
/**
 * Stop the background refill job.
 * Used during graceful shutdown; safe to call when the job is not running.
 */
function stopRefillJob() {
    if (!refillInterval) {
        return;
    }
    clearInterval(refillInterval);
    refillInterval = null;
    logger.info('Stopped token refill job');
}
125+
126+
// Public API: job lifecycle controls, plus the single-pass refill function
// (exported so it can be invoked directly, e.g. from tests).
module.exports = {
    startRefillJob,
    stopRefillJob,
    refillTokenBuckets,
};

0 commit comments

Comments
 (0)