Skip to content

Commit 2ecccea

Browse files
committed
CLDSRV-783: Replace GCRA with token consumption in request path
Switch from optimistic GCRA with reconciliation to token reservation. Workers consume tokens from local buffer instead of evaluating GCRA per request. This keeps Redis out of the hot path while enforcing strict quotas. Changes: - helpers.js: Use token consumption instead of GCRA evaluation - server.js: Start token refill job instead of sync job - cleanup.js: Add request-timestamp cleanup (expireRequestTimestamps) - gcra.js: Compute interval per node instead of per worker (workers parameter now unused, kept for compatibility)
1 parent 091d54a commit 2ecccea

File tree

4 files changed

+50
-50
lines changed

4 files changed

+50
-50
lines changed

lib/api/apiUtils/rateLimit/cleanup.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
const { expireCounters, expireCachedConfigs } = require('./cache');
1+
const { expireCounters, expireCachedConfigs, expireRequestTimestamps } = require('./cache');
22
const { rateLimitCleanupInterval } = require('../../../../constants');
33

44
let cleanupInterval = null;
@@ -26,6 +26,7 @@ function startCleanupJob(log, options = {}) {
2626
const now = Date.now();
2727
const expiredCounters = expireCounters(now);
2828
const expiredConfigs = expireCachedConfigs(now);
29+
expireRequestTimestamps(now);
2930

3031
if (expiredCounters > 0 || expiredConfigs > 0) {
3132
log.debug('Rate limit cleanup completed', {

lib/api/apiUtils/rateLimit/gcra.js

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,34 +52,42 @@ function evaluate(emptyAt, arrivedAt, interval, burstCapacity) {
5252
*
5353
* In a distributed setup with N nodes and W workers per node:
5454
* - Global limit: R requests per second
55-
 * - Per-worker limit: R / N / W
5655
 * - Per-node limit: R / N
5756
 * - Interval = 1000ms / (R / N)
*
5958
* The interval represents milliseconds between requests. We divide 1000 (milliseconds
6059
* in a second) by the rate to convert "requests per second" to "milliseconds per request".
6160
*
6261
* Examples:
63-
* - 10 req/s → interval = 1000/10 = 100ms (one request every 100ms)
64-
* - 1 req/s → interval = 1000/1 = 1000ms (one request every second)
65-
* - 0.5 req/s → interval = 1000/0.5 = 2000ms (one request every 2 seconds)
62+
 * - 100 req/s ÷ 1 node = 100 req/s per node → interval = 10ms
63+
 * - 600 req/s ÷ 6 nodes = 100 req/s per node → interval = 10ms
64+
*
65+
* Dynamic work-stealing is achieved through Redis sync reconciliation:
66+
* - Each worker evaluates locally at its fixed per-worker quota
67+
* - Workers report consumed / workers to Redis
68+
* - Redis sums all workers' shares
69+
* - Workers overwrite local counters with Redis values
70+
* - Idle workers' unused capacity accumulates in Redis
71+
* - Busy workers pull back higher emptyAt values and throttle proportionally
6672
*
6773
 * IMPORTANT: Limit must be >= N (nodes), otherwise per-node rate < 1 req/s
6874
* which results in intervals > 1000ms and effectively blocks traffic.
6975
*
7076
* @param {number} limit - Global requests per second
7177
* @param {number} nodes - Total number of nodes
72-
* @param {number} workers - Number of workers per node
78+
* @param {number} _workers - Number of workers per node (unused in token reservation)
7379
* @returns {number} Interval in milliseconds between requests
7480
*/
75-
function calculateInterval(limit, nodes, workers) {
76-
// Per-worker rate = limit / nodes / workers
77-
const perWorkerRate = limit / nodes / workers;
81+
// eslint-disable-next-line no-unused-vars
82+
function calculateInterval(limit, nodes, _workers) {
83+
// Per-node rate = limit / nodes (workers NOT divided)
84+
// This allows dynamic work-stealing - workers evaluate at node quota
85+
const perNodeRate = limit / nodes;
7886

7987
// Interval = 1000ms / rate
8088
// Dividing 1000 (ms in a second) by rate converts "requests per second"
8189
// to "milliseconds between requests". Higher rate = smaller interval = more requests.
82-
return 1000 / perWorkerRate;
90+
return 1000 / perNodeRate;
8391
}
8492

8593
module.exports = {

lib/api/apiUtils/rateLimit/helpers.js

Lines changed: 19 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
const { config } = require('../../../Config');
22
const cache = require('./cache');
3-
const { evaluate, calculateInterval } = require('./gcra');
43
const constants = require('../../../../constants');
4+
const { getTokenBucket } = require('./tokenBucket');
55

66
/**
77
* Get rate limit configuration from cache only (no metadata fetch)
@@ -75,10 +75,10 @@ function extractAndCacheRateLimitConfig(bucket, bucketName, log) {
7575
}
7676

7777
/**
78-
* Check rate limit with pre-resolved configuration
78+
* Check rate limit with pre-resolved configuration using token reservation
7979
*
80-
* Uses GCRA algorithm to determine if request should be rate limited.
81-
* Updates counter if request is allowed.
80+
* Uses token bucket: Workers maintain local tokens granted by Redis.
81+
* Token consumption is pure in-memory (fast). Refills happen async in background.
8282
*
8383
* @param {string} bucketName - Bucket name
8484
* @param {object|null} limitConfig - Pre-resolved rate limit config
@@ -92,50 +92,29 @@ function checkRateLimitWithConfig(bucketName, limitConfig, log, callback) {
9292
return callback(null, false);
9393
}
9494

95-
// Calculate interval for this limit
96-
const nodes = config.rateLimiting.nodes || 1;
97-
const workers = config.clusters || 1;
98-
const interval = calculateInterval(limitConfig.limit, nodes, workers);
99-
100-
// Get burst capacity (default to 1 if not configured)
101-
const burstCapacity = config.rateLimiting.bucket?.defaultBurstCapacity ||
102-
constants.rateLimitDefaultBurstCapacity;
103-
const bucketSize = burstCapacity * 1000;
104-
105-
// Get counter (in-memory only, no sync with other workers)
106-
const counterKey = `bucket:${bucketName}:rps`;
107-
const emptyAt = cache.getCounter(counterKey) || 0;
108-
const arrivedAt = Date.now();
109-
110-
log.debug('Checking rate limit with GCRA', {
111-
bucketName,
112-
limit: limitConfig.limit,
113-
source: limitConfig.source,
114-
interval,
115-
emptyAt,
116-
arrivedAt,
117-
});
118-
119-
// Evaluate GCRA
120-
const result = evaluate(emptyAt, arrivedAt, interval, bucketSize);
121-
122-
// Update counter if allowed
123-
if (result.allowed) {
124-
cache.setCounter(counterKey, result.newEmptyAt);
125-
log.debug('Rate limit check: allowed', {
95+
// Get or create token bucket for this bucket
96+
const tokenBucket = getTokenBucket(bucketName, limitConfig, log);
97+
98+
// Try to consume a token (in-memory, no Redis)
99+
const allowed = tokenBucket.tryConsume();
100+
101+
if (allowed) {
102+
log.trace('Rate limit check: allowed (token consumed)', {
126103
bucketName,
127-
newEmptyAt: result.newEmptyAt,
104+
tokensRemaining: tokenBucket.tokens,
128105
});
129106
} else {
130-
log.debug('Rate limit check: denied', {
107+
log.debug('Rate limit check: denied (no tokens available)', {
131108
bucketName,
132109
limit: limitConfig.limit,
133-
allowAt: result.newEmptyAt,
134-
retryAfterMs: result.newEmptyAt - arrivedAt,
110+
source: limitConfig.source,
135111
});
136112
}
137113

138-
return callback(null, !result.allowed);
114+
// Return inverse: callback expects "rateLimited" boolean
115+
// allowed=true → rateLimited=false
116+
// allowed=false → rateLimited=true
117+
return callback(null, !allowed);
139118
}
140119

141120
module.exports = {

lib/server.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ const {
2424
isManagementAgentUsed,
2525
} = require('./management/agentClient');
2626
const { startCleanupJob } = require('./api/apiUtils/rateLimit/cleanup');
27+
const { startRefillJob, stopRefillJob } = require('./api/apiUtils/rateLimit/refillJob');
2728

2829
const HttpAgent = require('agentkeepalive');
2930
const QuotaService = require('./utilization/instance');
@@ -293,6 +294,10 @@ class S3Server {
293294
*/
294295
cleanUp() {
295296
logger.info('server shutting down');
297+
// Stop token refill job if running
298+
if (this.config.rateLimiting?.enabled) {
299+
stopRefillJob();
300+
}
296301
Promise.all(this.servers.map(server =>
297302
new Promise(resolve => server.close(resolve))
298303
)).then(() => process.exit(0));
@@ -360,6 +365,13 @@ class S3Server {
360365
// Start rate limit cleanup job
361366
if (this.config.rateLimiting?.enabled) {
362367
startCleanupJob(log);
368+
// Start token refill job for token reservation system
369+
startRefillJob().catch(err => {
370+
log.error('Failed to start token refill job', {
371+
error: err.message,
372+
stack: err.stack,
373+
});
374+
});
363375
}
364376

365377
// TODO this should wait for metadata healthcheck to be ok

0 commit comments

Comments
 (0)