Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d05720c
Create performance test harness for runs replication service
ericallam Jan 9, 2026
57861d3
improve test harness producer throughput and better organize run outputs
ericallam Jan 9, 2026
0bf8cfa
use a less CPU-intensive way of inserting task runs
ericallam Jan 9, 2026
be64766
use compact insert strategy for runs
ericallam Jan 10, 2026
32f758a
added back in max duration in seconds
ericallam Jan 10, 2026
4f946a4
cleanup the types
ericallam Jan 10, 2026
df12287
simplify
ericallam Jan 10, 2026
55ed118
much better type safety
ericallam Jan 10, 2026
44c5267
fixed types
ericallam Jan 11, 2026
284c196
fix clickhouse tests
ericallam Jan 11, 2026
a2fa855
really fix clickhouse tests
ericallam Jan 11, 2026
f8640f6
Add object-based insert functions and fix index generation
ericallam Jan 11, 2026
5f40360
Fix TypeScript errors in sort functions
ericallam Jan 11, 2026
ca37d7e
Fix sort comparators to return 0 for equal values
ericallam Jan 11, 2026
d1129d7
Remove performance test harness
ericallam Jan 11, 2026
df3ea39
Remove remaining performance harness artifacts
ericallam Jan 11, 2026
b9db398
Update pnpm-lock.yaml
ericallam Jan 11, 2026
4de8899
Stop dynamically loading superjson
ericallam Jan 12, 2026
777db7c
made accessing run and payload fields more type safe
ericallam Jan 13, 2026
8778403
speed up deduplicating of runs by making it more efficient
ericallam Jan 13, 2026
ec6ec2f
Fixed tests and made them less loggy
ericallam Jan 13, 2026
5ddabad
Fixed tests for realz
ericallam Jan 13, 2026
443f63b
added back in null check in getKey
ericallam Jan 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
304 changes: 148 additions & 156 deletions apps/webapp/app/services/runsReplicationService.server.ts

Large diffs are not rendered by default.

161 changes: 161 additions & 0 deletions apps/webapp/test/concurrentFlushScheduler.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import { ConcurrentFlushScheduler } from "~/services/runsReplicationService.server";

// These tests use real timers (setTimeout waits) rather than fake timers,
// so raise the per-test timeout well above the longest wait used below.
vi.setConfig({ testTimeout: 10_000 });

// Minimal item shape used to exercise the scheduler.
// `event` + `id` together form the dedup key (see getKey below), and
// `version` is what shouldReplace compares to decide which duplicate wins.
type TestItem = {
  id: string;
  event: "insert" | "update";
  version: number;
};

describe("ConcurrentFlushScheduler", () => {
  // Helpers shared by the tests below.
  // Real-timer wait; the scheduler flushes on its own interval, so tests
  // sleep past it before asserting.
  const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));
  // Dedup key: one slot per (event, id) pair.
  const keyOf = (item: TestItem) => `${item.event}_${item.id}`;
  // Keep whichever duplicate carries the highest version (ties favor the incoming item).
  const keepLatest = (existing: TestItem, incoming: TestItem) =>
    incoming.version >= existing.version;

  it("should deduplicate items by key, keeping the latest version", async () => {
    const batches: TestItem[][] = [];

    const scheduler = new ConcurrentFlushScheduler<TestItem>({
      batchSize: 100,
      flushInterval: 50,
      maxConcurrency: 1,
      callback: async (_flushId, batch) => {
        batches.push([...batch]);
      },
      getKey: keyOf,
      shouldReplace: keepLatest,
    });

    scheduler.start();

    // First wave: two distinct keys for run_1 (insert vs update) plus run_2.
    scheduler.addToBatch([
      { id: "run_1", event: "insert", version: 1 },
      { id: "run_1", event: "update", version: 2 },
      { id: "run_2", event: "insert", version: 1 },
    ]);

    // Second wave merges into the pending batch key-by-key.
    scheduler.addToBatch([
      { id: "run_1", event: "insert", version: 3 }, // Higher version, should replace
      { id: "run_1", event: "update", version: 1 }, // Lower version, should NOT replace
      { id: "run_2", event: "update", version: 4 },
    ]);

    // Sleep past the 50ms flush interval so the batch is written out.
    await sleep(100);

    scheduler.shutdown();

    expect(batches.length).toBeGreaterThanOrEqual(1);

    const flushed = batches.flat();
    const lookup = (id: string, event: TestItem["event"]) =>
      flushed.find((i) => i.id === id && i.event === event);

    // Each key kept exactly the version the comparator says should win.
    expect(lookup("run_1", "insert")?.version).toBe(3); // Latest version for insert_run_1
    expect(lookup("run_1", "update")?.version).toBe(2); // Original update_run_1 (v1 didn't replace v2)
    expect(lookup("run_2", "insert")?.version).toBe(1); // Only version for insert_run_2
    expect(lookup("run_2", "update")?.version).toBe(4); // Only version for update_run_2
  });

  it("should skip items where getKey returns null", async () => {
    const batches: TestItem[][] = [];

    const scheduler = new ConcurrentFlushScheduler<TestItem>({
      batchSize: 100,
      flushInterval: 50,
      maxConcurrency: 1,
      callback: async (_flushId, batch) => {
        batches.push([...batch]);
      },
      // Items with an empty id have no key and must be dropped entirely.
      getKey: (item) => (item.id ? keyOf(item) : null),
      shouldReplace: keepLatest,
    });

    scheduler.start();

    scheduler.addToBatch([
      { id: "run_1", event: "insert", version: 1 },
      { id: "", event: "insert", version: 2 }, // Should be skipped (null key)
      { id: "run_2", event: "insert", version: 1 },
    ]);

    await sleep(100);

    scheduler.shutdown();

    // Only the two keyed items survive.
    const flushed = batches.flat();
    expect(flushed).toHaveLength(2);
    expect(flushed.map((i) => i.id).sort()).toEqual(["run_1", "run_2"]);
  });

  it("should flush when batch size threshold is reached", async () => {
    const batches: TestItem[][] = [];

    const scheduler = new ConcurrentFlushScheduler<TestItem>({
      batchSize: 3,
      flushInterval: 10000, // Long interval so timer doesn't trigger
      maxConcurrency: 1,
      callback: async (_flushId, batch) => {
        batches.push([...batch]);
      },
      getKey: keyOf,
      shouldReplace: keepLatest,
    });

    scheduler.start();

    // Three unique keys hit the batchSize threshold, forcing an immediate flush.
    scheduler.addToBatch([
      { id: "run_1", event: "insert", version: 1 },
      { id: "run_2", event: "insert", version: 1 },
      { id: "run_3", event: "insert", version: 1 },
    ]);

    await sleep(50);

    // Exactly one size-triggered flush containing all three items.
    expect(batches.length).toBe(1);
    expect(batches[0]).toHaveLength(3);

    scheduler.shutdown();
  });

  it("should respect shouldReplace returning false", async () => {
    const batches: TestItem[][] = [];

    const scheduler = new ConcurrentFlushScheduler<TestItem>({
      batchSize: 100,
      flushInterval: 50,
      maxConcurrency: 1,
      callback: async (_flushId, batch) => {
        batches.push([...batch]);
      },
      getKey: keyOf,
      // Never replace - first item wins
      shouldReplace: () => false,
    });

    scheduler.start();

    scheduler.addToBatch([{ id: "run_1", event: "insert", version: 10 }]);

    scheduler.addToBatch([{ id: "run_1", event: "insert", version: 999 }]);

    await sleep(100);

    scheduler.shutdown();

    const first = batches.flat().find((i) => i.id === "run_1" && i.event === "insert");
    expect(first?.version).toBe(10); // First one wins
  });
});
25 changes: 24 additions & 1 deletion apps/webapp/test/runsReplicationService.part1.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { ClickHouse } from "@internal/clickhouse";
import { containerTest } from "@internal/testcontainers";
import { Logger } from "@trigger.dev/core/logger";
import { setTimeout } from "node:timers/promises";
import { z } from "zod";
import { TaskRunStatus } from "~/database-types";
Expand All @@ -22,6 +21,7 @@ describe("RunsReplicationService (part 1/2)", () => {
compression: {
request: true,
},
logLevel: "warn",
});

const { tracer, exporter } = createInMemoryTracing();
Expand All @@ -40,6 +40,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
tracer,
logLevel: "warn",
});

await runsReplicationService.start();
Expand Down Expand Up @@ -135,6 +136,7 @@ describe("RunsReplicationService (part 1/2)", () => {
compression: {
request: true,
},
logLevel: "warn",
});

const { tracer, exporter } = createInMemoryTracing();
Expand All @@ -153,6 +155,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
tracer,
logLevel: "warn",
});

await runsReplicationService.start();
Expand Down Expand Up @@ -271,6 +274,7 @@ describe("RunsReplicationService (part 1/2)", () => {
const clickhouse = new ClickHouse({
url: clickhouseContainer.getConnectionUrl(),
name: "runs-replication",
logLevel: "warn",
});

const { tracer, exporter } = createInMemoryTracing();
Expand All @@ -289,6 +293,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
tracer,
logLevel: "warn",
});

await runsReplicationService.start();
Expand Down Expand Up @@ -341,6 +346,7 @@ describe("RunsReplicationService (part 1/2)", () => {
const clickhouse = new ClickHouse({
url: clickhouseContainer.getConnectionUrl(),
name: "runs-replication-batching",
logLevel: "warn",
});

const runsReplicationService = new RunsReplicationService({
Expand All @@ -356,6 +362,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockTimeoutMs: 5000,
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
logLevel: "warn",
});

await runsReplicationService.start();
Expand Down Expand Up @@ -443,6 +450,7 @@ describe("RunsReplicationService (part 1/2)", () => {
const clickhouse = new ClickHouse({
url: clickhouseContainer.getConnectionUrl(),
name: "runs-replication-payload",
logLevel: "warn",
});

const runsReplicationService = new RunsReplicationService({
Expand All @@ -458,6 +466,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockTimeoutMs: 5000,
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
logLevel: "warn",
});

await runsReplicationService.start();
Expand Down Expand Up @@ -542,6 +551,7 @@ describe("RunsReplicationService (part 1/2)", () => {
const clickhouse = new ClickHouse({
url: clickhouseContainer.getConnectionUrl(),
name: "runs-replication-payload",
logLevel: "warn",
});

const runsReplicationService = new RunsReplicationService({
Expand All @@ -557,6 +567,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockTimeoutMs: 5000,
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
logLevel: "warn",
});

await runsReplicationService.start();
Expand Down Expand Up @@ -646,6 +657,7 @@ describe("RunsReplicationService (part 1/2)", () => {
const clickhouse = new ClickHouse({
url: clickhouseContainer.getConnectionUrl(),
name: "runs-replication-update",
logLevel: "warn",
});

const runsReplicationService = new RunsReplicationService({
Expand All @@ -661,6 +673,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockTimeoutMs: 5000,
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
logLevel: "warn",
});

await runsReplicationService.start();
Expand Down Expand Up @@ -751,6 +764,7 @@ describe("RunsReplicationService (part 1/2)", () => {
const clickhouse = new ClickHouse({
url: clickhouseContainer.getConnectionUrl(),
name: "runs-replication-delete",
logLevel: "warn",
});

const runsReplicationService = new RunsReplicationService({
Expand All @@ -766,6 +780,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockTimeoutMs: 5000,
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
logLevel: "warn",
});

await runsReplicationService.start();
Expand Down Expand Up @@ -849,6 +864,7 @@ describe("RunsReplicationService (part 1/2)", () => {
const clickhouse = new ClickHouse({
url: clickhouseContainer.getConnectionUrl(),
name: "runs-replication-shutdown-handover",
logLevel: "warn",
});

// Service A
Expand All @@ -865,6 +881,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockTimeoutMs: 5000,
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
logLevel: "warn",
});

await runsReplicationServiceA.start();
Expand Down Expand Up @@ -968,6 +985,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockTimeoutMs: 5000,
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
logLevel: "warn",
});

await runsReplicationServiceB.start();
Expand Down Expand Up @@ -997,6 +1015,7 @@ describe("RunsReplicationService (part 1/2)", () => {
const clickhouse = new ClickHouse({
url: clickhouseContainer.getConnectionUrl(),
name: "runs-replication-shutdown-after-processed",
logLevel: "warn",
});

// Service A
Expand All @@ -1013,6 +1032,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockTimeoutMs: 5000,
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
logLevel: "warn",
});

await runsReplicationServiceA.start();
Expand Down Expand Up @@ -1114,6 +1134,7 @@ describe("RunsReplicationService (part 1/2)", () => {
leaderLockTimeoutMs: 5000,
leaderLockExtendIntervalMs: 1000,
ackIntervalSeconds: 5,
logLevel: "warn",
});

await runsReplicationServiceB.start();
Expand All @@ -1137,6 +1158,7 @@ describe("RunsReplicationService (part 1/2)", () => {
const clickhouse = new ClickHouse({
url: clickhouseContainer.getConnectionUrl(),
name: "runs-replication-metrics",
logLevel: "warn",
});

const { tracer } = createInMemoryTracing();
Expand All @@ -1157,6 +1179,7 @@ describe("RunsReplicationService (part 1/2)", () => {
ackIntervalSeconds: 5,
tracer,
meter: metricsHelper.meter,
logLevel: "warn",
});

await runsReplicationService.start();
Expand Down
Loading