Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions packages/engine/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
export * from './types.js';

// Engine modules (Agent A)
export { SuiteLoader } from './loader.js';
export { SuiteLoader, resolvePools } from './loader.js';
export { Runner } from './runner.js';
export type { RunnerOptions } from './runner.js';
export { Scorer } from './scorer.js';
export { Reporter } from './reporter.js';
export { SuiteDefinitionSchema, SuiteDefinitionSchema as suiteSchema } from './schema.js';
export { SuiteDefinitionSchema, SuiteDefinitionSchema as suiteSchema, ScenarioPoolSchema, ScenarioEntrySchema } from './schema.js';

// Concurrency
export { Semaphore } from './semaphore.js';
Expand Down
84 changes: 82 additions & 2 deletions packages/engine/src/loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { resolve, dirname } from 'node:path';
import { parse as parseYAML, YAMLParseError } from 'yaml';
import { ZodError } from 'zod';
import { SuiteDefinitionSchema } from './schema.js';
import type { SuiteDefinition, ScenarioDefinition } from './types.js';
import type { SuiteDefinition, ScenarioDefinition, ScenarioEntry, ScenarioPool } from './types.js';

export class SuiteLoader {
/**
Expand All @@ -29,6 +29,9 @@ export class SuiteLoader {
}
const suite = this.loadString(content, filePath);

// Resolve scenario pools before fixture resolution
resolvePools(suite);

// Fix #2: Resolve fixture file references and load their content
const suiteDir = dirname(resolve(filePath));
await this.resolveFixtures(suite, suiteDir);
Expand Down Expand Up @@ -62,7 +65,10 @@ export class SuiteLoader {
}

try {
return SuiteDefinitionSchema.parse(raw);
const suite = SuiteDefinitionSchema.parse(raw) as SuiteDefinition & { scenarios: ScenarioEntry[] };
// Resolve pools so the returned suite has a flat ScenarioDefinition[]
resolvePools(suite);
return suite as SuiteDefinition;
} catch (err) {
if (err instanceof ZodError) {
const issues = err.issues
Expand Down Expand Up @@ -126,3 +132,77 @@ export class SuiteLoader {
}
}
}

// ─── Scenario Pool Resolution ───────────────────────────────────────

/**
 * mulberry32 — a small seeded pseudo-random number generator.
 *
 * @param seed Initial state; coerced to a signed 32-bit integer.
 * @returns A generator function. Each invocation advances the internal
 *          32-bit state and yields a float in [0, 1).
 */
function mulberry32(seed: number): () => number {
  let state = seed | 0;

  const next = (): number => {
    // Step the state by the mulberry32 increment, wrapping to 32 bits.
    state = (state + 0x6D2B79F5) | 0;

    // Two multiply / xor-shift mixing rounds over the stepped state.
    const first = Math.imul(state ^ (state >>> 15), state | 1);
    const second = (first + Math.imul(first ^ (first >>> 7), first | 61)) ^ first;

    // Final xor-shift, reinterpret as unsigned, then scale by 2^32.
    const unsigned = (second ^ (second >>> 14)) >>> 0;
    return unsigned / 4294967296;
  };

  return next;
}

/**
 * Fisher-Yates shuffle driven by a caller-supplied random source.
 *
 * Works on a copy, so `arr` itself is never modified. `rand` is invoked once
 * per position, walking from the last index down to index 1.
 *
 * @param arr  Items to shuffle.
 * @param rand Random source; its result is multiplied by the number of
 *             remaining positions and floored to choose the swap partner.
 * @returns A new array holding the same items in shuffled order.
 */
function shuffle<T>(arr: T[], rand: () => number): T[] {
  const copy = Array.from(arr);
  let last = copy.length - 1;

  while (last > 0) {
    const pick = Math.floor(rand() * (last + 1));
    // Plain three-step swap of positions `last` and `pick`.
    const held = copy[last] as T;
    copy[last] = copy[pick] as T;
    copy[pick] = held;
    last -= 1;
  }

  return copy;
}

/**
 * Type guard: reports whether a ScenarioEntry is a pool wrapper.
 *
 * An entry qualifies when it has a `pool` key whose value has
 * `typeof === 'object'`.
 * NOTE(review): `typeof null` is also 'object', so `{ pool: null }` passes
 * this guard — callers appear to rely on prior schema validation to rule
 * that out; confirm.
 */
function isPool(entry: ScenarioEntry): entry is { pool: ScenarioPool } {
  if (!('pool' in entry)) {
    return false;
  }
  const candidate: unknown = entry.pool;
  return typeof candidate === 'object';
}

/**
 * Expand scenario pools into concrete ScenarioDefinition[] in-place.
 *
 * Plain scenario entries are kept in their original position. Each pool
 * wrapper is replaced by `count` scenarios drawn from a shuffled copy of the
 * pool's `scenarios` list. When `pool.seed` is set (not null/undefined) the
 * shuffle uses the seeded mulberry32 generator, so the selection is
 * reproducible; otherwise it uses `Math.random`. A `count` larger than the
 * pool is clamped to the pool size and a warning is logged.
 *
 * After this call, `suite.scenarios` contains only ScenarioDefinition items,
 * so calling it again on the same suite leaves the list unchanged.
 *
 * @param suite Object whose `scenarios` array is rewritten in place.
 * @throws Error if a pool has an empty scenarios array, has count=0, or has
 *         a count that is negative or not an integer.
 */
export function resolvePools(suite: { scenarios: ScenarioEntry[] }): void {
  const resolved: ScenarioDefinition[] = [];

  for (const entry of suite.scenarios) {
    if (!isPool(entry)) {
      resolved.push(entry);
      continue;
    }

    const pool = entry.pool;
    const available = pool.scenarios.length;

    if (available === 0) {
      throw new Error(`Scenario pool "${pool.id}" has no scenarios`);
    }

    if (pool.count === 0) {
      throw new Error(`Scenario pool "${pool.id}" has count=0`);
    }

    // This function is exported and may be called on data that never went
    // through schema validation. A negative count would reach
    // `slice(0, count)` and silently drop items from the END of the shuffled
    // list; a fractional count would be silently truncated. Reject both.
    if (!Number.isInteger(pool.count) || pool.count < 0) {
      throw new Error(
        `Scenario pool "${pool.id}" has invalid count=${pool.count}; expected a positive integer`,
      );
    }

    const count = Math.min(pool.count, available);
    if (pool.count > available) {
      console.warn(
        `Pool "${pool.id}": count (${pool.count}) exceeds pool size (${available}), clamping to ${available}`,
      );
    }

    // `!= null` deliberately matches both null and undefined seeds.
    const rand = pool.seed != null
      ? mulberry32(pool.seed)
      : Math.random.bind(Math);

    const shuffled = shuffle(pool.scenarios, rand);
    resolved.push(...shuffled.slice(0, count));
  }

  // ScenarioDefinition[] is assignable to ScenarioEntry[]; no cast required.
  suite.scenarios = resolved;
}
16 changes: 15 additions & 1 deletion packages/engine/src/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,20 @@ export const ScenarioDefinitionSchema = z.object({
depends_on: z.string().optional(),
});

// ─── Scenario Pool ──────────────────────────────────────────────────

/**
* Zod schema for a pool wrapper entry: an object with a `pool` key that holds
* the pool's settings and its candidate scenarios.
*/
export const ScenarioPoolSchema = z.object({
pool: z.object({
// Pool identifier; must be a non-empty string.
id: z.string().min(1),
// Integer >= 0.
// NOTE(review): 0 passes validation here, but resolvePools in loader.ts
// throws on count=0 — confirm the split between schema and resolver is intended.
count: z.number().int().min(0),
// Integer seed; may be omitted or explicitly null.
seed: z.number().int().nullable().optional(),
// Candidate scenarios; at least one is required.
scenarios: z.array(ScenarioDefinitionSchema).min(1),
}),
});

/**
* A single entry: either a regular scenario or a pool wrapper.
*
* NOTE(review): the union lists ScenarioDefinitionSchema before
* ScenarioPoolSchema; presumably a pool wrapper can never satisfy the plain
* scenario schema, so the order does not matter — confirm against
* ScenarioDefinitionSchema's required fields.
*/
export const ScenarioEntrySchema = z.union([ScenarioDefinitionSchema, ScenarioPoolSchema]);

// ─── Agent Config ────────────────────────────────────────────────────

export const AgentConfigSchema = z.object({
Expand Down Expand Up @@ -99,6 +113,6 @@ export const SuiteDefinitionSchema = z.object({
agent: AgentConfigSchema.optional(),
judge: JudgeConfigSchema.optional(),
defaults: SuiteDefaultsSchema,
scenarios: z.array(ScenarioDefinitionSchema).min(1),
scenarios: z.array(ScenarioEntrySchema).min(1),
metadata: z.record(z.unknown()).optional(),
});
12 changes: 12 additions & 0 deletions packages/engine/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ export interface JudgeConfig {
};
}

// ─── Scenario Pool ──────────────────────────────────────────────────

/**
* A group of candidate scenarios from which a subset is selected when the
* suite's pools are resolved.
*/
export interface ScenarioPool {
/** Pool identifier; it appears in the pool-resolution error and warning messages. */
id: string;
/** How many scenarios to select from `scenarios`. */
count: number;
/** Optional seed for the selection. */
seed?: number | null; // Fixed seed for reproducible selection (null = random)
/** The candidate scenarios the selection is drawn from. */
scenarios: ScenarioDefinition[];
}

/**
* A single entry in the suite's scenarios array: either a plain scenario or a
* pool wrapper, i.e. an object whose `pool` property holds a ScenarioPool.
*/
export type ScenarioEntry = ScenarioDefinition | { pool: ScenarioPool };

// ─── Scenario ────────────────────────────────────────────────────────

export interface ScenarioDefinition {
Expand Down
Loading
Loading