);
}
diff --git a/dashboard/components/AiProvidersSection.tsx b/dashboard/components/AiProvidersSection.tsx
new file mode 100644
index 00000000..956e41d8
--- /dev/null
+++ b/dashboard/components/AiProvidersSection.tsx
@@ -0,0 +1,803 @@
+'use client';
+
+import { useEffect, useMemo, useState } from 'react';
+
+import { api } from '@/lib/api';
+import type {
+ AIModelInfo,
+ LlmConnector,
+ LlmConnectorCreate,
+ LlmConnectorType,
+ LlmDjPolicy,
+ LlmFeatureKey,
+ LlmFeaturePreferences,
+} from '@/lib/api-types';
+
+/** Human-readable label for each connector type shown in the provider picker. */
+const CONNECTOR_TYPE_LABELS: Record<LlmConnectorType, string> = {
+  openai_apikey: 'OpenAI API key',
+  anthropic_apikey: 'Anthropic API key',
+  openrouter_apikey: 'OpenRouter API key',
+  xai_apikey: 'xAI Grok API key',
+  gemini_apikey: 'Google Gemini API key',
+  openai_compatible: 'Custom OpenAI-compatible endpoint',
+  bedrock: 'AWS Bedrock',
+  azure_openai: 'Azure OpenAI',
+};
+
+/**
+ * Badge text and CSS color for each known connector status. Keyed by plain
+ * string so an unrecognised status can be looked up and fall back at the call
+ * site instead of failing to compile.
+ */
+const STATUS_LABELS: Record<string, { text: string; color: string }> = {
+  active: { text: 'Active', color: 'var(--color-success)' },
+  auth_invalid: { text: 'Auth invalid', color: 'var(--color-danger)' },
+  disabled: { text: 'Disabled', color: 'var(--text-secondary)' },
+};
+
+// Human-readable labels for the pinnable agentic features (issue #337). Keyed
+// by plain string on purpose: a feature the backend adds before the UI learns
+// its label has no entry here, and callers fall back to the raw feature key.
+const FEATURE_LABELS: Record<string, string> = {
+  recommendation: 'Recommendations',
+  set_builder: 'Set builder',
+};
+
+// Provider-specific input placeholders. Both maps are intentionally partial:
+// a connector type without an entry falls back to the matching *_DEFAULT
+// constant (an OpenAI-style key for the API key field, a local-model example
+// for the model hint field).
+const API_KEY_PLACEHOLDERS: Partial<Record<LlmConnectorType, string>> = {
+  anthropic_apikey: 'sk-ant-…',
+  openrouter_apikey: 'sk-or-…',
+  xai_apikey: 'xai-…',
+  gemini_apikey: 'AIza…',
+};
+const API_KEY_PLACEHOLDER_DEFAULT = 'sk-proj-… / sk-…';
+
+const MODEL_HINT_PLACEHOLDERS: Partial<Record<LlmConnectorType, string>> = {
+  anthropic_apikey: 'claude-haiku-4-5-20251001',
+  openai_apikey: 'gpt-5-mini',
+  openrouter_apikey: 'e.g. openai/gpt-4o-mini',
+  xai_apikey: 'grok-3-mini',
+  gemini_apikey: 'gemini-2.5-flash',
+};
+const MODEL_HINT_PLACEHOLDER_DEFAULT = 'e.g. llama3';
+
+// Local state of the "add provider" form. All credential fields are kept as
+// plain strings; handleCreate decides which of them are actually posted based
+// on connector_type and sends null for the rest.
+interface FormState {
+ // Whether the form is currently open.
+ open: boolean;
+ connector_type: LlmConnectorType;
+ display_name: string;
+ // Posted for every type except openai_compatible and bedrock.
+ api_key: string;
+ // base_url / bearer are posted only for openai_compatible.
+ base_url: string;
+ bearer: string;
+ // Posted for every type except bedrock (which uses aws_model_id instead).
+ model_hint: string;
+ // aws_* fields are posted only for bedrock.
+ aws_access_key_id: string;
+ aws_secret_access_key: string;
+ aws_region: string;
+ aws_model_id: string;
+ // azure_* fields are posted only for azure_openai.
+ azure_resource_name: string;
+ azure_deployment_name: string;
+ azure_api_version: string;
+}
+
+// Closed, blank form. Used as the initial state and as the reset value after
+// cancel or a successful create; handleOpenForm spreads it and overrides
+// `open` and `connector_type`.
+const EMPTY_FORM: FormState = {
+ open: false,
+ connector_type: 'openai_apikey',
+ display_name: '',
+ api_key: '',
+ base_url: '',
+ bearer: '',
+ model_hint: '',
+ aws_access_key_id: '',
+ aws_secret_access_key: '',
+ aws_region: '',
+ aws_model_id: '',
+ azure_resource_name: '',
+ azure_deployment_name: '',
+ azure_api_version: '',
+};
+
+/**
+ * DJ-facing AI connector management UI (connect / test / delete, model hint,
+ * Hermes onboarding). Relocated from the standalone `/settings/ai` route into
+ * the `/account` page (issue #357). The component assumes the parent already
+ * enforces authentication — it does no auth gating of its own.
+ *
+ * Fail-closed behavior is preserved: when the DJ-scoped policy endpoint can't
+ * be read, NO provider types are offered rather than leaking every type.
+ */
+export default function AiProvidersSection() {
+  // DJ-scoped policy; stays null when it has not loaded or could not be read,
+  // which makes the provider picker fail closed.
+  const [policy, setPolicy] = useState<LlmDjPolicy | null>(null);
+  const [connectors, setConnectors] = useState<LlmConnector[]>([]);
+  const [loading, setLoading] = useState(true);
+  // Page-level error (load, delete, default and feature-pin failures).
+  const [error, setError] = useState('');
+  const [form, setForm] = useState<FormState>(EMPTY_FORM);
+  const [submitting, setSubmitting] = useState(false);
+  // Outcome of the create form, kept separate from the page-level error.
+  const [submitMessage, setSubmitMessage] = useState('');
+  const [submitError, setSubmitError] = useState('');
+  // Status text of the last "Test" run, keyed by connector id.
+  const [testStateById, setTestStateById] = useState<Record<number, string>>({});
+  // Live streamed text per connector for the "Stream test" button, plus the id
+  // currently streaming (drives the disabled state + label).
+  const [streamTextById, setStreamTextById] = useState<Record<number, string>>({});
+  const [streamingId, setStreamingId] = useState<number | null>(null);
+  const [openrouterModels, setOpenrouterModels] = useState<AIModelInfo[]>([]);
+  const [openrouterModelsLoaded, setOpenrouterModelsLoaded] = useState(false);
+  // Per-feature pins; null when the soft fetch failed.
+  const [featurePrefs, setFeaturePrefs] = useState<LlmFeaturePreferences | null>(null);
+
+  // Initial load, once per mount: connectors, policy and feature pins are
+  // requested in parallel. Only the connector list can reject here — the other
+  // two are "soft" fetches that resolve to null on failure. The `mounted` flag
+  // stops state updates after the component has been unmounted.
+  useEffect(() => {
+    let mounted = true;
+
+    const load = async () => {
+      setLoading(true);
+      setError('');
+      try {
+        const [rows, djPolicy, prefs] = await Promise.all([
+          api.listLlmConnectors(),
+          fetchPolicySoft(),
+          fetchFeaturePrefsSoft(),
+        ]);
+        if (!mounted) return;
+        setConnectors(rows);
+        setPolicy(djPolicy);
+        setFeaturePrefs(prefs);
+      } catch (err) {
+        if (!mounted) return;
+        setError(err instanceof Error ? err.message : 'Failed to load');
+      } finally {
+        if (mounted) setLoading(false);
+      }
+    };
+
+    void load();
+
+    return () => {
+      mounted = false;
+    };
+  }, []);
+
+  // The OpenRouter model catalogue is fetched lazily: only once the form is
+  // open with the OpenRouter type selected, and at most once per mount (the
+  // "loaded" flag is raised before the request, so a failure is not retried).
+  // An empty or failed result just leaves the free-text model input in place.
+  const wantsOpenrouterModels = form.connector_type === 'openrouter_apikey' && form.open;
+  useEffect(() => {
+    const shouldFetch = wantsOpenrouterModels && !openrouterModelsLoaded;
+    if (shouldFetch) {
+      setOpenrouterModelsLoaded(true);
+      api
+        .listOpenRouterModels()
+        .then((catalogue) => setOpenrouterModels(catalogue.models))
+        .catch(() => {
+          // Deliberately ignored — the dropdown degrades to free-text entry.
+        });
+    }
+  }, [wantsOpenrouterModels, openrouterModelsLoaded]);
+
+  // Connector types this DJ may create. The server-computed list is used
+  // as-is; with no readable policy the list is empty (fail closed) so the DJ is
+  // never offered a type whose create call the admin policy would reject.
+  const allowedTypes = useMemo(
+    () => (policy ? (policy.allowed_connector_types as LlmConnectorType[]) : []),
+    [policy],
+  );
+
+  // onChange factory for the plain string form fields — every text input/select
+  // updates exactly one FormState key with the raw value. `open` is a boolean
+  // and `connector_type` needs a cast to LlmConnectorType, so both are excluded
+  // from the accepted keys and handled inline elsewhere.
+  const handleField =
+    (key: Exclude<keyof FormState, 'open' | 'connector_type'>) =>
+    (e: React.ChangeEvent<HTMLInputElement | HTMLSelectElement>) =>
+      setForm((f) => ({ ...f, [key]: e.target.value }));
+
+  // Opens a blank form preselected on the first allowed connector type. When
+  // policy allows no types at all, the form stays closed and an explanatory
+  // error is shown instead.
+  const handleOpenForm = () => {
+    setSubmitMessage('');
+    if (allowedTypes.length > 0) {
+      setForm({ ...EMPTY_FORM, open: true, connector_type: allowedTypes[0] });
+      setSubmitError('');
+      return;
+    }
+    setSubmitError('Connector creation is currently disabled by admin policy.');
+  };
+
+  // Discards the form contents (closing it) and clears any create error.
+  const handleCancel = () => {
+    setSubmitError('');
+    setForm(EMPTY_FORM);
+  };
+
+  // Submits the create form. Only the fields that belong to the selected
+  // connector type are posted; everything else is sent as null so values left
+  // over from a previously selected type never reach the server.
+  const handleCreate = async (e: React.FormEvent) => {
+    e.preventDefault();
+    setSubmitting(true);
+    setSubmitMessage('');
+    setSubmitError('');
+
+    const kind = form.connector_type;
+    const usesEndpoint = kind === 'openai_compatible';
+    const usesAws = kind === 'bedrock';
+    const usesAzure = kind === 'azure_openai';
+    // Every type other than openai_compatible and bedrock carries an api_key —
+    // that includes Azure, which sends it alongside its azure_* fields.
+    const usesApiKey = !(usesEndpoint || usesAws);
+    // Keep a field only when it applies to the selected type.
+    const when = (applies: boolean, value: string): string | null => (applies ? value : null);
+
+    const payload: LlmConnectorCreate = {
+      connector_type: kind,
+      display_name: form.display_name,
+      // Bedrock identifies its model via aws_model_id, so no hint is posted for
+      // it; for other types an empty hint becomes null.
+      model_hint: usesAws ? null : form.model_hint || null,
+      api_key: when(usesApiKey, form.api_key),
+      base_url: when(usesEndpoint, form.base_url),
+      // An empty bearer is posted as null.
+      bearer: when(usesEndpoint, form.bearer) || null,
+      aws_access_key_id: when(usesAws, form.aws_access_key_id),
+      aws_secret_access_key: when(usesAws, form.aws_secret_access_key),
+      aws_region: when(usesAws, form.aws_region),
+      aws_model_id: when(usesAws, form.aws_model_id),
+      azure_resource_name: when(usesAzure, form.azure_resource_name),
+      azure_deployment_name: when(usesAzure, form.azure_deployment_name),
+      azure_api_version: when(usesAzure, form.azure_api_version),
+    };
+
+    try {
+      const created = await api.createLlmConnector(payload);
+      // Newest connector goes to the top of the list; the form is reset/closed.
+      setConnectors((prev) => [created, ...prev]);
+      setForm(EMPTY_FORM);
+      setSubmitMessage(`Created "${created.display_name}". Run "Test" to verify it works.`);
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : 'Create failed (check your inputs)';
+      setSubmitError(reason);
+    } finally {
+      setSubmitting(false);
+    }
+  };
+
+  // Runs the connector's "Test" call and records the verdict for that row, then
+  // re-lists connectors so an updated status renders.
+  //
+  // The refresh is deliberately outside the test's try/catch: a failed re-list
+  // must not overwrite a verdict that was already obtained (previously a
+  // refresh error replaced "OK" in the row as if the test itself had failed).
+  // Refresh failures are reported through the page-level error instead.
+  const handleTest = async (id: number) => {
+    setTestStateById((s) => ({ ...s, [id]: 'Testing…' }));
+    try {
+      const result = await api.testLlmConnector(id);
+      setTestStateById((s) => ({
+        ...s,
+        [id]: result.ok ? 'OK' : `Failed: ${result.error_code ?? 'unknown'}`,
+      }));
+    } catch (err) {
+      setTestStateById((s) => ({
+        ...s,
+        [id]: err instanceof Error ? err.message : 'Test failed',
+      }));
+      // The test call itself failed; as before, no refresh is attempted.
+      return;
+    }
+
+    try {
+      const fresh = await api.listLlmConnectors();
+      setConnectors(fresh);
+    } catch (err) {
+      setError(err instanceof Error ? err.message : 'Failed to load');
+    }
+  };
+
+  // Runs the streaming test for one connector: resets that row's streamed text,
+  // marks the row as streaming, and appends each chunk's text_delta as it
+  // arrives. A failure replaces the row's text with a parenthesised error note.
+  const handleStreamTest = async (id: number) => {
+    setStreamTextById((s) => ({ ...s, [id]: '' }));
+    setStreamingId(id);
+    try {
+      await api.streamConnectorTest(id, (chunk) => {
+        // Chunks without text (e.g. the final stop/done chunk) add nothing.
+        if (chunk.text_delta) {
+          setStreamTextById((s) => ({ ...s, [id]: (s[id] ?? '') + chunk.text_delta }));
+        }
+      });
+    } catch (err) {
+      setStreamTextById((s) => ({
+        ...s,
+        [id]: err instanceof Error ? `(stream test failed: ${err.message})` : '(stream test failed)',
+      }));
+    } finally {
+      // Clear the marker only if it still names this connector, so a stream
+      // that was started for another row in the meantime keeps its marker.
+      setStreamingId((current) => (current === id ? null : current));
+    }
+  };
+
+  // Deletes a connector after a browser confirmation and drops its row from the
+  // local list. Failures surface through the page-level error.
+  const handleDelete = async (id: number) => {
+    const confirmed = window.confirm('Delete this connector? This cannot be undone.');
+    if (!confirmed) return;
+    try {
+      await api.deleteLlmConnector(id);
+      setConnectors((rows) => rows.filter((row) => row.id !== id));
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : 'Delete failed';
+      setError(reason);
+    }
+  };
+
+  // Pin a connector as the per-DJ explicit default (issue #336). Once the
+  // server responds, the whole local list is patched in one pass — the returned
+  // row replaces its old copy and every other row of the same user is marked
+  // non-default — so exactly one row shows as default without a refetch.
+  const handleSetDefault = async (id: number) => {
+    try {
+      const updated = await api.setLlmConnectorDefault(id);
+      setConnectors((rows) =>
+        rows.map((row) => {
+          if (row.id === updated.id) return updated;
+          if (row.user_id === updated.user_id) return { ...row, is_default: false };
+          return row;
+        }),
+      );
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : 'Failed to set default';
+      setError(reason);
+    }
+  };
+
+  // Clears the explicit default from a connector and swaps the returned row
+  // into the local list; other rows are left untouched.
+  const handleUnsetDefault = async (id: number) => {
+    try {
+      const updated = await api.unsetLlmConnectorDefault(id);
+      setConnectors((rows) =>
+        rows.map((row) => {
+          return row.id === updated.id ? updated : row;
+        }),
+      );
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : 'Failed to clear default';
+      setError(reason);
+    }
+  };
+
+  // Per-feature pin (issue #337). The select's empty value means "no pin" and
+  // clears the preference; any other value is a connector id that sets or
+  // replaces the pin. Both calls resolve to the value stored as the new
+  // feature-preference state, and a success also clears the page-level error.
+  const handleFeaturePrefChange = async (feature: LlmFeatureKey, value: string) => {
+    try {
+      const request =
+        value === ''
+          ? api.clearLlmFeaturePreference(feature)
+          : api.setLlmFeaturePreference({ feature, connector_id: Number(value) });
+      const updated = await request;
+      setFeaturePrefs(updated);
+      setError('');
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : 'Failed to update feature default';
+      setError(reason);
+    }
+  };
+
+ return (
+
+
+ AI / Model providers
+
+
+
+ Connect your own LLM provider so AI-assisted features (recommendations, etc.) bill to
+ your account. Credentials are encrypted at rest. Calls consume your account's API or
+ subscription quota directly.
+
+
+ {loading &&
Loading…
}
+ {error &&
{error}
}
+ {submitMessage && (
+
{submitMessage}
+ )}
+ {submitError && (
+
{submitError}
+ )}
+
+
+
Connected providers
+ {connectors.length === 0 && !loading && (
+
No connectors yet.
+ )}
+ {connectors.map((c) => {
+ const status = STATUS_LABELS[c.status] ?? { text: c.status, color: 'var(--text-secondary)' };
+ // Pin / unpin is only meaningful for active connectors — the gateway
+ // skips inactive defaults, so don't let the DJ pin a row that
+ // resolution would silently bypass.
+ const canPin = c.status === 'active';
+ const radioId = `connector-default-${c.id}`;
+ return (
+
+ {/* Radio for "Set as default" — exactly one connector per DJ may be pinned. */}
+
+ {
+ if (canPin) {
+ handleSetDefault(c.id);
+ }
+ }}
+ />
+ {c.is_default ? (
+ <>
+ Pinned as default ·{' '}
+
+ >
+ ) : (
+ Set as default
+ )}
+
+
+ Pin a specific provider to each AI feature. Unpinned features use your account
+ default (or most-recently-used) connector. Inactive connectors are skipped
+ automatically.
+
+ );
+}
+
+/**
+ * Reads the DJ-scoped policy endpoint without ever rejecting.
+ *
+ * @returns the policy, or null on any failure. A null policy makes the UI fail
+ *   *closed* (no providers offered), which avoids showing a DJ a provider the
+ *   admin disabled, only to have the create call reject it with a 403.
+ */
+async function fetchPolicySoft(): Promise<LlmDjPolicy | null> {
+  try {
+    return await api.getLlmPolicy();
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Reads the DJ's per-feature pins without ever rejecting.
+ *
+ * @returns the preferences, or null on any failure. With null the
+ *   "Per-feature defaults" section is simply hidden — it's an enhancement, not
+ *   load-bearing, so a transient error must not break the whole page.
+ */
+async function fetchFeaturePrefsSoft(): Promise<LlmFeaturePreferences | null> {
+  try {
+    return await api.listLlmFeaturePreferences();
+  } catch {
+    return null;
+  }
+}
diff --git a/dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx b/dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx
new file mode 100644
index 00000000..cbabb2d3
--- /dev/null
+++ b/dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx
@@ -0,0 +1,124 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { render, screen, waitFor, fireEvent } from '@testing-library/react';
+
+import AiProvidersSection from '../AiProvidersSection';
+import { api } from '@/lib/api';
+import type { LlmConnector } from '@/lib/api-types';
+
+const NOW = new Date().toISOString();
+
+// Builds an active OpenAI connector fixture; any field can be overridden.
+function makeConnector(overrides: Partial<LlmConnector> = {}): LlmConnector {
+  return {
+    id: 1,
+    user_id: 42,
+    connector_type: 'openai_apikey',
+    display_name: 'My OpenAI',
+    status: 'active',
+    base_url_plain: null,
+    model_hint: null,
+    created_at: NOW,
+    updated_at: NOW,
+    last_used_at: null,
+    last_error: null,
+    is_default: false,
+    last_health_check_at: null,
+    last_health_check_status: null,
+    monthly_token_cap: null,
+    ...overrides,
+  };
+}
+
+// Per-feature default pickers: rendering, pinning, clearing, fail-soft hiding
+// and the active-only connector filter. Every test mounts the real component.
+describe('AiProvidersSection per-feature defaults', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([makeConnector()]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue({
+      llm_apikey_connectors_enabled: true,
+      llm_compatible_connector_enabled: true,
+      allowed_connector_types: ['openai_apikey'],
+    });
+    vi.spyOn(api, 'listLlmFeaturePreferences').mockResolvedValue({
+      preferences: [],
+      known_features: ['recommendation', 'set_builder'],
+    });
+  });
+
+  it('renders a picker per known feature and sets a pin', async () => {
+    const setSpy = vi.spyOn(api, 'setLlmFeaturePreference').mockResolvedValue({
+      preferences: [{ feature: 'recommendation', connector_id: 1 }],
+      known_features: ['recommendation', 'set_builder'],
+    });
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() =>
+      expect(screen.getByText('Per-feature defaults')).toBeInTheDocument(),
+    );
+
+    // One picker per known feature.
+    expect(screen.getByLabelText('Recommendations')).toBeInTheDocument();
+    expect(screen.getByLabelText('Set builder')).toBeInTheDocument();
+
+    const select = screen.getByLabelText('Recommendations') as HTMLSelectElement;
+    fireEvent.change(select, { target: { value: '1' } });
+
+    await waitFor(() =>
+      expect(setSpy).toHaveBeenCalledWith({
+        feature: 'recommendation',
+        connector_id: 1,
+      }),
+    );
+  });
+
+  it('clears a pin when "Use account default" is selected', async () => {
+    vi.spyOn(api, 'listLlmFeaturePreferences').mockResolvedValue({
+      preferences: [{ feature: 'recommendation', connector_id: 1 }],
+      known_features: ['recommendation', 'set_builder'],
+    });
+    const clearSpy = vi.spyOn(api, 'clearLlmFeaturePreference').mockResolvedValue({
+      preferences: [],
+      known_features: ['recommendation', 'set_builder'],
+    });
+
+    render(<AiProvidersSection />);
+    await waitFor(() =>
+      expect(screen.getByText('Per-feature defaults')).toBeInTheDocument(),
+    );
+
+    const select = screen.getByLabelText('Recommendations') as HTMLSelectElement;
+    // The current pin should be reflected as the selected value.
+    expect(select.value).toBe('1');
+
+    fireEvent.change(select, { target: { value: '' } });
+
+    await waitFor(() => expect(clearSpy).toHaveBeenCalledWith('recommendation'));
+  });
+
+  it('hides the section when the preferences fetch fails (fail soft)', async () => {
+    vi.spyOn(api, 'listLlmFeaturePreferences').mockRejectedValue(new Error('boom'));
+
+    render(<AiProvidersSection />);
+
+    // The connectors list still renders…
+    await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument());
+    // …but the per-feature section is absent.
+    expect(screen.queryByText('Per-feature defaults')).not.toBeInTheDocument();
+  });
+
+  it('only offers active connectors in the picker', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([
+      makeConnector({ id: 1, display_name: 'Active one', status: 'active' }),
+      makeConnector({ id: 2, display_name: 'Broken one', status: 'auth_invalid' }),
+    ]);
+
+    render(<AiProvidersSection />);
+    await waitFor(() =>
+      expect(screen.getByText('Per-feature defaults')).toBeInTheDocument(),
+    );
+
+    const select = screen.getByLabelText('Recommendations') as HTMLSelectElement;
+    const optionLabels = Array.from(select.options).map((o) => o.textContent);
+    expect(optionLabels).toContain('Active one');
+    expect(optionLabels).not.toContain('Broken one');
+  });
+});
diff --git a/dashboard/components/__tests__/AiProvidersSection.test.tsx b/dashboard/components/__tests__/AiProvidersSection.test.tsx
new file mode 100644
index 00000000..749bd886
--- /dev/null
+++ b/dashboard/components/__tests__/AiProvidersSection.test.tsx
@@ -0,0 +1,471 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { render, screen, waitFor, fireEvent } from '@testing-library/react';
+
+import AiProvidersSection from '../AiProvidersSection';
+import { api } from '@/lib/api';
+import type { LlmConnector, LlmConnectorType, LlmDjPolicy } from '@/lib/api-types';
+
+// Every connector type that makePolicy adds when its `apikeyEnabled` argument
+// is true. Despite the name this also lists 'bedrock' and 'azure_openai';
+// 'openai_compatible' is controlled by the separate `compatibleEnabled` flag.
+const ALL_APIKEY_TYPES: LlmConnectorType[] = [
+ 'openai_apikey',
+ 'anthropic_apikey',
+ 'openrouter_apikey',
+ 'xai_apikey',
+ 'bedrock',
+ 'azure_openai',
+ 'gemini_apikey',
+];
+
+// Build a DJ policy payload. `allowed_connector_types` is what the server
+// computes from the two toggles; the section renders exactly this set.
+function makePolicy(apikeyEnabled: boolean, compatibleEnabled: boolean): LlmDjPolicy {
+  // Allowed set = api-key family (when enabled) followed by the compatible
+  // endpoint type (when enabled), in that order.
+  const apiKeyTypes: LlmConnectorType[] = apikeyEnabled ? [...ALL_APIKEY_TYPES] : [];
+  const compatibleTypes: LlmConnectorType[] = compatibleEnabled ? ['openai_compatible'] : [];
+  return {
+    llm_apikey_connectors_enabled: apikeyEnabled,
+    llm_compatible_connector_enabled: compatibleEnabled,
+    allowed_connector_types: [...apiKeyTypes, ...compatibleTypes],
+  };
+}
+
+const NOW = new Date().toISOString();
+
+// Builds an active OpenAI connector fixture (model hint 'gpt-5-mini'); any
+// field can be overridden.
+function makeConnector(overrides: Partial<LlmConnector> = {}): LlmConnector {
+  return {
+    id: 1,
+    user_id: 42,
+    connector_type: 'openai_apikey',
+    display_name: 'My OpenAI',
+    status: 'active',
+    base_url_plain: null,
+    model_hint: 'gpt-5-mini',
+    created_at: NOW,
+    updated_at: NOW,
+    last_used_at: null,
+    last_error: null,
+    is_default: false,
+    last_health_check_at: null,
+    last_health_check_status: null,
+    monthly_token_cap: null,
+    ...overrides,
+  };
+}
+
+// Component-level tests for the connector list, the policy-gated create form,
+// the Test / Stream test actions, per-DJ default pinning and delete. Each test
+// stubs the `api` methods it needs and mounts the real component.
+describe('AiProvidersSection', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('renders the section heading', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden'));
+
+    render(<AiProvidersSection />);
+
+    expect(screen.getByText('AI / Model providers')).toBeInTheDocument();
+  });
+
+  it('lists existing connectors', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([
+      makeConnector({ display_name: 'My OpenAI' }),
+      makeConnector({
+        id: 2,
+        connector_type: 'anthropic_apikey',
+        display_name: 'My Claude',
+        model_hint: 'claude-haiku',
+      }),
+    ]);
+    vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden'));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument());
+    expect(screen.getByText('My Claude')).toBeInTheDocument();
+  });
+
+  it('respects admin policy when filtering allowed connector types', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(false, true));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument());
+    fireEvent.click(screen.getByText('+ Add provider'));
+
+    // Provider dropdown should only contain the openai_compatible option
+    const select = screen.getByLabelText('Provider') as HTMLSelectElement;
+    const optionValues = Array.from(select.options).map((o) => o.value);
+    expect(optionValues).toEqual(['openai_compatible']);
+  });
+
+  it('reads the DJ-scoped policy endpoint (not the admin one)', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    const adminPolicySpy = vi
+      .spyOn(api, 'getAdminLlmPolicy')
+      .mockRejectedValue(new Error('should not be called'));
+    const policySpy = vi
+      .spyOn(api, 'getLlmPolicy')
+      .mockResolvedValue(makePolicy(true, true));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(policySpy).toHaveBeenCalled());
+    expect(adminPolicySpy).not.toHaveBeenCalled();
+  });
+
+  it('fails closed: hides all provider types when policy fetch fails', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    // Simulate the DJ policy endpoint being unavailable.
+    vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('unavailable'));
+
+    render(<AiProvidersSection />);
+
+    // No "+ Add provider" button — the picker is hidden entirely.
+    await waitFor(() =>
+      expect(
+        screen.getByText('Connector creation is currently disabled by admin policy.'),
+      ).toBeInTheDocument(),
+    );
+    expect(screen.queryByText('+ Add provider')).not.toBeInTheDocument();
+    expect(screen.queryByLabelText('Provider')).not.toBeInTheDocument();
+  });
+
+  it('fails closed: only api-key types when compatible is disabled (no leak of all)', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, false));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument());
+    fireEvent.click(screen.getByText('+ Add provider'));
+
+    const select = screen.getByLabelText('Provider') as HTMLSelectElement;
+    const optionValues = Array.from(select.options).map((o) => o.value);
+    expect(optionValues).not.toContain('openai_compatible');
+    expect(optionValues).toContain('openai_apikey');
+  });
+
+  it('offers Azure OpenAI and reveals its config fields', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, true));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument());
+    fireEvent.click(screen.getByText('+ Add provider'));
+
+    const select = screen.getByLabelText('Provider') as HTMLSelectElement;
+    const optionValues = Array.from(select.options).map((o) => o.value);
+    expect(optionValues).toContain('azure_openai');
+
+    // Switching to Azure surfaces the resource/deployment/api-version inputs.
+    fireEvent.change(select, { target: { value: 'azure_openai' } });
+    expect(screen.getByLabelText('API key')).toBeInTheDocument();
+    expect(screen.getByLabelText('Resource name')).toBeInTheDocument();
+    expect(screen.getByLabelText('Deployment name')).toBeInTheDocument();
+    expect(screen.getByLabelText('API version')).toBeInTheDocument();
+  });
+
+  it('sends Azure config fields on create', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, true));
+    const createSpy = vi
+      .spyOn(api, 'createLlmConnector')
+      .mockResolvedValue(makeConnector({ connector_type: 'azure_openai' }));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument());
+    fireEvent.click(screen.getByText('+ Add provider'));
+
+    fireEvent.change(screen.getByLabelText('Provider'), {
+      target: { value: 'azure_openai' },
+    });
+    fireEvent.change(screen.getByLabelText('Display name'), {
+      target: { value: 'Venue Azure' },
+    });
+    fireEvent.change(screen.getByLabelText('API key'), {
+      target: { value: 'azure-secret' },
+    });
+    fireEvent.change(screen.getByLabelText('Resource name'), {
+      target: { value: 'venue-co' },
+    });
+    fireEvent.change(screen.getByLabelText('Deployment name'), {
+      target: { value: 'gpt4o-prod' },
+    });
+    fireEvent.change(screen.getByLabelText('API version'), {
+      target: { value: '2024-06-01' },
+    });
+
+    fireEvent.click(screen.getByRole('button', { name: 'Save' }));
+
+    await waitFor(() => expect(createSpy).toHaveBeenCalled());
+    expect(createSpy).toHaveBeenCalledWith(
+      expect.objectContaining({
+        connector_type: 'azure_openai',
+        api_key: 'azure-secret',
+        azure_resource_name: 'venue-co',
+        azure_deployment_name: 'gpt4o-prod',
+        azure_api_version: '2024-06-01',
+      }),
+    );
+  });
+
+  it('offers AWS Bedrock when api-key connectors are enabled', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, false));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument());
+    fireEvent.click(screen.getByText('+ Add provider'));
+
+    const select = screen.getByLabelText('Provider') as HTMLSelectElement;
+    const optionValues = Array.from(select.options).map((o) => o.value);
+    expect(optionValues).toContain('bedrock');
+    expect(optionValues).not.toContain('openai_compatible');
+
+    // Selecting Bedrock reveals the four AWS credential inputs.
+    fireEvent.change(select, { target: { value: 'bedrock' } });
+    expect(screen.getByLabelText('AWS access key ID')).toBeInTheDocument();
+    expect(screen.getByLabelText('AWS secret access key')).toBeInTheDocument();
+    expect(screen.getByLabelText('AWS region')).toBeInTheDocument();
+    expect(screen.getByLabelText('Bedrock model ID')).toBeInTheDocument();
+  });
+
+  it('runs Test and surfaces the result', async () => {
+    const row = makeConnector();
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([row]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, true));
+    const testSpy = vi.spyOn(api, 'testLlmConnector').mockResolvedValue({
+      ok: true,
+      error_code: null,
+      message: null,
+    });
+    // The refresh after Test re-lists connectors
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([row]);
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument());
+    fireEvent.click(screen.getByRole('button', { name: 'Test' }));
+    await waitFor(() => {
+      expect(testSpy).toHaveBeenCalledWith(1);
+    });
+  });
+
+  it('runs Stream test and renders the streamed text live', async () => {
+    const row = makeConnector();
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([row]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, true));
+    const streamSpy = vi
+      .spyOn(api, 'streamConnectorTest')
+      .mockImplementation(async (_id, onChunk) => {
+        onChunk({ text_delta: 'Online' });
+        onChunk({ text_delta: ' and ready', done: false });
+        onChunk({ stop_reason: 'end_turn', done: true });
+      });
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument());
+    fireEvent.click(screen.getByRole('button', { name: 'Stream test' }));
+    await waitFor(() => {
+      expect(streamSpy).toHaveBeenCalledWith(1, expect.any(Function));
+    });
+    await waitFor(() =>
+      expect(screen.getByText('Online and ready')).toBeInTheDocument(),
+    );
+  });
+
+  it('offers OpenRouter and fetches its model dropdown', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, false));
+    const modelsSpy = vi.spyOn(api, 'listOpenRouterModels').mockResolvedValue({
+      models: [
+        { id: 'openai/gpt-4o-mini', name: 'GPT-4o mini' },
+        { id: 'anthropic/claude-3.5-sonnet', name: 'Claude 3.5 Sonnet' },
+      ],
+    });
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument());
+    fireEvent.click(screen.getByText('+ Add provider'));
+
+    const select = screen.getByLabelText('Provider') as HTMLSelectElement;
+    const optionValues = Array.from(select.options).map((o) => o.value);
+    expect(optionValues).toContain('openrouter_apikey');
+
+    // Switch to OpenRouter — the model catalogue should be fetched and rendered.
+    fireEvent.change(select, { target: { value: 'openrouter_apikey' } });
+    await waitFor(() => expect(modelsSpy).toHaveBeenCalled());
+
+    // The dropdown options appear once the (async) fetch resolves.
+    await screen.findByRole('option', { name: /GPT-4o mini/ });
+    const modelSelect = screen.getByLabelText('Model (optional)') as HTMLSelectElement;
+    const modelValues = Array.from(modelSelect.options).map((o) => o.value);
+    expect(modelValues).toContain('openai/gpt-4o-mini');
+    expect(modelValues).toContain('anthropic/claude-3.5-sonnet');
+  });
+
+  it('creates an OpenRouter connector with the selected model', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]);
+    vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, false));
+    vi.spyOn(api, 'listOpenRouterModels').mockResolvedValue({
+      models: [{ id: 'openai/gpt-4o-mini', name: 'GPT-4o mini' }],
+    });
+    const createSpy = vi.spyOn(api, 'createLlmConnector').mockResolvedValue(
+      makeConnector({
+        connector_type: 'openrouter_apikey',
+        display_name: 'My OpenRouter',
+        model_hint: 'openai/gpt-4o-mini',
+      }),
+    );
+
+    render(<AiProvidersSection />);
+    await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument());
+    fireEvent.click(screen.getByText('+ Add provider'));
+
+    fireEvent.change(screen.getByLabelText('Provider'), {
+      target: { value: 'openrouter_apikey' },
+    });
+    fireEvent.change(screen.getByLabelText('Display name'), {
+      target: { value: 'My OpenRouter' },
+    });
+    fireEvent.change(screen.getByLabelText('API key'), {
+      target: { value: 'sk-or-v1-1234567890abcdef1234567890abcdef' },
+    });
+
+    await screen.findByRole('option', { name: /GPT-4o mini/ });
+    const modelSelect = screen.getByLabelText('Model (optional)') as HTMLSelectElement;
+    fireEvent.change(modelSelect, { target: { value: 'openai/gpt-4o-mini' } });
+
+    fireEvent.click(screen.getByRole('button', { name: 'Save' }));
+
+    await waitFor(() => expect(createSpy).toHaveBeenCalled());
+    expect(createSpy).toHaveBeenCalledWith(
+      expect.objectContaining({
+        connector_type: 'openrouter_apikey',
+        display_name: 'My OpenRouter',
+        api_key: 'sk-or-v1-1234567890abcdef1234567890abcdef',
+        base_url: null,
+        bearer: null,
+        model_hint: 'openai/gpt-4o-mini',
+      }),
+    );
+  });
+
+  // ---------- per-DJ default (issue #336) ----------
+
+  it('shows the Default badge on the pinned connector', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([
+      makeConnector({ id: 1, display_name: 'Pinned', is_default: true }),
+      makeConnector({ id: 2, display_name: 'Other', is_default: false }),
+    ]);
+    vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden'));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('Pinned')).toBeInTheDocument());
+    // The badge is rendered next to the display name.
+    expect(screen.getByText('Default')).toBeInTheDocument();
+  });
+
+  it('clicking the radio on an unpinned connector calls setDefault', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([
+      makeConnector({ id: 1, display_name: 'A', is_default: true }),
+      makeConnector({ id: 2, display_name: 'B', is_default: false }),
+    ]);
+    vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden'));
+    const setSpy = vi
+      .spyOn(api, 'setLlmConnectorDefault')
+      .mockResolvedValue(
+        makeConnector({ id: 2, display_name: 'B', is_default: true }),
+      );
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('B')).toBeInTheDocument());
+    // The radio for connector B is unchecked; click to pin it.
+    const radioB = screen.getByLabelText('Set as default');
+    fireEvent.click(radioB);
+
+    await waitFor(() => expect(setSpy).toHaveBeenCalledWith(2));
+  });
+
+  it('clicking Unpin on the pinned connector calls unsetDefault', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([
+      makeConnector({ id: 1, display_name: 'A', is_default: true }),
+    ]);
+    vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden'));
+    const unsetSpy = vi
+      .spyOn(api, 'unsetLlmConnectorDefault')
+      .mockResolvedValue(makeConnector({ id: 1, display_name: 'A', is_default: false }));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('A')).toBeInTheDocument());
+    fireEvent.click(screen.getByRole('button', { name: 'Unpin' }));
+
+    await waitFor(() => expect(unsetSpy).toHaveBeenCalledWith(1));
+  });
+
+  it('disables the radio on inactive connectors', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([
+      makeConnector({
+        id: 1,
+        display_name: 'Broken',
+        status: 'auth_invalid',
+        is_default: false,
+      }),
+    ]);
+    vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden'));
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('Broken')).toBeInTheDocument());
+    const radio = screen.getByLabelText('Set as default') as HTMLInputElement;
+    expect(radio).toBeDisabled();
+  });
+
+  it('optimistically clears the previous default when pinning a new one', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([
+      makeConnector({ id: 1, user_id: 42, display_name: 'A', is_default: true }),
+      makeConnector({ id: 2, user_id: 42, display_name: 'B', is_default: false }),
+    ]);
+    vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden'));
+    vi.spyOn(api, 'setLlmConnectorDefault').mockResolvedValue(
+      makeConnector({ id: 2, user_id: 42, display_name: 'B', is_default: true }),
+    );
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('B')).toBeInTheDocument());
+    fireEvent.click(screen.getByLabelText('Set as default'));
+
+    // After the optimistic update, the Default badge should sit next to B, not A.
+    await waitFor(() => {
+      const badge = screen.getByText('Default');
+      // Badge is right beside the display name — walk up to the card.
+      const card = badge.closest('.card');
+      expect(card?.textContent).toContain('B');
+    });
+  });
+
+  it('deletes after confirmation', async () => {
+    vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([makeConnector()]);
+    vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('nope'));
+    const delSpy = vi.spyOn(api, 'deleteLlmConnector').mockResolvedValue();
+    vi.spyOn(window, 'confirm').mockReturnValue(true);
+
+    render(<AiProvidersSection />);
+
+    await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument());
+    fireEvent.click(screen.getByRole('button', { name: 'Delete' }));
+    await waitFor(() => expect(delSpy).toHaveBeenCalledWith(1));
+  });
+});
diff --git a/dashboard/lib/__tests__/api.test.ts b/dashboard/lib/__tests__/api.test.ts
index b7126f09..655bbe1c 100644
--- a/dashboard/lib/__tests__/api.test.ts
+++ b/dashboard/lib/__tests__/api.test.ts
@@ -520,6 +520,127 @@ describe('ApiClient', () => {
});
});
+ describe('LLM Gateway API', () => {
+ beforeEach(() => {
+ api.setToken('test-token'); // give the shared client a token before every test in this group
+ });
+
+    it('lists per-DJ connectors', async () => {
+      // Single connector row served by the mocked fetch response.
+      const storedConnector = {
+        id: 1,
+        user_id: 42,
+        connector_type: 'openai_apikey',
+        display_name: 'My OpenAI',
+        status: 'active',
+        base_url_plain: null,
+        model_hint: 'gpt-5-mini',
+        created_at: '2026-01-01T00:00:00Z',
+        updated_at: '2026-01-01T00:00:00Z',
+        last_used_at: null,
+        last_error: null,
+      };
+      mockFetch.mockResolvedValueOnce({ ok: true, json: async () => [storedConnector] });
+
+      const connectors = await api.listLlmConnectors();
+
+      // Exactly one row comes back, with its connector type intact.
+      expect(connectors).toHaveLength(1);
+      const [first] = connectors;
+      expect(first.connector_type).toBe('openai_apikey');
+    });
+
+    it('creates a connector via POST', async () => {
+      // Row the mocked fetch response hands back for the create call.
+      const created = {
+        id: 2,
+        user_id: 42,
+        connector_type: 'openai_compatible',
+        display_name: 'Hermes',
+        status: 'active',
+        base_url_plain: 'http://127.0.0.1:11434/v1',
+        model_hint: null,
+        created_at: '2026-01-01T00:00:00Z',
+        updated_at: '2026-01-01T00:00:00Z',
+        last_used_at: null,
+        last_error: null,
+      };
+      mockFetch.mockResolvedValueOnce({ ok: true, json: async () => created });
+
+      const result = await api.createLlmConnector({
+        connector_type: 'openai_compatible',
+        display_name: 'Hermes',
+        base_url: 'http://127.0.0.1:11434/v1',
+        bearer: null,
+        api_key: null,
+        model_hint: null,
+      });
+      expect(result.id).toBe(2);
+
+      // Second argument of the first fetch call is the request init.
+      const requestInit = mockFetch.mock.calls[0][1];
+      expect(requestInit.method).toBe('POST');
+    });
+
+    it('updates admin LLM policy via PATCH', async () => {
+      const storedPolicy = {
+        llm_apikey_connectors_enabled: false,
+        llm_compatible_connector_enabled: true,
+        llm_default_connector_id: null,
+      };
+      mockFetch.mockResolvedValueOnce({ ok: true, json: async () => storedPolicy });
+
+      const result = await api.updateAdminLlmPolicy({
+        llm_apikey_connectors_enabled: false,
+        llm_compatible_connector_enabled: null,
+        llm_default_connector_id: null,
+        clear_default: true,
+      });
+      expect(result.llm_apikey_connectors_enabled).toBe(false);
+
+      // Second argument of the first fetch call is the request init.
+      const requestInit = mockFetch.mock.calls[0][1];
+      expect(requestInit.method).toBe('PATCH');
+    });
+
+    it('fetches admin usage with days param', async () => {
+      // The response body is irrelevant here; only the requested URL is checked.
+      const emptyUsage = { days: 30, rows: [] };
+      mockFetch.mockResolvedValueOnce({ ok: true, json: async () => emptyUsage });
+
+      await api.getAdminLlmUsage(30);
+
+      const requestedUrl = mockFetch.mock.calls[0][0];
+      expect(requestedUrl).toContain('/api/admin/llm/usage?days=30');
+    });
+
+    it('sets a connector monthly cap via PATCH', async () => {
+      const cappedRow = { id: 7, monthly_token_cap: 5000, current_month_tokens: 200 };
+      mockFetch.mockResolvedValueOnce({ ok: true, json: async () => cappedRow });
+
+      const result = await api.setAdminLlmConnectorCap(7, 5000);
+      expect(result.monthly_token_cap).toBe(5000);
+
+      const firstCall = mockFetch.mock.calls[0];
+      const requestedUrl = firstCall[0];
+      const requestInit = firstCall[1];
+      expect(requestedUrl).toContain('/api/admin/llm/connectors/7/cap');
+      expect(requestInit.method).toBe('PATCH');
+      expect(JSON.parse(requestInit.body)).toEqual({ monthly_token_cap: 5000 });
+    });
+
+    it('clears a connector cap by passing null', async () => {
+      // The request body must carry an explicit null for monthly_token_cap.
+      const uncappedRow = { id: 7, monthly_token_cap: null, current_month_tokens: 0 };
+      mockFetch.mockResolvedValueOnce({ ok: true, json: async () => uncappedRow });
+
+      await api.setAdminLlmConnectorCap(7, null);
+
+      const sentBody = JSON.parse(mockFetch.mock.calls[0][1].body);
+      expect(sentBody).toEqual({ monthly_token_cap: null });
+    });
+ });
+
describe('Activity Log API', () => {
beforeEach(() => {
api.setToken('test-token');
@@ -2293,6 +2414,107 @@ describe('ApiClient', () => {
});
});
+ describe('streamConnectorTest', () => {
+    it('parses SSE data frames and invokes onChunk per frame', async () => {
+      // Three LF-delimited data frames, all enqueued as one stream chunk.
+      const frames = [
+        'data: {"text_delta":"Hi","done":false}\n\n',
+        'data: {"text_delta":" there","done":false}\n\n',
+        'data: {"text_delta":"","stop_reason":"end_turn","done":true}\n\n',
+      ];
+      const payload = new TextEncoder().encode(frames.join(''));
+      const body = new ReadableStream({
+        start(ctrl) {
+          ctrl.enqueue(payload);
+          ctrl.close();
+        },
+      });
+      const okResponse = new Response(body, {
+        status: 200,
+        headers: { 'Content-Type': 'text/event-stream' },
+      });
+      const fetchMock = vi.spyOn(global, 'fetch').mockResolvedValueOnce(okResponse);
+
+      api.setToken('jwt-token');
+      const received: Array<{ text_delta?: string; done?: boolean }> = [];
+      await api.streamConnectorTest(7, (chunk) => received.push(chunk));
+
+      expect(received.map((chunk) => chunk.text_delta).join('')).toBe('Hi there');
+      expect(received.at(-1)?.done).toBe(true);
+
+      // The request must be a POST carrying the token as a Bearer header.
+      const sentInit = fetchMock.mock.calls[0][1] as RequestInit;
+      expect(new Headers(sentInit.headers).get('Authorization')).toBe('Bearer jwt-token');
+      expect(sentInit.method).toBe('POST');
+    });
+
+    it('parses CRLF-delimited SSE frames, including event: error (#354)', async () => {
+      // SSE may arrive framed with CRLF (\r\n\r\n) instead of LF, e.g. from a
+      // spec-compliant server or proxy. Frames and lines must be split on either
+      // ending; otherwise every frame, the typed `event: error` included, is lost.
+      const frames = [
+        'data: {"text_delta":"Hi","done":false}\r\n\r\n',
+        'event: error\r\ndata: {"code":"ProviderUnavailable"}\r\n\r\n',
+      ];
+      const payload = new TextEncoder().encode(frames.join(''));
+      const body = new ReadableStream({
+        start(ctrl) {
+          ctrl.enqueue(payload);
+          ctrl.close();
+        },
+      });
+      const okResponse = new Response(body, {
+        status: 200,
+        headers: { 'Content-Type': 'text/event-stream' },
+      });
+      vi.spyOn(global, 'fetch').mockResolvedValueOnce(okResponse);
+
+      api.setToken('jwt-token');
+      const received: Array<{ text_delta?: string }> = [];
+
+      const pending = api.streamConnectorTest(7, (chunk) => received.push(chunk));
+      await expect(pending).rejects.toThrowError(/ProviderUnavailable/);
+      // The CRLF-framed data frame ahead of the error must still have been parsed.
+      const delivered = received.map((chunk) => chunk.text_delta).join('');
+      expect(delivered).toBe('Hi');
+    });
+
+    it('throws ApiError on non-OK response', async () => {
+      // fetch resolves with an HTTP 500 and a plain-text body instead of a stream.
+      const serverError = new Response('nope', { status: 500 });
+      vi.spyOn(global, 'fetch').mockResolvedValueOnce(serverError);
+      api.setToken('jwt-token');
+      await expect(api.streamConnectorTest(7, () => {})).rejects.toBeInstanceOf(ApiError);
+    });
+
+    it('surfaces an SSE event: error frame as a thrown ApiError (#379)', async () => {
+      // For typed gateway failures the backend emits `event: error` followed by a
+      // sanitised `{code}` data line; the consumer has to reject, not swallow it.
+      const frames = [
+        'data: {"text_delta":"partial","done":false}\n\n',
+        'event: error\ndata: {"code":"ProviderUnavailable"}\n\n',
+      ];
+      const payload = new TextEncoder().encode(frames.join(''));
+      const body = new ReadableStream({
+        start(ctrl) {
+          ctrl.enqueue(payload);
+          ctrl.close();
+        },
+      });
+      const okResponse = new Response(body, {
+        status: 200,
+        headers: { 'Content-Type': 'text/event-stream' },
+      });
+      vi.spyOn(global, 'fetch').mockResolvedValueOnce(okResponse);
+
+      api.setToken('jwt-token');
+      const received: Array<{ text_delta?: string }> = [];
+
+      const pending = api.streamConnectorTest(7, (chunk) => received.push(chunk));
+      await expect(pending).rejects.toThrowError(/ProviderUnavailable/);
+      // The valid chunk ahead of the error frame must still have been delivered.
+      const delivered = received.map((chunk) => chunk.text_delta).join('');
+      expect(delivered).toBe('partial');
+    });
+ });
+
describe('frictionless join api', () => {
it('getJoinConfig hits the public collect endpoint', async () => {
mockFetch.mockResolvedValueOnce(
diff --git a/dashboard/lib/api-types.generated.ts b/dashboard/lib/api-types.generated.ts
index e6af9f56..72bc6c28 100644
--- a/dashboard/lib/api-types.generated.ts
+++ b/dashboard/lib/api-types.generated.ts
@@ -169,6 +169,149 @@ export interface paths {
patch?: never;
trace?: never;
};
+ "/api/admin/llm/audit": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /**
+ * List Audit Events
+ * @description Browse the LLM audit trail (admin-only).
+ *
+ * Read-only view over ``llm_audit_event`` with optional filters and
+ * pagination. The target connector's display name is joined in — credential
+ * material is never read or returned.
+ */
+ get: operations["list_audit_events_api_admin_llm_audit_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/admin/llm/audit.csv": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /**
+ * Export Audit Events Csv
+ * @description Export the (filtered) audit trail as CSV (admin-only).
+ *
+ * Honors the same filters as ``GET /audit``. Capped at
+ * ``_AUDIT_CSV_ROW_CAP`` rows to avoid unbounded streaming. Columns:
+ * timestamp, actor, event_type, target_connector, notes. Never includes
+ * credential material.
+ */
+ get: operations["export_audit_events_csv_api_admin_llm_audit_csv_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/admin/llm/connectors": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /** List Connectors Admin */
+ get: operations["list_connectors_admin_api_admin_llm_connectors_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/admin/llm/connectors/{connector_id}/cap": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ get?: never;
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ /**
+ * Set Connector Cap Admin
+ * @description Set or clear a connector's monthly token cap (admin-only, issue #339).
+ *
+ * ``monthly_token_cap = null`` clears the cap (unlimited). The change is
+ * pre-flight only: an in-flight gateway call already past its cap check is
+ * unaffected. Pydantic enforces the non-negative bound (``ge=0``); the
+ * service layer re-validates defensively.
+ */
+ patch: operations["set_connector_cap_admin_api_admin_llm_connectors__connector_id__cap_patch"];
+ trace?: never;
+ };
+ "/api/admin/llm/connectors/{connector_id}/revoke": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ get?: never;
+ put?: never;
+ /** Revoke Connector Admin */
+ post: operations["revoke_connector_admin_api_admin_llm_connectors__connector_id__revoke_post"];
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/admin/llm/policy": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /** Get Policy */
+ get: operations["get_policy_api_admin_llm_policy_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ /** Patch Policy */
+ patch: operations["patch_policy_api_admin_llm_policy_patch"];
+ trace?: never;
+ };
+ "/api/admin/llm/usage": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /** Get Usage */
+ get: operations["get_usage_api_admin_llm_usage_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
"/api/admin/settings": {
parameters: {
query?: never;
@@ -1299,24 +1442,25 @@ export interface paths {
patch: operations["assign_kiosk_api_kiosk__kiosk_id__assign_patch"];
trace?: never;
};
- "/api/public/collect/{code}": {
+ "/api/llm/connectors": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
- /** Preview */
- get: operations["preview_api_public_collect__code__get"];
+ /** List Connectors */
+ get: operations["list_connectors_api_llm_connectors_get"];
put?: never;
- post?: never;
+ /** Create Connector Endpoint */
+ post: operations["create_connector_endpoint_api_llm_connectors_post"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/collect/{code}/enrich-preview": {
+ "/api/llm/connectors/{connector_id}": {
parameters: {
query?: never;
header?: never;
@@ -1325,18 +1469,16 @@ export interface paths {
};
get?: never;
put?: never;
- /**
- * Enrich Preview
- * @description Lightweight Beatport BPM/key lookup for search-time vibes — no DB writes.
- */
- post: operations["enrich_preview_api_public_collect__code__enrich_preview_post"];
- delete?: never;
+ post?: never;
+ /** Delete Connector Endpoint */
+ delete: operations["delete_connector_endpoint_api_llm_connectors__connector_id__delete"];
options?: never;
head?: never;
- patch?: never;
+ /** Update Connector Metadata */
+ patch: operations["update_connector_metadata_api_llm_connectors__connector_id__patch"];
trace?: never;
};
- "/api/public/collect/{code}/guest/ensure-name": {
+ "/api/llm/connectors/{connector_id}/credentials": {
parameters: {
query?: never;
header?: never;
@@ -1344,148 +1486,196 @@ export interface paths {
cookie?: never;
};
get?: never;
- put?: never;
- /**
- * Ensure Name
- * @description Frictionless-join name management. Auto-generates a nickname when none is
- * set, or applies a manual rename. Gated on event.frictionless_join so it can
- * never bypass email verification on a hardened (non-frictionless) event.
- *
- * Not anonymous: requires the `wrzdj_human` HMAC-signed verified-human cookie
- * (set via Turnstile) through `require_verified_human_soft`. Calls without a
- * resolvable verified-human guest are rejected with 403
- * `human_verification_required`.
- */
- post: operations["ensure_name_api_public_collect__code__guest_ensure_name_post"];
+ /** Rotate Connector Credentials */
+ put: operations["rotate_connector_credentials_api_llm_connectors__connector_id__credentials_put"];
+ post?: never;
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/collect/{code}/join-config": {
+ "/api/llm/connectors/{connector_id}/default": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
+ get?: never;
+ put?: never;
/**
- * Join Config
- * @description Public, unauthenticated: lets the join page decide its gate mode on load.
+ * Set Connector As Default
+ * @description Pin this connector as the DJ's explicit default (issue #336).
+ *
+ * Atomically clears any other defaults the DJ owns before flipping this row,
+ * so the partial unique index never sees two True rows for the same user.
+ *
+ * Setting a disabled / auth_invalid connector as default is rejected with 400
+ * so DJs don't silently break their own routing — a default that the gateway
+ * would skip anyway is a footgun.
*/
- get: operations["join_config_api_public_collect__code__join_config_get"];
- put?: never;
- post?: never;
- delete?: never;
+ post: operations["set_connector_as_default_api_llm_connectors__connector_id__default_post"];
+ /**
+ * Unset Connector As Default
+ * @description Clear the explicit default — gateway resolution falls back to MRU.
+ */
+ delete: operations["unset_connector_as_default_api_llm_connectors__connector_id__default_delete"];
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/collect/{code}/leaderboard": {
+ "/api/llm/connectors/{connector_id}/stream-test": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
- /** Leaderboard */
- get: operations["leaderboard_api_public_collect__code__leaderboard_get"];
+ get?: never;
put?: never;
- post?: never;
+ /**
+ * Stream Test Connector
+ * @description Stream a short sentence through the connector as ``text/event-stream``.
+ *
+ * Validates ownership up front (404 for connectors the DJ doesn't own — never
+ * leaks existence). Each SSE ``data:`` frame is a JSON ``ChatResponseChunk``.
+ * On a typed gateway error an ``event: error`` frame is emitted carrying only a
+ * sanitised code (never the upstream payload), then the stream ends. Client
+ * disconnect cancels the upstream provider request — the gateway generator's
+ * ``finally`` writes the counts-only call log and closes the adapter.
+ *
+ * Unlike the public guest SSE stream (``api/sse.py``), this endpoint is
+ * authenticated, rate-limited (10/min), and strictly bounded (max 64 output
+ * tokens), so it holds the request-scoped DB session for the brief stream
+ * lifetime rather than opening a detached ``SessionLocal`` — the pool-pinning
+ * concern that drove ``api/sse.py``'s pattern applies to unauthenticated,
+ * indefinitely-open guest connections, not a short admin health probe.
+ */
+ post: operations["stream_test_connector_api_llm_connectors__connector_id__stream_test_post"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/collect/{code}/live-join-code": {
+ "/api/llm/connectors/{connector_id}/test": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
+ get?: never;
+ put?: never;
/**
- * Get Live Join Code
- * @description Return the live join_code for an event that has entered the live phase.
+ * Test Connector
+ * @description Run a health check and return a sanitised result.
*
- * Requires a verified human cookie (not email verification) so the join_code
- * is never leaked to unverified bots scraping /collect during the
- * collection-to-live transition. The join_code is otherwise revealed only
- * via the QR code at the event venue.
+ * Behaviour identical to the background monitor (issue #340), so the
+ * ``last_health_check_at`` / ``last_health_check_status`` columns and audit
+ * rows are written the same way on every invocation regardless of trigger
+ * source. See ``services/llm/health_check.py`` for the shared helper.
*/
- get: operations["get_live_join_code_api_public_collect__code__live_join_code_get"];
- put?: never;
- post?: never;
+ post: operations["test_connector_api_llm_connectors__connector_id__test_post"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/collect/{code}/profile": {
+ "/api/llm/feature-preferences": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
- /** Get Profile */
- get: operations["get_profile_api_public_collect__code__profile_get"];
+ /**
+ * List Feature Preferences
+ * @description List the DJ's per-feature connector pins (issue #337).
+ */
+ get: operations["list_feature_preferences_api_llm_feature_preferences_get"];
put?: never;
- /** Set Profile */
- post: operations["set_profile_api_public_collect__code__profile_post"];
+ /**
+ * Set Feature Preference Endpoint
+ * @description Pin (or re-pin) a connector to a feature for the current DJ.
+ *
+ * Validates connector ownership server-side (404 for IDs the DJ doesn't own,
+ * so another DJ's connector existence is never leaked) and rejects pinning a
+ * non-active connector (400) — the gateway would skip it anyway, so silently
+ * accepting it is a footgun.
+ */
+ post: operations["set_feature_preference_endpoint_api_llm_feature_preferences_post"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/collect/{code}/profile/me": {
+ "/api/llm/feature-preferences/{feature}": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
- /** My Picks */
- get: operations["my_picks_api_public_collect__code__profile_me_get"];
+ get?: never;
put?: never;
post?: never;
- delete?: never;
+ /**
+ * Clear Feature Preference Endpoint
+ * @description Clear the DJ's pin for ``feature`` (no-op if unset). Returns the new list.
+ */
+ delete: operations["clear_feature_preference_endpoint_api_llm_feature_preferences__feature__delete"];
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/collect/{code}/requests": {
+ "/api/llm/openrouter/models": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
- get?: never;
+ /**
+ * List Openrouter Models
+ * @description Return the OpenRouter model catalogue for the model-hint dropdown.
+ *
+ * Served from a process-wide TTL cache (refreshed hourly). The OpenRouter
+ * ``/models`` endpoint is public, so no connector credentials are required.
+ * Returns an empty list if the catalogue is unavailable — the frontend then
+ * falls back to a free-text model input.
+ */
+ get: operations["list_openrouter_models_api_llm_openrouter_models_get"];
put?: never;
- /** Submit */
- post: operations["submit_api_public_collect__code__requests_post"];
+ post?: never;
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/collect/{code}/requests/{request_id}/preview": {
+ "/api/llm/policy": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
- /** Request Preview */
- get: operations["request_preview_api_public_collect__code__requests__request_id__preview_get"];
+ /**
+ * Get Dj Policy
+ * @description DJ-readable connector policy (non-sensitive subset).
+ *
+ * The settings/ai page consumes this to fail *closed* — hiding connector
+ * types the admin has disabled rather than showing every provider and only
+ * discovering the block when the create call returns 403. Admin-only fields
+ * (e.g. ``llm_default_connector_id``) are intentionally excluded.
+ */
+ get: operations["get_dj_policy_api_llm_policy_get"];
put?: never;
post?: never;
delete?: never;
@@ -1494,71 +1684,71 @@ export interface paths {
patch?: never;
trace?: never;
};
- "/api/public/collect/{code}/vote": {
+ "/api/public/collect/{code}": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
- get?: never;
+ /** Preview */
+ get: operations["preview_api_public_collect__code__get"];
put?: never;
- /** Vote */
- post: operations["vote_api_public_collect__code__vote_post"];
+ post?: never;
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/e/{code}/bridge-status": {
+ "/api/public/collect/{code}/enrich-preview": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
+ get?: never;
+ put?: never;
/**
- * Get Public Bridge Status
- * @description Get bridge connection status for public display.
- *
- * Independent of track data — returns bridge connectivity even when
- * no track is currently playing. Resolves by join_code: serves guest-facing
- * kiosk display + overlay pages.
+ * Enrich Preview
+ * @description Lightweight Beatport BPM/key lookup for search-time vibes — no DB writes.
*/
- get: operations["get_public_bridge_status_api_public_e__code__bridge_status_get"];
- put?: never;
- post?: never;
+ post: operations["enrich_preview_api_public_collect__code__enrich_preview_post"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/e/{code}/history": {
+ "/api/public/collect/{code}/guest/ensure-name": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
+ get?: never;
+ put?: never;
/**
- * Get Public History
- * @description Get play history for public display.
+ * Ensure Name
+ * @description Frictionless-join name management. Auto-generates a nickname when none is
+ * set, or applies a manual rename. Gated on event.frictionless_join so it can
+ * never bypass email verification on a hardened (non-frictionless) event.
*
- * Returns the list of tracks played during the event, newest first.
- * Resolves by join_code: serves guest-facing kiosk display.
+ * Not anonymous: requires the `wrzdj_human` HMAC-signed verified-human cookie
+ * (set via Turnstile) through `require_verified_human_soft`. Calls without a
+ * resolvable verified-human guest are rejected with 403
+ * `human_verification_required`.
*/
- get: operations["get_public_history_api_public_e__code__history_get"];
- put?: never;
- post?: never;
+ post: operations["ensure_name_api_public_collect__code__guest_ensure_name_post"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
- "/api/public/e/{code}/nowplaying": {
+ "/api/public/collect/{code}/join-config": {
parameters: {
query?: never;
header?: never;
@@ -1566,15 +1756,10 @@ export interface paths {
cookie?: never;
};
/**
- * Get Public Now Playing
- * @description Get current now-playing track for public display.
- *
- * Returns the track currently playing from StageLinQ, or None if nothing playing.
- *
- * Resolves by join_code: this endpoint serves the kiosk display + OBS overlay
- * pages, which route by join_code per the post-PR-#324 public/guest URL contract.
+ * Join Config
+ * @description Public, unauthenticated: lets the join page decide its gate mode on load.
*/
- get: operations["get_public_now_playing_api_public_e__code__nowplaying_get"];
+ get: operations["join_config_api_public_collect__code__join_config_get"];
put?: never;
post?: never;
delete?: never;
@@ -1583,7 +1768,207 @@ export interface paths {
patch?: never;
trace?: never;
};
- "/api/public/events/{code}": {
+ "/api/public/collect/{code}/leaderboard": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /** Leaderboard */
+ get: operations["leaderboard_api_public_collect__code__leaderboard_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/collect/{code}/live-join-code": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /**
+ * Get Live Join Code
+ * @description Return the live join_code for an event that has entered the live phase.
+ *
+ * Requires a verified human cookie (not email verification) so the join_code
+ * is never leaked to unverified bots scraping /collect during the
+ * collection-to-live transition. The join_code is otherwise revealed only
+ * via the QR code at the event venue.
+ */
+ get: operations["get_live_join_code_api_public_collect__code__live_join_code_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/collect/{code}/profile": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /** Get Profile */
+ get: operations["get_profile_api_public_collect__code__profile_get"];
+ put?: never;
+ /** Set Profile */
+ post: operations["set_profile_api_public_collect__code__profile_post"];
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/collect/{code}/profile/me": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /** My Picks */
+ get: operations["my_picks_api_public_collect__code__profile_me_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/collect/{code}/requests": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ get?: never;
+ put?: never;
+ /** Submit */
+ post: operations["submit_api_public_collect__code__requests_post"];
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/collect/{code}/requests/{request_id}/preview": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /** Request Preview */
+ get: operations["request_preview_api_public_collect__code__requests__request_id__preview_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/collect/{code}/vote": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ get?: never;
+ put?: never;
+ /** Vote */
+ post: operations["vote_api_public_collect__code__vote_post"];
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/e/{code}/bridge-status": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /**
+ * Get Public Bridge Status
+ * @description Get bridge connection status for public display.
+ *
+ * Independent of track data — returns bridge connectivity even when
+ * no track is currently playing. Resolves by join_code: serves guest-facing
+ * kiosk display + overlay pages.
+ */
+ get: operations["get_public_bridge_status_api_public_e__code__bridge_status_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/e/{code}/history": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /**
+ * Get Public History
+ * @description Get play history for public display.
+ *
+ * Returns the list of tracks played during the event, newest first.
+ * Resolves by join_code: serves guest-facing kiosk display.
+ */
+ get: operations["get_public_history_api_public_e__code__history_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/e/{code}/nowplaying": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /**
+ * Get Public Now Playing
+ * @description Get current now-playing track for public display.
+ *
+ * Returns the track currently playing from StageLinQ, or None if nothing playing.
+ *
+ * Resolves by join_code: this endpoint serves the kiosk display + OBS overlay
+ * pages, which route by join_code per the post-PR-#324 public/guest URL contract.
+ */
+ get: operations["get_public_now_playing_api_public_e__code__nowplaying_get"];
+ put?: never;
+ post?: never;
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/public/events/{code}": {
parameters: {
query?: never;
header?: never;
@@ -1701,6 +2086,14 @@ export interface paths {
* unauthenticated DoS (unlimited long-lived connections exhausting FDs)
* and passive eavesdropping via 6-char event-code brute force.
*
+ * POOL SAFETY (issue #356): the one-shot existence/auth check runs inside a
+ * short-lived ``with SessionLocal()`` block whose pooled connection is
+ * returned BEFORE the EventSourceResponse is returned. An EventSource
+ * connection can stay open indefinitely, so we must NOT hold a
+ * request-scoped ``get_db`` session across the stream lifetime — doing so
+ * pinned one pooled connection per open stream and exhausted the QueuePool
+ * (size 5 + overflow 10 = 15 connections) under modest guest load.
+ *
* Event types:
* - request_created: New request submitted
* - request_status_changed: Request status update
@@ -2019,6 +2412,58 @@ export interface paths {
patch?: never;
trace?: never;
};
+ "/api/setbuilder/sets": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /**
+ * List Sets
+ * @description List the current DJ's sets, newest first.
+ */
+ get: operations["list_sets_api_setbuilder_sets_get"];
+ put?: never;
+ /**
+ * Create Set
+ * @description Create a new empty set owned by the current DJ.
+ */
+ post: operations["create_set_api_setbuilder_sets_post"];
+ delete?: never;
+ options?: never;
+ head?: never;
+ patch?: never;
+ trace?: never;
+ };
+ "/api/setbuilder/sets/{set_id}": {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ /**
+ * Get Set
+ * @description Get one of the current DJ's sets, or 404.
+ */
+ get: operations["get_set_api_setbuilder_sets__set_id__get"];
+ put?: never;
+ post?: never;
+ /**
+ * Delete Set
+ * @description Delete one of the current DJ's sets, or 404.
+ */
+ delete: operations["delete_set_api_setbuilder_sets__set_id__delete"];
+ options?: never;
+ head?: never;
+ /**
+ * Rename Set
+ * @description Rename one of the current DJ's sets, or 404.
+ */
+ patch: operations["rename_set_api_setbuilder_sets__set_id__patch"];
+ trace?: never;
+ };
"/api/tidal/auth/cancel": {
parameters: {
query?: never;
@@ -2288,13 +2733,101 @@ export interface components {
/** Source */
source: string;
};
- /** AdminEventOut */
- AdminEventOut: {
- /** Code */
- code: string;
+ /**
+ * AdminAuditOut
+ * @description Paginated audit-event browse response.
+ */
+ AdminAuditOut: {
+ /** Limit */
+ limit: number;
+ /** Offset */
+ offset: number;
+ /** Rows */
+ rows: components["schemas"]["AuditEventRow"][];
+ /** Total */
+ total: number;
+ };
+ /**
+ * AdminConnectorCapPatch
+ * @description Admin set/clear a connector's monthly token cap (issue #339).
+ *
+ * ``monthly_token_cap`` is **required** so intent is always explicit: an
+ * integer sets the cap, ``null`` clears it (unlimited). Omitting the field
+ * (an empty ``{}`` body) is rejected with 422 rather than silently treated as
+ * ``null`` — that would let an accidental no-field PATCH wipe a configured
+ * cap. A non-null value must be a non-negative integer; ``0`` means "no
+ * further calls this month". The upper bound is a sanity ceiling, not a
+ * billing limit.
+ */
+ AdminConnectorCapPatch: {
+ /** Monthly Token Cap */
+ monthly_token_cap: number | null;
+ };
+ /**
+ * AdminConnectorOut
+ * @description Admin view — adds the DJ's username for display.
+ */
+ AdminConnectorOut: {
+ /** Base Url Plain */
+ base_url_plain: string | null;
/**
- * Created At
- * Format: date-time
+ * Connector Type
+ * @enum {string}
+ */
+ connector_type: "openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey";
+ /**
+ * Created At
+ * Format: date-time
+ */
+ created_at: string;
+ /**
+ * Current Month Tokens
+ * @default 0
+ */
+ current_month_tokens: number;
+ /** Display Name */
+ display_name: string;
+ /** Dj Username */
+ dj_username: string;
+ /** Id */
+ id: number;
+ /**
+ * Is Default
+ * @default false
+ */
+ is_default: boolean;
+ /** Last Error */
+ last_error: string | null;
+ /** Last Health Check At */
+ last_health_check_at: string | null;
+ /** Last Health Check Status */
+ last_health_check_status: ("ok" | "auth_invalid" | "rate_limited" | "quota_exceeded" | "provider_unavailable" | "error") | null;
+ /** Last Used At */
+ last_used_at: string | null;
+ /** Model Hint */
+ model_hint: string | null;
+ /** Monthly Token Cap */
+ monthly_token_cap: number | null;
+ /**
+ * Status
+ * @enum {string}
+ */
+ status: "active" | "auth_invalid" | "disabled";
+ /**
+ * Updated At
+ * Format: date-time
+ */
+ updated_at: string;
+ /** User Id */
+ user_id: number;
+ };
+ /** AdminEventOut */
+ AdminEventOut: {
+ /** Code */
+ code: string;
+ /**
+ * Created At
+ * Format: date-time
*/
created_at: string;
/**
@@ -2320,6 +2853,40 @@ export interface components {
*/
request_count: number;
};
+ /** AdminPolicyOut */
+ AdminPolicyOut: {
+ /** Llm Apikey Connectors Enabled */
+ llm_apikey_connectors_enabled: boolean;
+ /** Llm Call Log Retention Days */
+ llm_call_log_retention_days: number;
+ /** Llm Compatible Connector Enabled */
+ llm_compatible_connector_enabled: boolean;
+ /** Llm Default Connector Id */
+ llm_default_connector_id: number | null;
+ };
+ /** AdminPolicyPatch */
+ AdminPolicyPatch: {
+ /**
+ * Clear Default
+ * @default false
+ */
+ clear_default: boolean;
+ /** Llm Apikey Connectors Enabled */
+ llm_apikey_connectors_enabled?: boolean | null;
+ /** Llm Call Log Retention Days */
+ llm_call_log_retention_days?: number | null;
+ /** Llm Compatible Connector Enabled */
+ llm_compatible_connector_enabled?: boolean | null;
+ /** Llm Default Connector Id */
+ llm_default_connector_id?: number | null;
+ };
+ /** AdminUsageOut */
+ AdminUsageOut: {
+ /** Days */
+ days: number;
+ /** Rows */
+ rows: components["schemas"]["UsageRow"][];
+ };
/** AdminUserCreate */
AdminUserCreate: {
/** Password */
@@ -2362,6 +2929,34 @@ export interface components {
/** Role */
role?: string | null;
};
+ /**
+ * AuditEventRow
+ * @description A single audit-trail row with joined display labels.
+ *
+ * Never includes credential material — only the target connector's
+ * human-readable display name (joined from ``llm_connectors``).
+ */
+ AuditEventRow: {
+ /** Actor User Id */
+ actor_user_id: number;
+ /** Actor Username */
+ actor_username: string;
+ /**
+ * Created At
+ * Format: date-time
+ */
+ created_at: string;
+ /** Event Type */
+ event_type: string;
+ /** Id */
+ id: number;
+ /** Notes */
+ notes: string | null;
+ /** Target Connector Display Name */
+ target_connector_display_name: string | null;
+ /** Target Connector Id */
+ target_connector_id: number | null;
+ };
/**
* BeatportEventSettings
* @description Beatport sync settings for an event.
@@ -2805,6 +3400,156 @@ export interface components {
/** Request Id */
request_id: number;
};
+ /**
+ * ConnectorCreate
+ * @description Provider-agnostic create payload.
+ *
+ * Field requirements vary by ``connector_type``:
+ *
+ * - ``openai_apikey`` / ``anthropic_apikey`` / ``openrouter_apikey`` /
+ * ``xai_apikey`` / ``gemini_apikey``: ``api_key`` required; ``base_url``
+ * and ``bearer`` are ignored.
+ * - ``openai_compatible``: ``base_url`` required; ``bearer`` optional;
+ * ``api_key`` is ignored.
+ * - ``bedrock``: ``aws_access_key_id``, ``aws_secret_access_key``,
+ * ``aws_region`` and ``aws_model_id`` required; other fields ignored.
+ * - ``azure_openai``: ``api_key``, ``azure_resource_name``,
+ * ``azure_deployment_name`` and ``azure_api_version`` all required.
+ *
+ * The combination is enforced by :meth:`_require_credentials_for_type`.
+ * See ``build_create_payload`` in ``services/llm/connector_storage.py``
+ * for the full validation flow (including key shape checks).
+ */
+ ConnectorCreate: {
+ /** Api Key */
+ api_key?: string | null;
+ /** Aws Access Key Id */
+ aws_access_key_id?: string | null;
+ /** Aws Model Id */
+ aws_model_id?: string | null;
+ /** Aws Region */
+ aws_region?: string | null;
+ /** Aws Secret Access Key */
+ aws_secret_access_key?: string | null;
+ /** Azure Api Version */
+ azure_api_version?: string | null;
+ /** Azure Deployment Name */
+ azure_deployment_name?: string | null;
+ /** Azure Resource Name */
+ azure_resource_name?: string | null;
+ /** Base Url */
+ base_url?: string | null;
+ /** Bearer */
+ bearer?: string | null;
+ /**
+ * Connector Type
+ * @enum {string}
+ */
+ connector_type: "openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey";
+ /** Display Name */
+ display_name: string;
+ /** Model Hint */
+ model_hint?: string | null;
+ };
+ /**
+ * ConnectorCredentialsRotate
+ * @description Rotation payload — at least one credential field must be supplied.
+ *
+ * Field semantics mirror :class:`ConnectorCreate`. The actual field required
+ * depends on the connector being rotated (validated in ``rotate_credentials``).
+ */
+ ConnectorCredentialsRotate: {
+ /** Api Key */
+ api_key?: string | null;
+ /** Aws Access Key Id */
+ aws_access_key_id?: string | null;
+ /** Aws Model Id */
+ aws_model_id?: string | null;
+ /** Aws Region */
+ aws_region?: string | null;
+ /** Aws Secret Access Key */
+ aws_secret_access_key?: string | null;
+ /** Azure Api Version */
+ azure_api_version?: string | null;
+ /** Azure Deployment Name */
+ azure_deployment_name?: string | null;
+ /** Azure Resource Name */
+ azure_resource_name?: string | null;
+ /** Base Url */
+ base_url?: string | null;
+ /** Bearer */
+ bearer?: string | null;
+ };
+ /**
+ * ConnectorOut
+ * @description Public-safe connector view — never includes the credential blob.
+ */
+ ConnectorOut: {
+ /** Base Url Plain */
+ base_url_plain: string | null;
+ /**
+ * Connector Type
+ * @enum {string}
+ */
+ connector_type: "openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey";
+ /**
+ * Created At
+ * Format: date-time
+ */
+ created_at: string;
+ /** Display Name */
+ display_name: string;
+ /** Id */
+ id: number;
+ /**
+ * Is Default
+ * @default false
+ */
+ is_default: boolean;
+ /** Last Error */
+ last_error: string | null;
+ /** Last Health Check At */
+ last_health_check_at: string | null;
+ /** Last Health Check Status */
+ last_health_check_status: ("ok" | "auth_invalid" | "rate_limited" | "quota_exceeded" | "provider_unavailable" | "error") | null;
+ /** Last Used At */
+ last_used_at: string | null;
+ /** Model Hint */
+ model_hint: string | null;
+ /** Monthly Token Cap */
+ monthly_token_cap: number | null;
+ /**
+ * Status
+ * @enum {string}
+ */
+ status: "active" | "auth_invalid" | "disabled";
+ /**
+ * Updated At
+ * Format: date-time
+ */
+ updated_at: string;
+ /** User Id */
+ user_id: number;
+ };
+ /**
+ * ConnectorPatch
+ * @description Metadata-only patch (no credential rotation here).
+ */
+ ConnectorPatch: {
+ /** Display Name */
+ display_name?: string | null;
+ /** Model Hint */
+ model_hint?: string | null;
+ };
+ /** ConnectorTestResult */
+ ConnectorTestResult: {
+ /** Error Code */
+ error_code: string | null;
+ /** Message */
+ message: string | null;
+ /** Ok */
+ ok: boolean;
+ };
/**
* DisplaySettingsResponse
* @description Response for display settings update.
@@ -2847,6 +3592,28 @@ export interface components {
/** Requests Open */
requests_open?: boolean | null;
};
+ /**
+ * DjPolicyOut
+ * @description DJ-readable connector policy — the non-sensitive subset of the admin
+ * policy surface.
+ *
+ * Lets the settings/ai page fail *closed*: a normal DJ can learn which
+ * connector types the admin has enabled (so disallowed providers are hidden
+ * in the picker) without exposing admin-only fields such as
+ * ``llm_default_connector_id``.
+ *
+ * ``allowed_connector_types`` is the pre-computed set of connector types a DJ
+ * may create given the two toggles, so the frontend doesn't have to hard-code
+ * the api-key-vs-compatible mapping.
+ */
+ DjPolicyOut: {
+ /** Allowed Connector Types */
+ allowed_connector_types: ("openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey")[];
+ /** Llm Apikey Connectors Enabled */
+ llm_apikey_connectors_enabled: boolean;
+ /** Llm Compatible Connector Enabled */
+ llm_compatible_connector_enabled: boolean;
+ };
/** EnrichPreviewItem */
EnrichPreviewItem: {
/** Artist */
@@ -3012,6 +3779,42 @@ export interface components {
/** Name */
name?: string | null;
};
+ /**
+ * FeaturePreferenceOut
+ * @description A single per-feature connector pin (issue #337).
+ */
+ FeaturePreferenceOut: {
+ /** Connector Id */
+ connector_id: number;
+ /**
+ * Feature
+ * @enum {string}
+ */
+ feature: "recommendation" | "set_builder";
+ };
+ /**
+ * FeaturePreferenceSet
+ * @description Set/change a per-feature pin. Upsert — replaces any existing pin.
+ */
+ FeaturePreferenceSet: {
+ /** Connector Id */
+ connector_id: number;
+ /**
+ * Feature
+ * @enum {string}
+ */
+ feature: "recommendation" | "set_builder";
+ };
+ /**
+ * FeaturePreferencesListOut
+ * @description All of a DJ's per-feature pins + the catalogue of pinnable features.
+ */
+ FeaturePreferencesListOut: {
+ /** Known Features */
+ known_features: ("recommendation" | "set_builder")[];
+ /** Preferences */
+ preferences: components["schemas"]["FeaturePreferenceOut"][];
+ };
/** GuestNowPlaying */
GuestNowPlaying: {
/** Album Art Url */
@@ -3767,6 +4570,102 @@ export interface components {
catalog_search: components["schemas"]["CapabilityStatus"];
playlist_sync: components["schemas"]["CapabilityStatus"];
};
+ /**
+ * SetCreate
+ * @description Body for creating a new (empty) set.
+ */
+ SetCreate: {
+ /** Event Id */
+ event_id?: number | null;
+ /** Name */
+ name: string;
+ };
+ /**
+ * SetDetail
+ * @description Full set record (Phase 0: no slot/curve expansion yet).
+ */
+ SetDetail: {
+ /** Bpm Ceiling */
+ bpm_ceiling: number | null;
+ /** Bpm Floor */
+ bpm_floor: number | null;
+ /**
+ * Created At
+ * Format: date-time
+ */
+ created_at: string;
+ /** Event Id */
+ event_id: number | null;
+ /** Exported At */
+ exported_at: string | null;
+ /** Id */
+ id: number;
+ /** Key Strictness */
+ key_strictness: number;
+ /** Name */
+ name: string;
+ /**
+ * Sharing Mode
+ * @enum {string}
+ */
+ sharing_mode: "private" | "invite_only";
+ /**
+ * Status
+ * @enum {string}
+ */
+ status: "draft" | "locked" | "exported";
+ /** Target Duration Sec */
+ target_duration_sec: number | null;
+ /** Tidal Playlist Id */
+ tidal_playlist_id: string | null;
+ /**
+ * Updated At
+ * Format: date-time
+ */
+ updated_at: string;
+ /** Vibe Theme */
+ vibe_theme: string | null;
+ };
+ /**
+ * SetRename
+ * @description Body for renaming a set.
+ */
+ SetRename: {
+ /** Name */
+ name: string;
+ };
+ /**
+ * SetSummary
+ * @description Set list item (no children).
+ */
+ SetSummary: {
+ /**
+ * Created At
+ * Format: date-time
+ */
+ created_at: string;
+ /** Event Id */
+ event_id: number | null;
+ /** Id */
+ id: number;
+ /** Name */
+ name: string;
+ /**
+ * Sharing Mode
+ * @enum {string}
+ */
+ sharing_mode: "private" | "invite_only";
+ /**
+ * Status
+ * @enum {string}
+ */
+ status: "draft" | "locked" | "exported";
+ /**
+ * Updated At
+ * Format: date-time
+ */
+ updated_at: string;
+ };
/** StatusMessageResponse */
StatusMessageResponse: {
/** Message */
@@ -3995,6 +4894,30 @@ export interface components {
/** Tidal Sync Enabled */
tidal_sync_enabled?: boolean | null;
};
+ /** UsageRow */
+ UsageRow: {
+ /** Connector Id */
+ connector_id: number;
+ /**
+ * Connector Type
+ * @enum {string}
+ */
+ connector_type: "openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey";
+ /** Display Name */
+ display_name: string;
+ /** Dj Username */
+ dj_username: string;
+ /** Error Count */
+ error_count: number;
+ /** Error Rate */
+ error_rate: number;
+ /** Total Calls */
+ total_calls: number;
+ /** Total Tokens In */
+ total_tokens_in: number;
+ /** Total Tokens Out */
+ total_tokens_out: number;
+ };
/** UserOut */
UserOut: {
/**
@@ -4403,9 +5326,16 @@ export interface operations {
};
};
};
- admin_get_settings_api_admin_settings_get: {
+ list_audit_events_api_admin_llm_audit_get: {
parameters: {
- query?: never;
+ query?: {
+ event_type?: string | null;
+ actor_user_id?: number | null;
+ target_connector_id?: number | null;
+ days?: number;
+ limit?: number;
+ offset?: number;
+ };
header?: never;
path?: never;
cookie?: never;
@@ -4418,31 +5348,41 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["SystemSettingsOut"];
+ "application/json": components["schemas"]["AdminAuditOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
};
};
};
};
- admin_update_settings_api_admin_settings_patch: {
+ export_audit_events_csv_api_admin_llm_audit_csv_get: {
parameters: {
- query?: never;
+ query?: {
+ event_type?: string | null;
+ actor_user_id?: number | null;
+ target_connector_id?: number | null;
+ days?: number;
+ };
header?: never;
path?: never;
cookie?: never;
};
- requestBody: {
- content: {
- "application/json": components["schemas"]["SystemSettingsUpdate"];
- };
- };
+ requestBody?: never;
responses: {
- /** @description Successful Response */
+ /** @description CSV export of the filtered audit trail. */
200: {
headers: {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["SystemSettingsOut"];
+ "text/csv": string;
};
};
/** @description Validation Error */
@@ -4456,7 +5396,7 @@ export interface operations {
};
};
};
- admin_stats_api_admin_stats_get: {
+ list_connectors_admin_api_admin_llm_connectors_get: {
parameters: {
query?: never;
header?: never;
@@ -4471,23 +5411,25 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["SystemStats"];
+ "application/json": components["schemas"]["AdminConnectorOut"][];
};
};
};
};
- admin_list_users_api_admin_users_get: {
+ set_connector_cap_admin_api_admin_llm_connectors__connector_id__cap_patch: {
parameters: {
- query?: {
- page?: number;
- limit?: number;
- role?: string | null;
- };
+ query?: never;
header?: never;
- path?: never;
+ path: {
+ connector_id: number;
+ };
cookie?: never;
};
- requestBody?: never;
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["AdminConnectorCapPatch"];
+ };
+ };
responses: {
/** @description Successful Response */
200: {
@@ -4495,7 +5437,7 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["PaginatedResponse"];
+ "application/json": components["schemas"]["AdminConnectorOut"];
};
};
/** @description Validation Error */
@@ -4509,26 +5451,24 @@ export interface operations {
};
};
};
- admin_create_user_api_admin_users_post: {
+ revoke_connector_admin_api_admin_llm_connectors__connector_id__revoke_post: {
parameters: {
query?: never;
header?: never;
- path?: never;
- cookie?: never;
- };
- requestBody: {
- content: {
- "application/json": components["schemas"]["AdminUserCreate"];
+ path: {
+ connector_id: number;
};
+ cookie?: never;
};
+ requestBody?: never;
responses: {
/** @description Successful Response */
- 201: {
+ 200: {
headers: {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["AdminUserOut"];
+ "application/json": components["schemas"]["AdminConnectorOut"];
};
};
/** @description Validation Error */
@@ -4542,40 +5482,263 @@ export interface operations {
};
};
};
- admin_delete_user_api_admin_users__user_id__delete: {
+ get_policy_api_admin_llm_policy_get: {
parameters: {
query?: never;
header?: never;
- path: {
- user_id: number;
- };
+ path?: never;
cookie?: never;
};
requestBody?: never;
responses: {
/** @description Successful Response */
- 204: {
- headers: {
- [name: string]: unknown;
- };
- content?: never;
- };
- /** @description Validation Error */
- 422: {
+ 200: {
headers: {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["HTTPValidationError"];
+ "application/json": components["schemas"]["AdminPolicyOut"];
};
};
};
};
- admin_update_user_api_admin_users__user_id__patch: {
+ patch_policy_api_admin_llm_policy_patch: {
parameters: {
query?: never;
header?: never;
- path: {
+ path?: never;
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["AdminPolicyPatch"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["AdminPolicyOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ get_usage_api_admin_llm_usage_get: {
+ parameters: {
+ query?: {
+ days?: number;
+ };
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["AdminUsageOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ admin_get_settings_api_admin_settings_get: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["SystemSettingsOut"];
+ };
+ };
+ };
+ };
+ admin_update_settings_api_admin_settings_patch: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["SystemSettingsUpdate"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["SystemSettingsOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ admin_stats_api_admin_stats_get: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["SystemStats"];
+ };
+ };
+ };
+ };
+ admin_list_users_api_admin_users_get: {
+ parameters: {
+ query?: {
+ page?: number;
+ limit?: number;
+ role?: string | null;
+ };
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["PaginatedResponse"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ admin_create_user_api_admin_users_post: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["AdminUserCreate"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 201: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["AdminUserOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ admin_delete_user_api_admin_users__user_id__delete: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ user_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 204: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content?: never;
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ admin_update_user_api_admin_users__user_id__patch: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
user_id: number;
};
cookie?: never;
@@ -6168,7 +7331,461 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["AcceptAllResponse"];
+ "application/json": components["schemas"]["AcceptAllResponse"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ bulk_delete_requests_endpoint_api_events__code__requests_bulk_delete: {
+ parameters: {
+ query?: {
+ status?: string | null;
+ };
+ header?: never;
+ path: {
+ code: string;
+ };
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["BulkActionResponse"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ reject_all_requests_endpoint_api_events__code__requests_reject_all_post: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ code: string;
+ };
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["BulkActionResponse"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ event_search_api_events__code__search_get: {
+ parameters: {
+ query: {
+ q: string;
+ };
+ header?: never;
+ path: {
+ code: string;
+ };
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["SearchResult"][];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ unarchive_event_endpoint_api_events__code__unarchive_post: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ code: string;
+ };
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["EventOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ api_health_check_api_health_get: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": unknown;
+ };
+ };
+ };
+ };
+ list_my_kiosks_api_kiosk_mine_get: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["KioskOut"][];
+ };
+ };
+ };
+ };
+ complete_kiosk_pairing_api_kiosk_pair__pair_code__complete_post: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ pair_code: string;
+ };
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["KioskCompletePairingRequest"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["KioskOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ delete_kiosk_endpoint_api_kiosk__kiosk_id__delete: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ kiosk_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 204: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content?: never;
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ rename_kiosk_endpoint_api_kiosk__kiosk_id__patch: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ kiosk_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["KioskRenameRequest"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["KioskOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ assign_kiosk_api_kiosk__kiosk_id__assign_patch: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ kiosk_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["KioskAssignRequest"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["KioskOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ list_connectors_api_llm_connectors_get: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["ConnectorOut"][];
+ };
+ };
+ };
+ };
+ create_connector_endpoint_api_llm_connectors_post: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["ConnectorCreate"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 201: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["ConnectorOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ delete_connector_endpoint_api_llm_connectors__connector_id__delete: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ connector_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 204: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content?: never;
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ update_connector_metadata_api_llm_connectors__connector_id__patch: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ connector_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["ConnectorPatch"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["ConnectorOut"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ rotate_connector_credentials_api_llm_connectors__connector_id__credentials_put: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ connector_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["ConnectorCredentialsRotate"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["ConnectorOut"];
};
};
/** @description Validation Error */
@@ -6182,14 +7799,12 @@ export interface operations {
};
};
};
- bulk_delete_requests_endpoint_api_events__code__requests_bulk_delete: {
+ set_connector_as_default_api_llm_connectors__connector_id__default_post: {
parameters: {
- query?: {
- status?: string | null;
- };
+ query?: never;
header?: never;
path: {
- code: string;
+ connector_id: number;
};
cookie?: never;
};
@@ -6201,8 +7816,22 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["BulkActionResponse"];
+ "application/json": components["schemas"]["ConnectorOut"];
+ };
+ };
+ /** @description Connector cannot be set as default (e.g. disabled or auth_invalid). */
+ 400: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content?: never;
+ };
+ /** @description Connector not found for current user. */
+ 404: {
+ headers: {
+ [name: string]: unknown;
};
+ content?: never;
};
/** @description Validation Error */
422: {
@@ -6215,12 +7844,12 @@ export interface operations {
};
};
};
- reject_all_requests_endpoint_api_events__code__requests_reject_all_post: {
+ unset_connector_as_default_api_llm_connectors__connector_id__default_delete: {
parameters: {
query?: never;
header?: never;
path: {
- code: string;
+ connector_id: number;
};
cookie?: never;
};
@@ -6232,8 +7861,15 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["BulkActionResponse"];
+ "application/json": components["schemas"]["ConnectorOut"];
+ };
+ };
+ /** @description Connector not found for current user. */
+ 404: {
+ headers: {
+ [name: string]: unknown;
};
+ content?: never;
};
/** @description Validation Error */
422: {
@@ -6246,14 +7882,12 @@ export interface operations {
};
};
};
- event_search_api_events__code__search_get: {
+ stream_test_connector_api_llm_connectors__connector_id__stream_test_post: {
parameters: {
- query: {
- q: string;
- };
+ query?: never;
header?: never;
path: {
- code: string;
+ connector_id: number;
};
cookie?: never;
};
@@ -6265,7 +7899,7 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["SearchResult"][];
+ "application/json": unknown;
};
};
/** @description Validation Error */
@@ -6279,12 +7913,12 @@ export interface operations {
};
};
};
- unarchive_event_endpoint_api_events__code__unarchive_post: {
+ test_connector_api_llm_connectors__connector_id__test_post: {
parameters: {
query?: never;
header?: never;
path: {
- code: string;
+ connector_id: number;
};
cookie?: never;
};
@@ -6296,7 +7930,7 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["EventOut"];
+ "application/json": components["schemas"]["ConnectorTestResult"];
};
};
/** @description Validation Error */
@@ -6310,7 +7944,7 @@ export interface operations {
};
};
};
- api_health_check_api_health_get: {
+ list_feature_preferences_api_llm_feature_preferences_get: {
parameters: {
query?: never;
header?: never;
@@ -6325,19 +7959,23 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": unknown;
+ "application/json": components["schemas"]["FeaturePreferencesListOut"];
};
};
};
};
- list_my_kiosks_api_kiosk_mine_get: {
+ set_feature_preference_endpoint_api_llm_feature_preferences_post: {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
- requestBody?: never;
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["FeaturePreferenceSet"];
+ };
+ };
responses: {
/** @description Successful Response */
200: {
@@ -6345,34 +7983,22 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["KioskOut"][];
+ "application/json": components["schemas"]["FeaturePreferencesListOut"];
};
};
- };
- };
- complete_kiosk_pairing_api_kiosk_pair__pair_code__complete_post: {
- parameters: {
- query?: never;
- header?: never;
- path: {
- pair_code: string;
- };
- cookie?: never;
- };
- requestBody: {
- content: {
- "application/json": components["schemas"]["KioskCompletePairingRequest"];
- };
- };
- responses: {
- /** @description Successful Response */
- 200: {
+ /** @description Connector is not active and cannot be pinned. */
+ 400: {
headers: {
[name: string]: unknown;
};
- content: {
- "application/json": components["schemas"]["KioskOut"];
+ content?: never;
+ };
+ /** @description Connector not found for current user. */
+ 404: {
+ headers: {
+ [name: string]: unknown;
};
+ content?: never;
};
/** @description Validation Error */
422: {
@@ -6385,23 +8011,25 @@ export interface operations {
};
};
};
- delete_kiosk_endpoint_api_kiosk__kiosk_id__delete: {
+ clear_feature_preference_endpoint_api_llm_feature_preferences__feature__delete: {
parameters: {
query?: never;
header?: never;
path: {
- kiosk_id: number;
+ feature: "recommendation" | "set_builder";
};
cookie?: never;
};
requestBody?: never;
responses: {
/** @description Successful Response */
- 204: {
+ 200: {
headers: {
[name: string]: unknown;
};
- content?: never;
+ content: {
+ "application/json": components["schemas"]["FeaturePreferencesListOut"];
+ };
};
/** @description Validation Error */
422: {
@@ -6414,20 +8042,14 @@ export interface operations {
};
};
};
- rename_kiosk_endpoint_api_kiosk__kiosk_id__patch: {
+ list_openrouter_models_api_llm_openrouter_models_get: {
parameters: {
query?: never;
header?: never;
- path: {
- kiosk_id: number;
- };
+ path?: never;
cookie?: never;
};
- requestBody: {
- content: {
- "application/json": components["schemas"]["KioskRenameRequest"];
- };
- };
+ requestBody?: never;
responses: {
/** @description Successful Response */
200: {
@@ -6435,34 +8057,19 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["KioskOut"];
- };
- };
- /** @description Validation Error */
- 422: {
- headers: {
- [name: string]: unknown;
- };
- content: {
- "application/json": components["schemas"]["HTTPValidationError"];
+ "application/json": components["schemas"]["AIModelsResponse"];
};
};
};
};
- assign_kiosk_api_kiosk__kiosk_id__assign_patch: {
+ get_dj_policy_api_llm_policy_get: {
parameters: {
query?: never;
header?: never;
- path: {
- kiosk_id: number;
- };
+ path?: never;
cookie?: never;
};
- requestBody: {
- content: {
- "application/json": components["schemas"]["KioskAssignRequest"];
- };
- };
+ requestBody?: never;
responses: {
/** @description Successful Response */
200: {
@@ -6470,17 +8077,22 @@ export interface operations {
[name: string]: unknown;
};
content: {
- "application/json": components["schemas"]["KioskOut"];
+ "application/json": components["schemas"]["DjPolicyOut"];
};
};
- /** @description Validation Error */
- 422: {
+ /** @description Not authenticated (missing or invalid bearer token). */
+ 401: {
headers: {
[name: string]: unknown;
};
- content: {
- "application/json": components["schemas"]["HTTPValidationError"];
+ content?: never;
+ };
+ /** @description Authenticated but not an active DJ (e.g. pending approval). */
+ 403: {
+ headers: {
+ [name: string]: unknown;
};
+ content?: never;
};
};
};
@@ -7617,6 +9229,154 @@ export interface operations {
};
};
};
+ list_sets_api_setbuilder_sets_get: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["SetSummary"][];
+ };
+ };
+ };
+ };
+ create_set_api_setbuilder_sets_post: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path?: never;
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["SetCreate"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 201: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["SetDetail"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ get_set_api_setbuilder_sets__set_id__get: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ set_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["SetDetail"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ delete_set_api_setbuilder_sets__set_id__delete: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ set_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody?: never;
+ responses: {
+ /** @description Successful Response */
+ 204: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content?: never;
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
+ rename_set_api_setbuilder_sets__set_id__patch: {
+ parameters: {
+ query?: never;
+ header?: never;
+ path: {
+ set_id: number;
+ };
+ cookie?: never;
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["SetRename"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["SetDetail"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ headers: {
+ [name: string]: unknown;
+ };
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
cancel_auth_api_tidal_auth_cancel_post: {
parameters: {
query?: never;
diff --git a/dashboard/lib/api-types.ts b/dashboard/lib/api-types.ts
index c177b05b..c132e30a 100644
--- a/dashboard/lib/api-types.ts
+++ b/dashboard/lib/api-types.ts
@@ -52,6 +52,32 @@ export type AIModelInfo = Schemas['AIModelInfo'];
export type AIModelsResponse = Schemas['AIModelsResponse'];
export type AISettings = Schemas['AISettingsOut'];
export type AISettingsUpdate = Schemas['AISettingsUpdate'];
+
+// LLM gateway (issue #329)
+export type LlmConnector = Schemas['ConnectorOut'];
+export type LlmAdminConnector = Schemas['AdminConnectorOut'];
+export type LlmConnectorCreate = Schemas['ConnectorCreate'];
+export type LlmConnectorPatch = Schemas['ConnectorPatch'];
+export type LlmConnectorCredentialsRotate = Schemas['ConnectorCredentialsRotate'];
+export type LlmConnectorTestResult = Schemas['ConnectorTestResult'];
+export type LlmAdminPolicy = Schemas['AdminPolicyOut'];
+export type LlmAdminPolicyPatch = Schemas['AdminPolicyPatch'];
+// Monthly token cap (issue #339)
+export type LlmAdminConnectorCapPatch = Schemas['AdminConnectorCapPatch'];
+export type LlmDjPolicy = Schemas['DjPolicyOut'];
+export type LlmAdminUsage = Schemas['AdminUsageOut'];
+export type LlmUsageRow = Schemas['UsageRow'];
+// LLM audit trail (issue #341)
+export type LlmAdminAudit = Schemas['AdminAuditOut'];
+export type LlmAuditRow = Schemas['AuditEventRow'];
+// Per-feature connector preference (issue #337)
+export type LlmFeaturePreference = Schemas['FeaturePreferenceOut'];
+export type LlmFeaturePreferences = Schemas['FeaturePreferencesListOut'];
+export type LlmFeaturePreferenceSet = Schemas['FeaturePreferenceSet'];
+export type LlmFeatureKey = Schemas['FeaturePreferenceOut']['feature'];
+// Derive from schema so backend enum changes propagate to TS automatically.
+export type LlmConnectorType = Schemas['ConnectorOut']['connector_type'];
+export type LlmConnectorStatus = Schemas['ConnectorOut']['status'];
export type ActivityLogEntry = Schemas['ActivityLogEntry'];
export type CapabilityStatus = Schemas['CapabilityStatus'];
export type ServiceCapabilities = Schemas['ServiceCapabilities'];
diff --git a/dashboard/lib/api.ts b/dashboard/lib/api.ts
index 6c4f7d3e..8db9635f 100644
--- a/dashboard/lib/api.ts
+++ b/dashboard/lib/api.ts
@@ -5,6 +5,20 @@ import type {
AISettings,
AISettingsUpdate,
ActivityLogEntry,
+ LlmAdminAudit,
+ LlmAdminConnector,
+ LlmAdminPolicy,
+ LlmAdminPolicyPatch,
+ LlmAdminUsage,
+ LlmConnector,
+ LlmConnectorCreate,
+ LlmConnectorCredentialsRotate,
+ LlmConnectorPatch,
+ LlmConnectorTestResult,
+ LlmDjPolicy,
+ LlmFeatureKey,
+ LlmFeaturePreferences,
+ LlmFeaturePreferenceSet,
ArchivedEvent,
BeatportEventSettings,
BeatportSearchResult,
@@ -52,6 +66,25 @@ export type {
AIModelsResponse,
AISettings,
AISettingsUpdate,
+ LlmAdminAudit,
+ LlmAdminConnector,
+ LlmAdminPolicy,
+ LlmAdminPolicyPatch,
+ LlmAdminUsage,
+ LlmAuditRow,
+ LlmConnector,
+ LlmConnectorCreate,
+ LlmConnectorCredentialsRotate,
+ LlmConnectorPatch,
+ LlmConnectorStatus,
+ LlmConnectorTestResult,
+ LlmConnectorType,
+ LlmDjPolicy,
+ LlmFeatureKey,
+ LlmFeaturePreference,
+ LlmFeaturePreferences,
+ LlmFeaturePreferenceSet,
+ LlmUsageRow,
ArchivedEvent,
BeatportEventSettings,
BeatportSearchResult,
@@ -103,6 +136,17 @@ export type {
VoteResponse,
} from './api-types';
+// ========== Admin LLM audit trail filters (issue #341) ==========
+
+export interface AdminLlmAuditFilters {
+ event_type?: string;
+ actor_user_id?: number;
+ target_connector_id?: number;
+ days?: number;
+ limit?: number;
+ offset?: number;
+}
+
// ========== Pre-Event Collection Types ==========
export interface CollectEventPreview {
@@ -231,6 +275,26 @@ export class HumanVerificationRequiredError extends ApiError {
}
}
+/**
+ * One incremental chunk of a streamed LLM response (mirrors the backend
+ * `ChatResponseChunk`). Non-final chunks carry `text_delta` and/or
+ * `tool_call_deltas`; the final chunk has `done: true` plus `stop_reason` and
+ * (when reported) `usage`. Hand-written client type — SSE chunks are not part of
+ * the REST OpenAPI schema.
+ */
+export interface LlmStreamChunk {
+ text_delta?: string;
+ tool_call_deltas?: Array<{
+ index: number;
+ id?: string | null;
+ name?: string | null;
+ input_json_fragment?: string;
+ }>;
+ stop_reason?: 'end_turn' | 'tool_use' | 'max_tokens' | 'error' | null;
+ usage?: { prompt: number; completion: number } | null;
+ done?: boolean;
+}
+
/**
* Wrap a guest-public fetch in 403-human-verification-required retry logic.
* Caller passes a `reverify` async function that re-runs the Turnstile
@@ -1173,6 +1237,244 @@ class ApiClient {
});
}
+ // ========== LLM connectors (per-DJ) ==========
+
+  async listLlmConnectors(): Promise<LlmConnector[]> {
+    return this.fetch('/api/llm/connectors');
+  }
+
+  // DJ-readable connector policy (non-sensitive subset). The AI settings UI (now
+  // on the account page) uses this to fail closed — hiding connector types the
+  // admin disabled — instead of falling back to "all types allowed" on the admin-only endpoint.
+  async getLlmPolicy(): Promise<LlmDjPolicy> {
+    return this.fetch('/api/llm/policy');
+  }
+
+  async listOpenRouterModels(): Promise<AIModelsResponse> {
+    return this.fetch('/api/llm/openrouter/models');
+  }
+
+  async createLlmConnector(data: LlmConnectorCreate): Promise<LlmConnector> {
+    return this.fetch('/api/llm/connectors', {
+      method: 'POST',
+      body: JSON.stringify(data),
+    });
+  }
+
+  async updateLlmConnector(id: number, data: LlmConnectorPatch): Promise<LlmConnector> {
+    return this.fetch(`/api/llm/connectors/${id}`, {
+      method: 'PATCH',
+      body: JSON.stringify(data),
+    });
+  }
+
+  async rotateLlmConnectorCredentials(
+    id: number,
+    data: LlmConnectorCredentialsRotate,
+  ): Promise<LlmConnector> {
+    return this.fetch(`/api/llm/connectors/${id}/credentials`, {
+      method: 'PUT',
+      body: JSON.stringify(data),
+    });
+  }
+
+  async testLlmConnector(id: number): Promise<LlmConnectorTestResult> {
+    return this.fetch(`/api/llm/connectors/${id}/test`, { method: 'POST' });
+  }
+
+  /**
+   * Stream a short health-check sentence through a connector via SSE.
+   *
+   * Uses fetch + ReadableStream rather than EventSource because EventSource
+   * cannot send the Authorization header this authenticated endpoint requires.
+   * Pass an AbortSignal to cancel — aborting closes the connection, which the
+   * backend treats as a client disconnect and cancels the upstream provider
+   * request. `onChunk` runs per parsed data frame; `event: error` frames throw ApiError.
+   */
+  async streamConnectorTest(
+    id: number,
+    onChunk: (chunk: LlmStreamChunk) => void,
+    signal?: AbortSignal,
+  ): Promise<void> {
+    const headers = new Headers({ Accept: 'text/event-stream' });
+    if (this.token) headers.set('Authorization', `Bearer ${this.token}`);
+
+    const response = await fetch(`${getApiUrl()}/api/llm/connectors/${id}/stream-test`, {
+      method: 'POST',
+      headers,
+      signal,
+    });
+    if (!response.ok || !response.body) {
+      if (response.status === 401 && this.onUnauthorized) this.onUnauthorized();
+      throw new ApiError('Stream test failed', response.status);
+    }
+
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+    // SSE frames are separated by a blank line. The spec allows LF (`\n\n`) or
+    // CRLF (`\r\n\r\n`) terminators, so match either — a CRLF-emitting server or
+    // proxy must not leave frames (including `event: error`) unparsed.
+    const frameBoundary = /\r?\n\r?\n/;
+    try {
+      for (;;) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        buffer += decoder.decode(value, { stream: true });
+        let sep: number;
+        while ((sep = buffer.search(frameBoundary)) !== -1) {
+          const frame = buffer.slice(0, sep);
+          const boundary = buffer.slice(sep).match(frameBoundary)?.[0] ?? '\n\n';
+          buffer = buffer.slice(sep + boundary.length);
+          // A frame may carry an `event:` name plus one or more `data:` lines.
+          // The backend emits `event: error` for typed gateway failures, so we
+          // must inspect the event type — not just blindly parse `data:`.
+          let eventType = 'message';
+          const dataLines: string[] = [];
+          for (const line of frame.split(/\r?\n/)) {
+            if (line.startsWith('event:')) {
+              eventType = line.slice('event:'.length).trim();
+            } else if (line.startsWith('data:')) {
+              dataLines.push(line.slice('data:'.length).trim());
+            }
+          }
+          const data = dataLines.join('\n').trim();
+          if (!data || data === '[DONE]') continue;
+
+          if (eventType === 'error') {
+            // Surface the sanitised backend error code as a thrown failure
+            // rather than passing it through as an inert chunk.
+            let code: string | undefined;
+            try {
+              code = (JSON.parse(data) as { code?: string }).code;
+            } catch {
+              code = undefined;
+            }
+            throw new ApiError(`Stream test failed${code ? `: ${code}` : ''}`, 500);
+          }
+
+          try {
+            onChunk(JSON.parse(data) as LlmStreamChunk);
+          } catch {
+            // Ignore unparseable keepalive frames. NOTE: this also swallows errors thrown by onChunk.
+          }
+        }
+      }
+    } finally {
+      reader.releaseLock();
+    }
+  }
+
+  async deleteLlmConnector(id: number): Promise<void> {
+    await this.fetch(`/api/llm/connectors/${id}`, { method: 'DELETE' });
+  }
+
+ // Pin / unpin a connector as the DJ's explicit default (issue #336). When
+ // pinned, the gateway routes through this connector regardless of which one
+ // is most-recently-used.
+  async setLlmConnectorDefault(id: number): Promise<LlmConnector> {
+    return this.fetch(`/api/llm/connectors/${id}/default`, { method: 'POST' });
+  }
+
+  async unsetLlmConnectorDefault(id: number): Promise<LlmConnector> {
+    return this.fetch(`/api/llm/connectors/${id}/default`, { method: 'DELETE' });
+  }
+
+ // ========== Per-feature connector preferences (issue #337) ==========
+
+  async listLlmFeaturePreferences(): Promise<LlmFeaturePreferences> {
+    return this.fetch('/api/llm/feature-preferences');
+  }
+
+  async setLlmFeaturePreference(data: LlmFeaturePreferenceSet): Promise<LlmFeaturePreferences> {
+    return this.fetch('/api/llm/feature-preferences', {
+      method: 'POST',
+      body: JSON.stringify(data),
+    });
+  }
+
+  async clearLlmFeaturePreference(feature: LlmFeatureKey): Promise<LlmFeaturePreferences> {
+    return this.fetch(`/api/llm/feature-preferences/${feature}`, {
+      method: 'DELETE',
+    });
+  }
+
+ // ========== Admin LLM policy + oversight ==========
+
+  async getAdminLlmPolicy(): Promise<LlmAdminPolicy> {
+    return this.fetch('/api/admin/llm/policy');
+  }
+
+  async updateAdminLlmPolicy(data: LlmAdminPolicyPatch): Promise<LlmAdminPolicy> {
+    return this.fetch('/api/admin/llm/policy', {
+      method: 'PATCH',
+      body: JSON.stringify(data),
+    });
+  }
+
+  async listAllLlmConnectors(): Promise<LlmAdminConnector[]> {
+    return this.fetch('/api/admin/llm/connectors');
+  }
+
+  async revokeAdminLlmConnector(id: number): Promise<LlmAdminConnector> {
+    return this.fetch(`/api/admin/llm/connectors/${id}/revoke`, { method: 'POST' });
+  }
+
+  async getAdminLlmUsage(days = 30): Promise<LlmAdminUsage> {
+    return this.fetch(`/api/admin/llm/usage?days=${days}`);
+  }
+
+ /**
+ * Set or clear a connector's monthly token cap (admin-only, issue #339).
+ * Pass `null` to clear the cap (unlimited).
+ */
+  async setAdminLlmConnectorCap(
+    id: number,
+    monthlyTokenCap: number | null,
+  ): Promise<LlmAdminConnector> {
+    return this.fetch(`/api/admin/llm/connectors/${id}/cap`, {
+      method: 'PATCH',
+      body: JSON.stringify({ monthly_token_cap: monthlyTokenCap }),
+    });
+  }
+
+ // ========== Admin LLM audit trail (issue #341) ==========
+
+ private buildAuditQuery(filters: AdminLlmAuditFilters = {}): URLSearchParams {
+ const params = new URLSearchParams();
+ if (filters.event_type) params.set('event_type', filters.event_type);
+ if (filters.actor_user_id != null) {
+ params.set('actor_user_id', String(filters.actor_user_id));
+ }
+ if (filters.target_connector_id != null) {
+ params.set('target_connector_id', String(filters.target_connector_id));
+ }
+ if (filters.days != null) params.set('days', String(filters.days));
+ if (filters.limit != null) params.set('limit', String(filters.limit));
+ if (filters.offset != null) params.set('offset', String(filters.offset));
+ return params;
+ }
+
+  async getAdminLlmAudit(filters: AdminLlmAuditFilters = {}): Promise<LlmAdminAudit> {
+    const params = this.buildAuditQuery(filters);
+    return this.fetch(`/api/admin/llm/audit?${params.toString()}`);
+  }
+
+ /**
+ * Download the (filtered) audit trail as a CSV Blob. Pagination params are
+ * ignored server-side for the export — it honors only the filter fields.
+ */
+  async downloadAdminLlmAuditCsv(filters: AdminLlmAuditFilters = {}): Promise<Blob> {
+    const params = this.buildAuditQuery({
+      event_type: filters.event_type,
+      actor_user_id: filters.actor_user_id,
+      target_connector_id: filters.target_connector_id,
+      days: filters.days,
+    });
+    const response = await this.rawFetch(`/api/admin/llm/audit.csv?${params.toString()}`);
+    return response.blob();
+  }
+
// ========== Kiosk Pairing ==========
async getKioskPairChallenge(): Promise<{ nonce: string; expires_in: number }> {
diff --git a/dashboard/next.config.js b/dashboard/next.config.js
index 44585cf6..185e530c 100644
--- a/dashboard/next.config.js
+++ b/dashboard/next.config.js
@@ -15,6 +15,13 @@ const csp = [
const nextConfig = {
output: 'standalone',
allowedDevOrigins: ['192.168.*.*'],
+ async redirects() {
+ return [
+ // DJ AI connector/model settings moved into the account page (#357).
+ // Keep old bookmarks/links working with a permanent (308) redirect.
+ { source: '/settings/ai', destination: '/account', permanent: true },
+ ];
+ },
async headers() {
return [
{
diff --git a/docs/LLM-PLUGIN.md b/docs/LLM-PLUGIN.md
new file mode 100644
index 00000000..6d03f304
--- /dev/null
+++ b/docs/LLM-PLUGIN.md
@@ -0,0 +1,323 @@
+# LLM Adapter Plug-in Guide
+
+The WrzDJ backend dispatches every LLM call through the **LLM Gateway**, which
+selects a connector for the calling user and routes the request through a
+provider-specific **adapter**. The set of adapters is open: forks and
+third-party deployments can add new providers without modifying any file
+under `server/app/services/llm/`.
+
+This document is the contract that third-party plug-ins write against.
+
+> Companion guide: [`docs/PLUGIN-ARCHITECTURE.md`](PLUGIN-ARCHITECTURE.md)
+> describes the bridge-side equipment plug-in system. The LLM plug-in surface
+> follows the same shape: a small ABC, a registry, and a strict typed-error
+> contract.
+
+## Architecture Overview
+
+```text
+Caller (recommendation engine, agentic feature)
+ │
+ ▼
+Gateway.dispatch(db, actor, request, *, purpose)
+ │ 1. Resolve LlmConnector (per-DJ MRU → org default)
+ │ 2. registry.get_adapter_class(connector_type)
+ │ 3. adapter = cls(connector); await adapter.chat(request)
+ │ 4. Log call + handle fallback policy
+ ▼
+LlmAdapter (your plug-in)
+ │ 1. Parse connector.credentials (encrypted JSON blob)
+ │ 2. Translate ChatRequest → provider-native request
+ │ 3. Translate provider response → ChatResponse
+ │ 4. Map provider errors → typed LlmError subclasses
+ ▼
+Provider HTTP endpoint / SDK
+```
+
+| Layer | File | Responsibility |
+|-------|------|----------------|
+| Adapter | `app/services/llm/adapters/*.py` (built-in); `LLM_PLUGIN_DIR/*.py` (third-party) | Convert between canonical and provider-native shapes; map errors |
+| Registry | `app/services/llm/registry.py` | `connector_type` → adapter class lookup |
+| Tool translation | `app/services/llm/tool_translation.py` | JSON-Schema `ToolSpec` ↔ provider tool/function shape |
+| Gateway | `app/services/llm/gateway.py` | Resolve connector, call adapter, log, handle fallback |
+| Models | `app/models/llm_connector.py` | `LlmConnector` row (encrypted credentials), call log, audit log |
+| Exceptions | `app/services/llm/exceptions.py` | Typed error hierarchy adapters must raise |
+
+The connector row stores credentials as **encrypted JSON** via the
+`EncryptedText` SQLAlchemy column type — accessing
+`connector.credentials` returns the decrypted plaintext blob. Your adapter is
+responsible for parsing that blob.
+
+## The `LlmAdapter` ABC
+
+Defined in [`app/services/llm/base.py`](../server/app/services/llm/base.py).
+
+```python
+class LlmAdapter(ABC):
+    connector_type: str = ""  # set on the subclass — registry key
+
+    def __init__(self, connector) -> None:
+        self.connector = connector
+
+    @abstractmethod
+    async def chat(self, request: ChatRequest) -> ChatResponse: ...
+
+    @abstractmethod
+    async def health_check(self) -> None: ...
+```
+
+### Required Class Attribute: `connector_type`
+
+A short, lowercase, `snake_case` string. The DB column that stores it holds at
+most 40 characters; pick something unique and stable (e.g. `mistral_apikey`,
+`groq_apikey`, `local_vllm`). The registry **refuses to bind the same
+`connector_type` to two different classes** — that prevents silent shadowing
+of built-in adapters.
+
+### Required Method: `chat()`
+
+| Property | Contract |
+|----------|----------|
+| Coroutine | Yes — `async def`. The gateway always awaits. |
+| Input | A canonical `ChatRequest`. |
+| Output | A canonical `ChatResponse`. |
+| Errors | One of the typed `LlmError` subclasses (see below). Never a raw HTTP / SDK exception. |
+| Side effects | None other than the upstream network call. Do **not** mutate the connector row. |
+| Logging | Do not log full prompts, completions, or any credential material. |
+
+### Required Method: `health_check()`
+
+Validate the credential against the provider. The gateway calls this from the
+admin "Test connector" path. Returns `None` on success; raises the same typed
+exceptions as `chat()` on failure.
+
+Pattern: issue the cheapest possible call (e.g. `max_tokens=1`). The shared
+helper `build_healthcheck_request()` in
+`app/services/llm/adapters/_httpx_openai.py` is reusable for OpenAI-shaped
+endpoints.
+
+## Canonical Types
+
+Defined in [`app/services/llm/base.py`](../server/app/services/llm/base.py).
+These are **stable** Pydantic models — fields may be added in a minor release
+but never renamed or removed without a major-version bump.
+
+### `ChatRequest`
+
+| Field | Type | Notes |
+|-------|------|-------|
+| `messages` | `list[Message]` | Required. `role ∈ {"system", "user", "assistant", "tool"}`. Tool messages carry `tool_call_id`. |
+| `tools` | `list[ToolSpec] \| None` | JSON-Schema shape. Translate via `tool_translation.to_*_tools()`. |
+| `force_tool` | `str \| None` | Forces a specific tool name; raise `ToolTranslationError` if not in `tools`. |
+| `max_tokens` | `int \| None` | Adapters supply a default if `None`. |
+| `temperature` | `float \| None` | Pass through verbatim when not `None`. |
+| `model` | `str \| None` | Overrides `connector.model_hint`. |
+| `timeout_seconds` | `float \| None` | Adapters MAY clamp to a max. |
+| `system` | `str \| None` | Provider-native system prompt. Map to the right surface (OpenAI: first system message; Anthropic: top-level `system`). |
+| `fallback_policy` | `Literal["none", "org_default", "retry_then_org_default"]` | Handled by the gateway, not the adapter. Ignore. |
+
+### `ChatResponse`
+
+| Field | Type | Notes |
+|-------|------|-------|
+| `text` | `str` | The textual assistant reply. Empty string if the model only emitted tool calls. |
+| `tool_calls` | `list[ToolCall]` | Empty list when no tools were called. |
+| `stop_reason` | `Literal["end_turn", "tool_use", "max_tokens", "error"]` | Required. Map from the provider's native stop reason. |
+| `usage` | `TokenUsage \| None` | Counts only — never prompt content. Optional. |
+| `model` | `str \| None` | Provider-reported model id (for telemetry). Recommended. |
+
+### `ToolSpec`, `ToolCall`, `Message`
+
+See the source. `ToolSpec.input_schema` is a JSON-Schema dict;
+`tool_translation.py` knows how to translate it for OpenAI / Anthropic /
+Bedrock and parse the response back into canonical `ToolCall` objects.
+Reuse those helpers rather than reimplementing them per adapter.
+
+## Exception Contract
+
+Defined in [`app/services/llm/exceptions.py`](../server/app/services/llm/exceptions.py).
+Every error from the adapter must be one of these. The gateway translates
+them into telemetry, audit events, and HTTP response codes; raw provider
+errors **must not** reach the caller (they often contain bearer tokens in
+error messages — a credential-leak vector).
+
+| Exception | When to raise | Status hint |
+|-----------|---------------|-------------|
+| `AuthInvalid` | Credentials are malformed, missing, or rejected (`401`/`403`). Includes "failed to parse the credential JSON". | Marks connector `status="auth_invalid"`; writes audit event. |
+| `RateLimited(retry_after_seconds=...)` | Provider returned `429`. Pass through `Retry-After` if present. | Gateway logs and surfaces as `429` to the caller. |
+| `QuotaExceeded` | Billing failure (`402`) or provider-specific quota error. | Logged, surfaced as `402` to caller. |
+| `ProviderUnavailable` | `5xx`, network failure, timeout, generic SDK error. | Logged, surfaced as `502`. Eligible for fallback. |
+| `ToolTranslationError` | Unable to translate input tools or parse the response. | Logged, surfaced as `502`. **Not** a fallback trigger. |
+| `NoLlmConfigured` | **Gateway-only.** Adapters should not raise this. | – |
+
+### Mapping example (OpenAI HTTP shape)
+
+```python
+status = response.status_code
+if status in (401, 403):
+    raise AuthInvalid(f"Auth failed (HTTP {status})")
+if status == 402:
+    raise QuotaExceeded("Quota or billing failure")
+if status == 429:
+    retry = response.headers.get("retry-after")
+    raise RateLimited("Rate limited", retry_after_seconds=int(float(retry)) if retry else None)
+if 500 <= status < 600:
+    raise ProviderUnavailable(f"Upstream error (HTTP {status})")
+# 4xx other than the above → almost certainly a translation problem.
+raise ToolTranslationError(f"Upstream rejected request (HTTP {status})")
+```
+
+## Tool Translation
+
+The canonical `ToolSpec` is JSON-Schema. Adapters should delegate to
+[`app/services/llm/tool_translation.py`](../server/app/services/llm/tool_translation.py)
+rather than re-implementing the conversion. The module exposes:
+
+| Helper | Direction |
+|--------|-----------|
+| `to_openai_tools(tools, force)` | Canonical → OpenAI `tools` + `tool_choice` |
+| `parse_openai_response(payload)` | OpenAI body → `ChatResponse` |
+| `to_anthropic_tools(tools, force)` | Canonical → Anthropic `tools` + `tool_choice` |
+| `parse_anthropic_response(message)` | Anthropic SDK message → `ChatResponse` |
+| `to_bedrock_tools(tools, force)` | Canonical → Bedrock Converse `toolConfig` |
+| `parse_bedrock_response(payload)` | Bedrock body → `ChatResponse` |
+
+Adding a new translation pair for a provider whose tool shape genuinely
+differs is allowed — open a PR adding helpers under the same naming
+convention. Until then, do not silently re-shape tools inside your adapter.
+
+## Registration
+
+Register the adapter as the **last statement** of your module:
+
+```python
+register_adapter(MyAdapter.connector_type, MyAdapter)
+```
+
+That call:
+
+- Validates the class subclasses `LlmAdapter`.
+- Rejects empty `connector_type`.
+- Rejects double-binding (a different class trying to take an already-bound
+ key — surfaced as `ValueError` at startup).
+
+Re-registering the *same* class is a no-op (safe for test re-imports).
+
+## Loading Third-Party Plug-ins
+
+There are two supported mechanisms:
+
+1. **Import from your own code.** Add the file to your fork of the backend
+ and ensure it gets imported at startup (e.g. add it to the
+ `app/services/llm/registry.py::_bootstrap` block, or import it from
+ `app/main.py`). This is the recommended path for forks.
+
+2. **`LLM_PLUGIN_DIR` env var.** Set the environment variable to a directory
+ path. At startup the loader
+ ([`app/services/llm/plugin_loader.py`](../server/app/services/llm/plugin_loader.py))
+ imports every `*.py` file in that directory (non-recursive; files starting
+ with `_` are skipped). Each plug-in is responsible for calling
+ `register_adapter()` on import. A broken plug-in is logged with a full
+ stack trace and skipped — it does **not** prevent the rest of the directory
+ or the backend itself from starting.
+
+### Security posture for `LLM_PLUGIN_DIR`
+
+Loading a plug-in grants it the **full privileges of the backend process**.
+There is no sandbox; this is the same trust boundary as `pip install`.
+Operators must:
+
+- Treat the plug-in directory as a privileged path. Only the backend's
+ service account should have write access to it.
+- Audit every plug-in's source the same way they would audit a third-party
+ Python dependency.
+- Never set `LLM_PLUGIN_DIR` to a world-writable or multi-tenant path.
+
+In production we recommend leaving `LLM_PLUGIN_DIR` unset and packaging
+trusted plug-ins as ordinary Python modules. The env-var loader exists to
+make local experimentation and forks ergonomic.
+
+## Stable vs Internal API
+
+The plug-in surface is **the surface listed in this document**. Everything
+else under `app/services/llm/` is internal — including helper modules,
+private functions, and adapter base-class internals not enumerated above.
+
+| Surface | Stability |
+|---------|-----------|
+| `LlmAdapter` ABC method signatures (`chat`, `health_check`, `connector_type`) | **Stable.** Breaking change → major version bump. |
+| `ChatRequest`, `ChatResponse`, `Message`, `ToolSpec`, `ToolCall`, `TokenUsage` field names + types | **Stable.** Field additions in minor versions; never renames/removals without a major bump. |
+| Exception types and their constructor signatures | **Stable.** |
+| `register_adapter`, `get_adapter_class`, `list_connector_types`, `is_registered` | **Stable.** |
+| `tool_translation.to_*_tools` / `parse_*_response` | **Stable** for the providers documented above. |
+| `_httpx_openai`, `url_validator`, `connector_storage` | **Internal.** Reuse at your own risk; may change without notice. |
+| `gateway.dispatch` internals (fallback, logging, audit) | **Internal.** Callers must use the public `Gateway.dispatch` entrypoint. |
+| `LlmConnector` ORM model | **Internal.** Adapters touch only `connector.credentials`, `connector.model_hint`, and `connector.base_url_plain`. |
+
+Schema changes to the `LlmConnector` storage shape (encrypted JSON blob keys)
+are versioned by `connector_type`. Each provider chooses its own blob keys
+in its own migration; the only invariant is that **the blob is a JSON object**.
+
+## Test Matrix
+
+Every registered adapter — built-in or third-party — must pass the
+parametrised contract tests in
+[`server/tests/test_llm_adapter_contract.py`](../server/tests/test_llm_adapter_contract.py).
+The contract covers:
+
+1. The class subclasses `LlmAdapter`.
+2. `connector_type` is non-empty and matches the registration key.
+3. `chat` and `health_check` are async callables.
+4. The constructor accepts a connector row without raising.
+5. `chat()` raises `AuthInvalid` (or another `LlmError`) for malformed
+ credential blobs — never a raw `JSONDecodeError`, `KeyError`, or HTTP
+ exception.
+6. The registry returns classes (not instances) and raises `KeyError` on
+ unknown lookups.
+
+Adapter-specific HTTP and parsing behaviour belongs in a separate test file
+(see the built-in adapters' tests in `test_llm_adapters.py` for the pattern).
+
+Run the contract test against your adapter:
+
+```bash
+cd server
+.venv/bin/pytest tests/test_llm_adapter_contract.py
+```
+
+If a contract test fails on your adapter, **fix the adapter** — do not
+modify the contract. The contract is what lets the gateway dispatch
+generically.
+
+## Reference Skeleton
+
+The minimum working adapter lives at
+[`docs/examples/echo_adapter.py`](examples/echo_adapter.py). It is exercised
+by `test_skeleton_echo_adapter_*` in the contract test file, so any change
+that breaks the documented surface fails CI immediately.
+
+## Adding a Plug-in in 5 Minutes
+
+```bash
+# 1. Copy the skeleton.
+cp docs/examples/echo_adapter.py /opt/wrzdj/llm_plugins/mistral_apikey.py
+
+# 2. Edit it:
+# - Change `connector_type` to a unique value (e.g. "mistral_apikey").
+# - Replace the echo body with your provider call.
+# - Map provider errors to the typed exceptions.
+
+# 3. Point the backend at the plug-in directory.
+export LLM_PLUGIN_DIR=/opt/wrzdj/llm_plugins
+uvicorn app.main:app
+
+# 4. Verify the registry sees it.
+python -c "from app.services.llm.registry import list_connector_types; print(list_connector_types())"
+
+# 5. Run the contract tests.
+cd server && .venv/bin/pytest tests/test_llm_adapter_contract.py
+```
+
+Once your adapter is registered, DJs can create a connector row via
+`POST /api/llm/connectors` with `connector_type="mistral_apikey"` and the
+gateway will route their requests through your adapter automatically.
diff --git a/docs/examples/echo_adapter.py b/docs/examples/echo_adapter.py
new file mode 100644
index 00000000..40930948
--- /dev/null
+++ b/docs/examples/echo_adapter.py
@@ -0,0 +1,174 @@
+"""Echo adapter — minimal reference implementation of ``LlmAdapter``.
+
+This skeleton is the canonical "blank slate" for third-party LLM provider
+plug-ins. It implements the full :class:`~app.services.llm.base.LlmAdapter`
+contract without making any network calls — every request is echoed back as
+the assistant message body.
+
+Usage in tests::
+
+ # Self-test against the contract — no production import.
+ from docs.examples import echo_adapter # noqa: F401 (side-effect: register)
+ from app.services.llm.registry import get_adapter_class
+
+ cls = get_adapter_class("echo")
+ response = await cls(connector).chat(request)
+
+Usage in production (third-party plug-ins)::
+
+ # 1. Copy this file under any module path you control.
+ # 2. Customize ``connector_type`` and the body of ``chat()``.
+ # 3. Either:
+ # a) drop the .py file into the directory pointed to by ``LLM_PLUGIN_DIR``,
+ # or
+ # b) import the module from your own bootstrap code at startup.
+ # 4. The :func:`register_adapter` call at the bottom binds the class to the
+ # registry the moment the module is imported.
+
+See ``docs/LLM-PLUGIN.md`` for the full extension contract.
+
+Security note: this skeleton intentionally does not validate or sanitise the
+input it echoes. Real adapters must:
+- Treat ``connector.credentials`` as untrusted (the encrypted blob can be
+ malformed; raise :class:`AuthInvalid` rather than letting :class:`json.JSONDecodeError`
+ bubble up).
+- Translate upstream HTTP/SDK errors into the typed exception hierarchy
+ (``AuthInvalid`` / ``RateLimited`` / ``QuotaExceeded`` / ``ProviderUnavailable``
+ / ``ToolTranslationError``). Raw provider errors must not reach the caller.
+- Never log secrets, full prompts, or completion bodies (the gateway only
+ logs counts).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any
+
+from app.services.llm.base import (
+ ChatRequest,
+ ChatResponse,
+ ContentBlock,
+ LlmAdapter,
+ Message,
+ TokenUsage,
+)
+from app.services.llm.exceptions import AuthInvalid
+from app.services.llm.registry import register_adapter
+
+logger = logging.getLogger(__name__)
+
+
+class EchoAdapter(LlmAdapter):
+ """An adapter that echoes the last user message back as the assistant reply.
+
+ Useful for:
+ - Wiring tests for the gateway / connector storage layer end-to-end
+ without depending on a live provider.
+ - Showing third-party plug-in authors the minimum required surface.
+ """
+
+ # The registry key for this adapter. Plug-in authors must change this to a
+ # unique string before publishing — the registry refuses to register two
+ # different classes under the same ``connector_type``.
+ connector_type = "echo"
+
+ # ------------------------------------------------------------------
+ # Credential handling
+ # ------------------------------------------------------------------
+ def _read_credentials(self) -> dict[str, Any]:
+ """Parse the encrypted credential blob, raising AuthInvalid on failure.
+
+ The :class:`~app.models.llm_connector.LlmConnector` row stores
+ credentials as an encrypted JSON string. Accessing ``self.connector.credentials``
+ triggers decryption transparently via the ``EncryptedText`` column
+ type. After that, parsing is the adapter's responsibility — and every
+ failure mode here must surface as :class:`AuthInvalid` so the gateway
+ can mark the connector and emit a clean audit event.
+ """
+ raw = self.connector.credentials or ""
+ try:
+ blob = json.loads(raw)
+ except (json.JSONDecodeError, TypeError) as exc:
+ raise AuthInvalid("Connector credentials are malformed") from exc
+ if not isinstance(blob, dict):
+ raise AuthInvalid("Connector credentials shape is invalid")
+ return blob
+
+ # ------------------------------------------------------------------
+ # LlmAdapter — required methods
+ # ------------------------------------------------------------------
+ async def chat(self, request: ChatRequest) -> ChatResponse:
+ """Echo the most recent user message back as the assistant reply.
+
+ Real adapters should:
+ - Translate ``request.messages`` to the provider's native message shape.
+ - Call ``to_<provider>_tools(request.tools, request.force_tool)`` from
+ ``app.services.llm.tool_translation`` to translate tools.
+ - Call ``parse_<provider>_response(...)`` from that same module to
+ translate the response back to ``ChatResponse``.
+ - Map provider HTTP / SDK errors to the typed exception hierarchy.
+ """
+ # We deliberately read credentials before doing any echoing — that way
+ # this skeleton exercises the same boundary (malformed creds raise
+ # AuthInvalid) that real adapters depend on.
+ self._read_credentials()
+
+ last_user = next(
+ (m for m in reversed(request.messages) if m.role == "user"),
+ None,
+ )
+ if last_user is None:
+ text = ""
+ else:
+ text = _flatten_message_text(last_user)
+
+ return ChatResponse(
+ text=text,
+ tool_calls=[],
+ stop_reason="end_turn",
+ usage=TokenUsage(prompt=len(text.split()), completion=len(text.split())),
+ # Surface the resolved model name (request override → connector hint
+ # → adapter default) so call logs and recommendation telemetry stay
+ # accurate. Real adapters should set this to the *provider-reported*
+ # model id from the response payload, not the requested model.
+ model=request.model or self.connector.model_hint or "echo-1",
+ )
+
+ async def health_check(self) -> None:
+ """Validate the credential without exercising the (nonexistent) provider.
+
+ Real adapters should issue a cheap, low-token call (e.g. ``max_tokens=1``)
+ and raise the same typed exceptions as :meth:`chat`.
+ """
+ # No provider to ping — the credential parse step is enough proof that
+ # the connector is wired correctly.
+ self._read_credentials()
+
+
+def _flatten_message_text(msg: Message) -> str:
+ """Collapse a possibly-multi-block message to plain text.
+
+ Real provider adapters typically keep the block structure; this skeleton
+ flattens because a string return matches the simplest possible echo.
+
+ String content is returned unchanged. Otherwise each block contributes
+ its text: ``ContentBlock.text`` as-is, or the ``"text"`` entry of a dict
+ block (empty string when the key is missing or the value is falsy).
+ Blocks of any other type are skipped, and the collected pieces are
+ joined with no separator.
+ """
+ content = msg.content
+ if isinstance(content, str):
+ return content
+ parts: list[str] = []
+ for block in content:
+ if isinstance(block, ContentBlock):
+ parts.append(block.text)
+ elif isinstance(block, dict):
+ # ``or ""`` covers both a missing "text" key and an explicit None.
+ parts.append(block.get("text") or "")
+ return "".join(parts)
+
+
+# The registry call here is what makes the skeleton "live" — importing this
+# module registers the adapter under the ``connector_type`` declared above.
+#
+# Third-party plug-ins follow the same pattern. The registry refuses to bind
+# the same ``connector_type`` to two different classes, so plug-in authors
+# must pick a unique value (the ``LlmConnector.connector_type`` column is 40
+# chars; keep it short, lowercase, snake-case, e.g. ``mistral_apikey``).
+register_adapter(EchoAdapter.connector_type, EchoAdapter)
diff --git a/docs/superpowers/plans/2026-05-25-llm-audit-trail-admin-ui.md b/docs/superpowers/plans/2026-05-25-llm-audit-trail-admin-ui.md
new file mode 100644
index 00000000..51474357
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-25-llm-audit-trail-admin-ui.md
@@ -0,0 +1,62 @@
+# LLM Audit-Trail Admin UI (#341) Implementation Plan
+
+> **For agentic workers:** Steps use checkbox (`- [ ]`) syntax for tracking. TDD throughout.
+
+**Goal:** Add an admin-only browse/filter/export UI for the existing `llm_audit_event` table on the `/admin/ai` page.
+
+**Architecture:** New read-only backend endpoints on `admin_llm.py` (`GET /api/admin/llm/audit` paginated JSON + `GET /api/admin/llm/audit.csv` streaming CSV), both joining actor username + target connector display name (never credentials). New Pydantic schemas. New API-client methods + a new "Audit trail" card section on the existing `/admin/ai` page (the page uses cards as sections — no tab component exists).
+
+**Tech Stack:** FastAPI, SQLAlchemy 2.0, slowapi, Pydantic v2, Next.js/React 19 + vanilla CSS, vitest.
+
+**Scope fences:** Edit only `server/app/api/admin_llm.py`, `server/app/schemas/llm.py`, `server/tests/*`, `dashboard/app/admin/ai/page.tsx` (+ `__tests__`), `dashboard/lib/api.ts` (add-only), `dashboard/lib/api-types.ts` (add-only). NO migration. READ-ONLY on `llm_audit_event`.
+
+---
+
+## Task 1: Backend schemas + paginated audit endpoint
+
+**Files:**
+- Modify: `server/app/schemas/llm.py` (add `AuditEventRow`, `AdminAuditOut`)
+- Modify: `server/app/api/admin_llm.py` (add `GET /audit`)
+- Test: `server/tests/test_llm_admin_audit.py`
+
+- [ ] Step 1: Write failing tests covering: basic list (admin), 403 for non-admin, filter by event_type, filter by actor_user_id, filter by target_connector_id, days window, pagination (limit/offset + total), joined actor_username + target_connector_display_name, no credentials leaked.
+- [ ] Step 2: Run → FAIL (404 / no endpoint).
+- [ ] Step 3: Add schemas + endpoint. Query `LlmAuditEvent` left-joined to User (actor) and LlmConnector (target). Filters all optional. `days` default 30, range 1..3650. limit 1..200 default 50, offset >=0. Return rows newest-first + `total`.
+- [ ] Step 4: Run → PASS.
+- [ ] Step 5: Commit.
+
+## Task 2: CSV export endpoint
+
+**Files:**
+- Modify: `server/app/api/admin_llm.py` (add `GET /audit.csv`)
+- Test: `server/tests/test_llm_admin_audit.py`
+
+- [ ] Step 1: Write failing tests: CSV content-type + header row + a data row; honors event_type filter; 403 non-admin; cap rows.
+- [ ] Step 2: Run → FAIL.
+- [ ] Step 3: Implement StreamingResponse with `csv` module; same filter helper as Task 1; cap at 10000 rows. Columns: timestamp, actor, event_type, target_connector, notes (notes column reserved/empty — schema has no notes field; emit blank to honor issue's column list).
+- [ ] Step 4: Run → PASS.
+- [ ] Step 5: Commit.
+
+## Task 3: Frontend API client + types
+
+**Files:**
+- Modify: `dashboard/lib/api-types.ts` (add `LlmAdminAudit`, `LlmAuditRow`)
+- Modify: `dashboard/lib/api.ts` (add `getAdminLlmAudit`, `getAdminLlmAuditCsvUrl`/download helper)
+- Regenerate: `dashboard/lib/api-types.generated.ts` via `npm run types:export && npm run types:generate`
+
+- [ ] Step 1: Regenerate OpenAPI types so new schemas appear.
+- [ ] Step 2: Add manual aliases + client methods.
+- [ ] Step 3: tsc passes.
+- [ ] Step 4: Commit.
+
+## Task 4: Audit trail card on /admin/ai page + tests
+
+**Files:**
+- Modify: `dashboard/app/admin/ai/page.tsx`
+- Test: `dashboard/app/admin/ai/__tests__/page.test.tsx`
+
+- [ ] Step 1: Write failing test: renders "Audit trail" heading + a seeded row; filter inputs present; export button present.
+- [ ] Step 2: Run → FAIL.
+- [ ] Step 3: Implement card: filters (event type select, actor, target connector, days), table (timestamp, actor, event type, connector, notes), pagination (prev/next), CSV export button.
+- [ ] Step 4: Run → PASS. Full frontend CI.
+- [ ] Step 5: Commit.
diff --git a/docs/superpowers/plans/2026-05-26-move-dj-ai-settings-to-account.md b/docs/superpowers/plans/2026-05-26-move-dj-ai-settings-to-account.md
new file mode 100644
index 00000000..72ae4cd0
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-26-move-dj-ai-settings-to-account.md
@@ -0,0 +1,96 @@
+# Move DJ AI connector/model settings into the account page Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Relocate the DJ-facing AI connector UI (connect/test/rotate/delete, model hint, Hermes onboarding) from `/settings/ai` into the existing `/account` page, redirect the old route, and update tests — keeping the admin `/admin/ai` UI untouched.
+
+**Architecture:** Extract the existing `/settings/ai` page body into a reusable client component `components/AiProvidersSection.tsx`. Render it as a third inline card section inside `/account`. Delete the old `/settings/ai` route and add a server-side redirect in `next.config.js` so bookmarks 308 to `/account`. Preserve fail-closed policy behavior verbatim (it moves with the component).
+
+**Tech Stack:** Next.js 16 (App Router), React 19, TypeScript (strict), vanilla CSS + inline styles, Vitest + Testing Library.
+
+---
+
+### Task 1: Extract AI providers UI into a reusable component
+
+**Files:**
+- Create: `dashboard/components/AiProvidersSection.tsx`
+- Reference (source of logic): `dashboard/app/(dj)/settings/ai/page.tsx`
+
+The component contains ALL connector logic from the current page: policy fetch (`fetchPolicySoft` → `getLlmPolicy`), `allowedTypes` fail-closed memo, connectors list, create form (all provider types incl. bedrock/azure/openai_compatible/openrouter dropdown), test, delete. It must NOT include the page-level `<main>` wrapper, the "← Dashboard" link header, the `useAuth`/`useRouter` auth-redirect (those stay at the page level — `/account` already does the auth gate). It exports a default React component `AiProvidersSection` rendering a `<section>` that begins with an `<h2>AI / Model providers</h2>` and the existing intro paragraph, then "Connected providers" and the add-provider form.
+
+- [ ] **Step 1: Create the component** by moving the body. Keep every form field, label text (e.g. `Provider`, `Display name`, `API key`, `Resource name`, `Bedrock model ID`, `Model (optional)`), the OpenRouter model fetch effect, and the fail-closed `allowedTypes` logic identical so existing test assertions still hold. The top of the rendered output is an intro `<h2>` + `<p>`; the rest is the two `<section>`s. Wrap all of it in a single fragment/section with `style={{ marginTop: '2rem' }}` matching the account-page card rhythm (it will live inside its own card in Task 2, so use a plain wrapper, not a `.card`).
+
+- [ ] **Step 2: Type-check** — `cd dashboard && npx tsc --noEmit`. Expected: PASS.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add dashboard/components/AiProvidersSection.tsx
+git commit -m "refactor(ai): extract AI providers UI into reusable component"
+```
+
+---
+
+### Task 2: Render the AI section inside /account and delete old route
+
+**Files:**
+- Modify: `dashboard/app/(dj)/account/page.tsx`
+- Modify: `dashboard/next.config.js` (add `redirects()`)
+- Delete: `dashboard/app/(dj)/settings/ai/page.tsx`
+- Delete: `dashboard/app/(dj)/settings/ai/__tests__/page.test.tsx` (logic re-tested via component in Task 3)
+- Delete dir if empty: `dashboard/app/(dj)/settings/`
+
+- [ ] **Step 1: Import and render** `AiProvidersSection` in `/account`. Add a third card `<div>` (same wrapper style as Change Email card: `{ background: 'var(--card)', borderRadius: '0.75rem', padding: '1.5rem', marginTop: '1.5rem' }`) below Change Email, containing `<AiProvidersSection />`. Widen the page `<main>` maxWidth from `480px` to `720px` so the provider form (which used `720px`) is not cramped.
+
+- [ ] **Step 2: Add redirect** in `next.config.js`:
+
+```js
+async redirects() {
+ return [
+ { source: '/settings/ai', destination: '/account', permanent: true },
+ ];
+},
+```
+
+- [ ] **Step 3: Delete** the old route file, its test, and the now-empty `settings/` dir.
+
+- [ ] **Step 4: Grep** `grep -rn "/settings/ai" dashboard/ --include="*.ts" --include="*.tsx" | grep -v node_modules` → expect no remaining nav/link hits (only possibly api-types doc comments, which are fine).
+
+- [ ] **Step 5: Type-check + lint** — `cd dashboard && npx tsc --noEmit && npm run lint`. Expected: PASS.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add dashboard/app/\(dj\)/account/page.tsx dashboard/next.config.js
+git add -u dashboard/app/\(dj\)/settings
+git commit -m "feat(ai): move DJ AI settings into account page; redirect old route (#357)"
+```
+
+---
+
+### Task 3: Move/adapt the AI tests to the component + account page
+
+**Files:**
+- Create: `dashboard/components/__tests__/AiProvidersSection.test.tsx` (port the old settings/ai tests, importing the component instead of the page; drop the page-level `next/navigation`/`useAuth` mocks, which the component no longer needs, but keep the `next/link` mock if used)
+- Modify: `dashboard/app/(dj)/account/__tests__/page.test.tsx` (add the AI api methods to the `@/lib/api` mock so the section can mount inside the account page without throwing, and assert the AI heading renders)
+
+- [ ] **Step 1: Port connector tests** to `AiProvidersSection.test.tsx` — same assertions (lists connectors, fail-closed hides providers, policy filtering, azure/bedrock/openrouter fields, test, delete). Render `<AiProvidersSection />` directly. Keep `vi.mock('next/link', ...)` if the component still uses `Link` (it should NOT — Link header stays on the page; remove the import). Mock `@/lib/api` methods used: `listLlmConnectors`, `getLlmPolicy`, `createLlmConnector`, `testLlmConnector`, `deleteLlmConnector`, `listOpenRouterModels`, `getAdminLlmPolicy` (for the "reads DJ-scoped not admin" test).
+
+- [ ] **Step 2: Update account page test** — extend the existing `vi.mock('@/lib/api', ...)` to add `listLlmConnectors: () => Promise.resolve([])` and `getLlmPolicy: () => Promise.reject(new Error('x'))` (fail-closed, no extra UI). Add a test: AI heading `AI / Model providers` is in the document.
+
+- [ ] **Step 3: Run frontend tests** — `cd dashboard && npm test -- --run`. Expected: PASS, coverage thresholds met.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add dashboard/components/__tests__/AiProvidersSection.test.tsx dashboard/app/\(dj\)/account/__tests__/page.test.tsx
+git commit -m "test(ai): relocate AI provider tests to component + account page (#357)"
+```
+
+---
+
+## Self-Review
+
+- Spec coverage: relocate UI (Task 1+2) ✓; update nav/links (Task 2 grep — only the page itself referenced it) ✓; redirect old route (Task 2) ✓; admin /admin/ai untouched (not touched by any task) ✓; tests moved (Task 3) ✓; fail-closed preserved (logic moved verbatim, retested) ✓.
+- Placeholder scan: none.
+- Type consistency: component name `AiProvidersSection` used consistently in Tasks 1–3.
diff --git a/docs/superpowers/plans/2026-05-26-remove-deprecated-anthropic-env-reads.md b/docs/superpowers/plans/2026-05-26-remove-deprecated-anthropic-env-reads.md
new file mode 100644
index 00000000..a09d7160
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-26-remove-deprecated-anthropic-env-reads.md
@@ -0,0 +1,106 @@
+# Remove deprecated ANTHROPIC_API_KEY env-var reads Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Remove the now-dead legacy `ANTHROPIC_API_KEY` env-var fallback path in the recommendation engine, since the LLM Gateway connector system has been the source of truth for credentials since the MVP.
+
+**Architecture:** Every production caller of `call_llm` / `generate_llm_suggestions` passes `db` + `actor`, so the gateway path always runs and the `_legacy_call` direct-Anthropic fallback (and its `anthropic_api_key` / `anthropic_max_tokens` / `anthropic_timeout_seconds` config reads) is dead code. We delete that fallback, the unused config fields, and refresh the legacy unit test to drive the gateway path instead. We deliberately KEEP `config.anthropic_api_key` and `config.anthropic_model` because the admin AI-settings/model-listing endpoints and the recommendation response `llm_model` default still read them — removing those is a cross-cutting frontend+API-contract change out of scope for this backend cleanup.
+
+**Tech Stack:** Python 3.11+, FastAPI, pydantic-settings, pytest.
+
+---
+
+## Design decisions (scope reconciliation)
+
+The issue's literal grep target is the **uppercase env-var name** `ANTHROPIC_API_KEY`. In non-test code that string appears only in:
+- `server/alembic/versions/046_admin_ai_oauth.py` — historical one-shot data migration. **MUST stay** (allowable exception).
+- `server/app/services/recommendation/llm_hooks.py:78` — a docstring mention of the dead fallback. **Removed** here.
+
+The actual env-var *reads* go through the pydantic-settings attribute `config.anthropic_api_key` (lowercase). Mapping every read:
+
+| Location | What it does | Decision |
+|---|---|---|
+| `llm_client._legacy_call` | direct-Anthropic fallback when `db is None` | **REMOVE** — dead; all callers pass `db` |
+| `llm_client._resolve_max_tokens` | reads `anthropic_max_tokens` for gateway `ChatRequest.max_tokens` | **KEEP the cap, drop the config dependency** — inline the `1024` default |
+| `llm_hooks.is_llm_available` final fallback | `bool(get_settings().anthropic_api_key)` | **REMOVE** — gateway connector check is authoritative |
+| `admin._list_anthropic_models` / `/ai/settings` | live admin observability of the legacy key | **KEEP** — powers admin UI + API contract + frontend tests; out of scope |
+| `events.py:986` | `result.llm_model or get_settings().anthropic_model` display default | **KEEP** — `anthropic_model` is a model-name default, not a credential fallback |
+
+Config fields:
+- `anthropic_max_tokens`, `anthropic_timeout_seconds` → **REMOVE** (only the deleted `_legacy_call` / `_resolve_max_tokens` used them).
+- `anthropic_api_key`, `anthropic_model` → **KEEP** (still read by admin + events display).
+
+---
+
+## File Structure
+
+- `server/app/services/recommendation/llm_client.py` — delete `_legacy_call`, the `AsyncAnthropic` import, the `db is None` branch; inline max-tokens default.
+- `server/app/services/recommendation/llm_hooks.py` — drop the `db is None` env-var fallback and the docstring `ANTHROPIC_API_KEY` mention; tighten `is_llm_available` to require `db`.
+- `server/app/core/config.py` — remove `anthropic_max_tokens`, `anthropic_timeout_seconds`.
+- `server/tests/test_llm_client.py` — replace the `AsyncAnthropic`-patching legacy tests with gateway-path tests.
+- `server/tests/test_llm_hooks.py` — drop the env-var-availability assertions.
+- `.env.example` — drop the deprecated `ANTHROPIC_*` lines (keep nothing that's dead).
+- `CLAUDE.md` — update the Environment section + LLM Gateway note.
+
+---
+
+### Task 1: Remove the dead `_legacy_call` fallback in `llm_client.py`
+
+**Files:**
+- Modify: `server/app/services/recommendation/llm_client.py`
+- Test: `server/tests/test_llm_client.py`
+
+- [ ] **Step 1: Rewrite `TestCallLLM` to drive the gateway path**
+
+Replace the two `AsyncAnthropic`-patching tests with tests that pass a fake `db` and patch `Gateway.dispatch`, asserting the parse + trim behavior.
+
+- [ ] **Step 2: Run to verify they fail** (`call_llm` still has the `db is None` branch / `Gateway` not yet the sole path)
+
+Run: `.venv/bin/pytest tests/test_llm_client.py -q`
+
+- [ ] **Step 3: Edit `llm_client.py`**
+ - Remove `from anthropic import AsyncAnthropic`.
+ - Remove the `if db is None: result = await _legacy_call(...)` branch — make the gateway path unconditional; raise/parse via gateway always.
+ - Delete `_legacy_call`.
+ - Replace `_resolve_max_tokens()` body to return a module constant default (`DEFAULT_MAX_TOKENS = 1024`) instead of `get_settings().anthropic_max_tokens`.
+ - Remove the now-unused `get_settings` import if nothing else uses it.
+
+- [ ] **Step 4: Run tests** — `.venv/bin/pytest tests/test_llm_client.py -q` → PASS
+
+- [ ] **Step 5: Commit**
+
+### Task 2: Tighten `is_llm_available` in `llm_hooks.py`
+
+**Files:**
+- Modify: `server/app/services/recommendation/llm_hooks.py`
+- Test: `server/tests/test_llm_hooks.py`
+
+- [ ] **Step 1: Update `test_llm_hooks.py`** — remove the two assertions that `is_llm_available()` (no db) keys off `anthropic_api_key`; keep/adjust the db-based connector tests. `is_llm_available()` with no db now returns `False`.
+- [ ] **Step 2: Run to verify fail.**
+- [ ] **Step 3: Edit `llm_hooks.py`** — drop the final `bool(get_settings().anthropic_api_key)` fallback (both the `db is not None` tail and the no-db return → `False`); remove the `ANTHROPIC_API_KEY` docstring bullet and the `db is None` env-var sentence in `generate_llm_suggestions`; remove the now-unused `get_settings` import.
+- [ ] **Step 4: Run tests** → PASS.
+- [ ] **Step 5: Commit.**
+
+### Task 3: Remove dead config fields
+
+**Files:**
+- Modify: `server/app/core/config.py`
+
+- [ ] **Step 1: Remove `anthropic_max_tokens` and `anthropic_timeout_seconds`** from the `Settings` class. Keep `anthropic_api_key` and `anthropic_model` (still used by admin + events).
+- [ ] **Step 2: Grep** `grep -rn "anthropic_max_tokens\|anthropic_timeout" server/app` → zero hits.
+- [ ] **Step 3: Commit.**
+
+### Task 4: Docs + env example
+
+**Files:**
+- Modify: `.env.example`, `CLAUDE.md`
+
+- [ ] **Step 1: `.env.example`** — remove the deprecated `ANTHROPIC_API_KEY` / `ANTHROPIC_MODEL` / `ANTHROPIC_MAX_TOKENS` / `ANTHROPIC_TIMEOUT_SECONDS` lines and rewrite the surrounding comment to state credentials are connector-only.
+- [ ] **Step 2: `CLAUDE.md`** — update the Anthropic env-var line in the Environment section and the LLM Gateway note (legacy fallback removed).
+- [ ] **Step 3: Commit.**
+
+### Task 5: Full backend CI + acceptance grep
+
+- [ ] `cd server && .venv/bin/ruff check . && .venv/bin/ruff format --check . && .venv/bin/bandit -r app -c pyproject.toml -q && .venv/bin/pytest --tb=short -q`
+- [ ] `grep -rn "ANTHROPIC_API_KEY" server/ | grep -v /tests/` → only the alembic migration hits remain.
+- [ ] `.venv/bin/alembic upgrade head && .venv/bin/alembic check` (config field removal must not drift).
diff --git a/docs/superpowers/plans/2026-05-26-sse-stream-pooled-db-connection-leak.md b/docs/superpowers/plans/2026-05-26-sse-stream-pooled-db-connection-leak.md
new file mode 100644
index 00000000..aa35419b
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-26-sse-stream-pooled-db-connection-leak.md
@@ -0,0 +1,333 @@
+# Fix SSE Stream Pooled DB Connection Leak Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Stop the public SSE `event_stream` endpoint from pinning a pooled DB connection for the entire (potentially unbounded) lifetime of an EventSource connection, which exhausts the pool (size 5 + overflow 10 = 15) under modest guest load.
+
+**Architecture:** Remove the `db: Session = Depends(get_db)` request-scoped dependency from `event_stream`. Run the one-shot existence/auth check inside a short-lived `with SessionLocal() as s:` block that is fully closed (connection returned to the pool) BEFORE the `EventSourceResponse` is returned. The async generator currently performs no per-tick DB access, so it opens no session; if future per-tick DB access is needed it must open its own short-lived `SessionLocal()` session. Existence/auth error responses (404 unknown, 410 archived/expired) are preserved exactly.
+
+**Tech Stack:** FastAPI, SQLAlchemy 2.0 (QueuePool), sse-starlette, pytest.
+
+---
+
+### Task 1: Regression test proving idle SSE streams hold ~0 pooled DB connections
+
+**Files:**
+- Test: `server/tests/test_sse_pool.py` (create)
+
+The existing `client`/`db` fixtures override `get_db` with a single shared `StaticPool` SQLite session, so they cannot measure the production `QueuePool`. This test exercises the real `event_stream` endpoint function directly against a real `SessionLocal`-backed engine, asserting the function returns (existence check done) with the pool fully checked back in, and that the returned generator can be opened/closed without checking out a connection.
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+"""Regression test for issue #356 — SSE event_stream must NOT pin a pooled
+DB connection for the lifetime of the stream.
+
+Before the fix, event_stream declared `db: Session = Depends(get_db)`, so
+FastAPI held the session (and its checked-out QueuePool connection) open
+until the request finished — which for an EventSource never happens while
+the browser holds it open. ~15 concurrent guest viewers exhausted the pool.
+
+These tests bypass the conftest StaticPool override and drive a real
+QueuePool engine so engine.pool.checkedout() is meaningful.
+"""
+
+import asyncio
+from datetime import timedelta
+
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from starlette.requests import Request as StarletteRequest
+
+from app.core.time import utcnow
+from app.models.base import Base
+from app.models.user import User
+from app.models.event import Event
+from app.services.auth import get_password_hash
+
+
+@pytest.fixture()
+def pooled_engine(monkeypatch):
+ """A real file-backed SQLite engine using QueuePool (default), so
+ engine.pool.checkedout() reflects actual checked-out connections.
+
+ Patches app.db.session.SessionLocal AND the name already imported into
+ app.api.sse so the endpoint resolves our pooled session factory.
+ """
+ import app.db.session as db_session
+ import app.api.sse as sse_module
+
+ engine = create_engine(
+ "sqlite:///file:sse_pool_test?mode=memory&cache=shared&uri=true",
+ pool_size=5,
+ max_overflow=10,
+ )
+ Base.metadata.create_all(bind=engine)
+ TestSession = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+ monkeypatch.setattr(db_session, "SessionLocal", TestSession)
+ monkeypatch.setattr(sse_module, "SessionLocal", TestSession, raising=False)
+
+ # Seed an active event using a short-lived session.
+ with TestSession() as s:
+ user = User(
+ username="pooluser",
+ password_hash=get_password_hash("poolpassword123"),
+ role="dj",
+ )
+ s.add(user)
+ s.commit()
+ s.refresh(user)
+ evt = Event(
+ code="POOL01",
+ join_code="POOLJN",
+ name="Pool Event",
+ created_by_user_id=user.id,
+ expires_at=utcnow() + timedelta(hours=6),
+ )
+ s.add(evt)
+ s.commit()
+
+ try:
+ yield engine, TestSession
+ finally:
+ Base.metadata.drop_all(bind=engine)
+ engine.dispose()
+
+
+def _make_request(code: str) -> StarletteRequest:
+ """Minimal ASGI scope for a GET that reports as connected."""
+ scope = {
+ "type": "http",
+ "method": "GET",
+ "path": f"/api/public/events/{code}/stream",
+ "headers": [],
+ "query_string": b"",
+ }
+
+ async def receive(): # pragma: no cover - never drained in these tests
+ return {"type": "http.disconnect"}
+
+ return StarletteRequest(scope, receive)
+
+
+def test_event_stream_returns_with_pool_checked_in(pooled_engine):
+ """After event_stream() returns, the existence-check connection must be
+ back in the pool (checkedout() == 0)."""
+ from app.api.sse import event_stream
+
+ engine, _ = pooled_engine
+ assert engine.pool.checkedout() == 0
+
+ req = _make_request("POOLJN")
+ response = asyncio.run(event_stream(code="POOLJN", request=req))
+
+ # EventSourceResponse created, generator not yet iterated.
+ assert engine.pool.checkedout() == 0
+
+
+def test_n_concurrent_idle_streams_hold_zero_pool_connections(pooled_engine):
+ """N concurrent open (idle) SSE streams must hold ~0 pooled connections.
+
+ Open N generators (past pool_size + max_overflow = 15), prime each one
+ tick so the generator body is actively suspended on queue.get(), then
+ assert the pool has 0 checked-out connections. Before the fix this would
+ be N (one pinned per stream) and would TimeoutError past 15.
+ """
+ from app.api.sse import event_stream
+
+ engine, _ = pooled_engine
+ n = 25 # well past pool capacity (15)
+
+ async def drive():
+ generators = []
+ for _ in range(n):
+ req = _make_request("POOLJN")
+ resp = await event_stream(code="POOLJN", request=req)
+ gen = resp.body_iterator
+ generators.append(gen)
+
+ # Prime each generator one step so it subscribes and suspends on
+ # queue.get(); give the event loop a tick to settle.
+ primer_tasks = [asyncio.ensure_future(g.__anext__()) for g in generators]
+ await asyncio.sleep(0.05)
+
+ checked_out = engine.pool.checkedout()
+
+ # Cancel the primers and close generators to release subscriptions.
+ for t in primer_tasks:
+ t.cancel()
+ for g in generators:
+ await g.aclose()
+
+ return checked_out
+
+ checked_out = asyncio.run(drive())
+ assert checked_out == 0, (
+ f"Expected 0 pooled connections held by {n} idle SSE streams, "
+ f"got {checked_out} — the stream is pinning DB connections."
+ )
+
+
+def test_event_stream_preserves_404_for_unknown_event(pooled_engine):
+ """Existence check must still reject unknown codes with 404."""
+ from fastapi import HTTPException
+
+ from app.api.sse import event_stream
+
+ req = _make_request("NOEXIS")
+ with pytest.raises(HTTPException) as exc:
+ asyncio.run(event_stream(code="NOEXIS", request=req))
+ assert exc.value.status_code == 404
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run (from `server/`): `.venv/bin/pytest tests/test_sse_pool.py -v`
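+Expected: FAIL. The pre-fix `event_stream` still declares `db: Session = Depends(get_db)`, and FastAPI does not resolve `Depends` defaults when the function is called directly, so `db` is the raw `Depends` marker object rather than a session. `test_event_stream_returns_with_pool_checked_in` therefore errors inside the existence check (typically an `AttributeError` on the first session call), and/or the pool assertions fail. RED.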
+
+- [ ] **Step 3: Implement the fix in `server/app/api/sse.py`**
+
+Remove the `db: Session = Depends(get_db)` parameter. Import `SessionLocal`. Run the existence check in a short-lived session closed before returning.
+
+```python
+"""SSE streaming endpoint for real-time event updates (no authentication required)."""
+
+import asyncio
+import json
+import logging
+from typing import Any
+
+from fastapi import APIRouter, HTTPException, Request
+from sse_starlette.sse import EventSourceResponse
+
+from app.core.rate_limit import limiter
+from app.db.session import SessionLocal
+from app.services.event import EventLookupResult, get_event_by_join_code_with_status
+from app.services.event_bus import get_event_bus
+
+logger = logging.getLogger(__name__)
+router = APIRouter()
+
+DISCONNECT_CHECK_INTERVAL = 15 # seconds
+
+
+async def _event_generator(
+ request: Request,
+ event_code: str,
+) -> Any:
+ """Yield SSE events for a given event code until the client disconnects.
+
+ Keepalive pings are handled by sse-starlette's built-in ping task (every 15s).
+ This generator only yields actual events. The timeout on queue.get() lets us
+ periodically check for client disconnect without blocking forever.
+
+ NOTE (issue #356): this generator deliberately holds NO DB session. If a
+ future change needs per-tick DB access it MUST open its own short-lived
+ `with SessionLocal() as s:` session per tick and close it before awaiting —
+ never hold a pooled connection across the stream lifetime.
+ """
+ bus = get_event_bus()
+ queue = bus.subscribe(event_code)
+ try:
+ while True:
+ if await request.is_disconnected():
+ break
+ try:
+ message = await asyncio.wait_for(queue.get(), timeout=DISCONNECT_CHECK_INTERVAL)
+ yield {
+ "event": message["event"],
+ "data": json.dumps(message["data"]),
+ }
+ except TimeoutError:
+ # No event received — loop to check is_disconnected()
+ continue
+ finally:
+ bus.unsubscribe(event_code, queue)
+
+
+@router.get("/events/{code}/stream")
+@limiter.limit("10/minute")
+async def event_stream(
+ code: str,
+ request: Request,
+) -> EventSourceResponse:
+ """Public SSE endpoint for real-time event updates.
+
+ SECURITY (CRIT-5): rate-limited and existence-checked. Before this fix,
+ the endpoint had no rate limit and no existence check, allowing
+ unauthenticated DoS (unlimited long-lived connections exhausting FDs)
+ and passive eavesdropping via 6-char event-code brute force.
+
+ POOL SAFETY (issue #356): the existence/auth check runs in a short-lived
+ session that is closed (its pooled connection returned) BEFORE the
+ EventSourceResponse is returned. An EventSource connection can stay open
+ indefinitely, so we must NOT hold a request-scoped get_db session across
+ the stream lifetime — doing so pinned one pooled connection per open
+ stream and exhausted the QueuePool (size 5 + overflow 10) under guest load.
+
+ Event types:
+ - request_created: New request submitted
+ - request_status_changed: Request status update
+ - now_playing_changed: Now-playing track update
+ - requests_bulk_update: Batch accept/reject
+ - bridge_status_changed: Bridge connect/disconnect
+ """
+ with SessionLocal() as db:
+ event, result = get_event_by_join_code_with_status(db, code)
+ if result == EventLookupResult.NOT_FOUND:
+ raise HTTPException(status_code=404, detail="Event not found")
+ if result == EventLookupResult.ARCHIVED:
+ raise HTTPException(status_code=410, detail="Event has been archived")
+ if result == EventLookupResult.EXPIRED:
+ raise HTTPException(status_code=410, detail="Event has expired")
+ event_code = event.code
+
+ return EventSourceResponse(
+ _event_generator(request, event_code),
+ media_type="text/event-stream",
+ headers={"X-Accel-Buffering": "no"},
+ )
+```
+
+- [ ] **Step 4: Run the new tests to verify they pass**
+
+Run (from `server/`): `.venv/bin/pytest tests/test_sse_pool.py -v`
+Expected: all 3 tests PASS.
+
+- [ ] **Step 5: Run existing SSE security tests to confirm no regression**
+
+Run (from `server/`): `.venv/bin/pytest tests/test_sse_security.py -v`
+Expected: all PASS (404/410 existence checks + rate limit preserved).
+
+- [ ] **Step 6: Full backend CI gate**
+
+Run (from `server/`):
+```bash
+.venv/bin/ruff check .
+.venv/bin/ruff format --check .
+.venv/bin/bandit -r app -c pyproject.toml -q
+.venv/bin/pytest --tb=short -q
+```
+Expected: all green, coverage >= 80%.
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add server/app/api/sse.py server/tests/test_sse_pool.py docs/superpowers/plans/2026-05-26-sse-stream-pooled-db-connection-leak.md
+git commit -m "fix(sse): don't pin a pooled DB connection for the SSE stream lifetime (#356)"
+```
+
+---
+
+## Self-Review
+
+**Spec coverage:**
+- "Open SSE streams no longer hold a pooled DB connection while idle" → fix removes `Depends(get_db)`, uses `with SessionLocal()` closed before returning; `test_n_concurrent_idle_streams_hold_zero_pool_connections` proves it.
+- "A test confirms N concurrent open streams consume ~0 idle pool connections" → `test_n_concurrent_idle_streams_hold_zero_pool_connections` (N=25 > pool capacity 15).
+- "Existence/auth checks preserved" → `test_event_stream_preserves_404_for_unknown_event` + existing `test_sse_security.py`.
+
+**Placeholder scan:** none.
+
+**Type consistency:** `event_stream(code, request)`, `_event_generator(request, event_code)`, `SessionLocal()` — consistent across plan and fix.
diff --git a/docs/superpowers/plans/2026-05-28-llm-cost-quota-caps.md b/docs/superpowers/plans/2026-05-28-llm-cost-quota-caps.md
new file mode 100644
index 00000000..318aaa01
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-28-llm-cost-quota-caps.md
@@ -0,0 +1,1208 @@
+# LLM Cost / Quota Caps per DJ Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Let admins set a monthly token cap per DJ LLM connector; the gateway refuses calls that would push the current calendar month over the cap with a clear DJ-facing message.
+
+**Architecture:** Add a nullable `monthly_token_cap` integer column to `LlmConnector` (None = unlimited). A direct aggregation query sums `tokens_in + tokens_out` from `llm_call_log` for the current calendar month per connector. The gateway runs a pre-flight check in `dispatch()`: if current month usage already meets/exceeds the cap, raise a new `QuotaCapReached` exception. Admins set caps via a new PATCH endpoint in `admin_llm.py`; the admin UI adds a cap input + usage-vs-cap progress bar per connector row.
+
+**Tech Stack:** FastAPI, SQLAlchemy 2.0, Alembic, Pydantic v2, Next.js 16 / React 19 (vanilla CSS), pytest, vitest.
+
+**Why direct aggregation (not a rollup table):** At current volume (`llm_call_log` has a 30-365 day retention window, and traffic is DJ-initiated recommendation calls — low hundreds per DJ per month at most), a single indexed `SUM(...) WHERE created_at >= month_start GROUP BY connector_id` is correct and cheap. `llm_call_log.created_at` is already indexed. A materialized view or hourly cron rollup adds operational complexity (refresh scheduling, staleness windows, an extra table + migration) with no measurable benefit until call volume is orders of magnitude higher. This rationale is documented here and in the PR; revisit it if usage telemetry shows the aggregation query becoming hot.
+
+---
+
+## File Structure
+
+**Backend:**
+- `server/app/models/llm_connector.py` — add `monthly_token_cap: Mapped[int | None]` column on `LlmConnector`.
+- `server/alembic/versions/050_llm_connector_monthly_token_cap.py` — new migration (down_revision `049`).
+- `server/app/services/llm/exceptions.py` — add `QuotaCapReached(LlmError)`.
+- `server/app/services/llm/connector_storage.py` — add `current_month_token_usage(db, connector_id)` aggregation helper + `set_monthly_cap(connector, cap)` setter with validation.
+- `server/app/services/llm/gateway.py` — add a pre-flight cap check in `dispatch()` before the primary attempt (and before any fallback attempt against a connector with a cap).
+- `server/app/schemas/llm.py` — add `monthly_token_cap` to `ConnectorOut`; add `AdminConnectorCapPatch` request schema; add `current_month_tokens` to `AdminConnectorOut`.
+- `server/app/api/admin_llm.py` — add `PATCH /connectors/{id}/cap` endpoint; populate `current_month_tokens` in the connectors listing.
+- `server/app/api/events.py` — ensure `QuotaCapReached` from the LLM recommendation endpoint surfaces the DJ-facing 429 message instead of the generic 502.
+
+**Frontend:**
+- `dashboard/lib/api-types.generated.ts` — regenerated from backend OpenAPI (do not hand-edit).
+- `dashboard/lib/api.ts` — add `setAdminLlmConnectorCap(id, cap)` method.
+- `dashboard/app/admin/ai/page.tsx` — add cap input + usage-vs-cap progress bar to each per-DJ connector row.
+
+**Tests:**
+- `server/tests/test_llm_quota_cap.py` — new: aggregation helper, gateway pre-flight enforcement, cap setter validation.
+- `server/tests/test_llm_api.py` — extend: admin cap PATCH endpoint (auth, validation, set/clear).
+- `dashboard/app/admin/ai/__tests__/` or inline — cap UI rendering + progress bar (if a test harness for the page already exists; otherwise add a focused, component-free logic test).
+
+---
+
+## Task 1: Add `QuotaCapReached` exception
+
+**Files:**
+- Modify: `server/app/services/llm/exceptions.py`
+- Test: `server/tests/test_llm_quota_cap.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Create `server/tests/test_llm_quota_cap.py`:
+
+```python
+"""Tests for per-DJ monthly token caps (issue #339)."""
+
+from __future__ import annotations
+
+from app.services.llm.exceptions import LlmError, QuotaCapReached
+
+
+def test_quota_cap_reached_is_llm_error():
+ exc = QuotaCapReached("cap reached")
+ assert isinstance(exc, LlmError)
+ assert str(exc) == "cap reached"
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v`
+Expected: FAIL — `ImportError: cannot import name 'QuotaCapReached'`
+
+- [ ] **Step 3: Add the exception**
+
+In `server/app/services/llm/exceptions.py`, after the `QuotaExceeded` class:
+
+```python
+class QuotaCapReached(LlmError):
+ """The DJ's admin-set monthly token cap for this connector is reached.
+
+ Distinct from :class:`QuotaExceeded` (a provider-side billing/quota error):
+ this is a WrzDJ-internal pre-flight refusal raised *before* any provider
+ call, so no tokens are spent. The DJ-facing message is fixed and contains
+ no internal details — see the gateway pre-flight check.
+ """
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/services/llm/exceptions.py server/tests/test_llm_quota_cap.py
+git commit -m "feat(llm): add QuotaCapReached exception for monthly token caps"
+```
+
+---
+
+## Task 2: Add `monthly_token_cap` column + migration
+
+**Files:**
+- Modify: `server/app/models/llm_connector.py`
+- Create: `server/alembic/versions/050_llm_connector_monthly_token_cap.py`
+- Test: `server/tests/test_llm_quota_cap.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `server/tests/test_llm_quota_cap.py`:
+
+```python
+import json
+
+from app.models.llm_connector import LlmConnector
+from app.models.user import User
+from app.services.auth import get_password_hash
+
+
+def _make_dj(db, username="capdj"):
+ user = User(username=username, password_hash=get_password_hash("password123"), role="dj")
+ db.add(user)
+ db.commit()
+ db.refresh(user)
+ return user
+
+
+def _make_connector(db, user, *, monthly_token_cap=None):
+ row = LlmConnector(
+ user_id=user.id,
+ connector_type="openai_apikey",
+ display_name="Cap connector",
+ status="active",
+ credentials=json.dumps({"api_key": "sk-fake-key"}),
+ model_hint="gpt-5-mini",
+ monthly_token_cap=monthly_token_cap,
+ )
+ db.add(row)
+ db.commit()
+ db.refresh(row)
+ return row
+
+
+def test_connector_defaults_to_no_cap(db):
+ user = _make_dj(db)
+ connector = _make_connector(db, user)
+ assert connector.monthly_token_cap is None
+
+
+def test_connector_stores_cap(db):
+ user = _make_dj(db, username="capdj2")
+ connector = _make_connector(db, user, monthly_token_cap=100_000)
+ db.refresh(connector)
+ assert connector.monthly_token_cap == 100_000
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v`
+Expected: FAIL — `TypeError: 'monthly_token_cap' is an invalid keyword argument for LlmConnector`
+
+- [ ] **Step 3: Add the model column**
+
+In `server/app/models/llm_connector.py`, inside `LlmConnector`, after the `last_health_check_status` column (before `__table_args__`):
+
+```python
+ # Admin-set monthly token cap (issue #339). NULL = unlimited. When set, the
+ # gateway refuses dispatch once the current calendar month's summed
+ # tokens_in + tokens_out for this connector meets or exceeds the cap. The
+ # cap is admin-only (set via /api/admin/llm/connectors/{id}/cap) and is
+ # checked PRE-FLIGHT only — editing it never disrupts an in-flight call.
+ monthly_token_cap: Mapped[int | None] = mapped_column(Integer, nullable=True)
+```
+
+- [ ] **Step 4: Create the migration**
+
+Create `server/alembic/versions/050_llm_connector_monthly_token_cap.py`:
+
+```python
+"""Add monthly_token_cap to llm_connectors (issue #339).
+
+Revision ID: 050
+Revises: 049
+Create Date: 2026-05-28
+
+Adds an admin-set per-DJ monthly token cap to ``llm_connectors``:
+
+- ``monthly_token_cap`` (Integer, nullable) — NULL means unlimited. When set,
+ the LLM gateway refuses dispatch once the current calendar month's summed
+ ``tokens_in + tokens_out`` for the connector meets or exceeds this value.
+
+Nullable with no server default so existing connectors stay unlimited.
+"""
+
+import sqlalchemy as sa
+
+from alembic import op
+
+revision: str = "050"
+down_revision: str | None = "049"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ op.add_column(
+ "llm_connectors",
+ sa.Column("monthly_token_cap", sa.Integer(), nullable=True),
+ )
+
+
+def downgrade() -> None:
+ op.drop_column("llm_connectors", "monthly_token_cap")
+```
+
+- [ ] **Step 5: Run test to verify it passes**
+
+Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v`
+Expected: PASS (SQLite test DB recreates schema from models)
+
+- [ ] **Step 6: Verify alembic on isolated Postgres DB**
+
+Run (isolated DB avoids the shared-DB drift from sibling worktrees):
+```bash
+DATABASE_URL="postgresql+psycopg://wrzdj:wrzdj@localhost:5432/wrzdj_issue339" .venv/bin/alembic upgrade head
+DATABASE_URL="postgresql+psycopg://wrzdj:wrzdj@localhost:5432/wrzdj_issue339" .venv/bin/alembic check
+```
+Expected: `No new upgrade operations detected.`
+
+If the isolated DB does not exist yet, or was already at head from a prior run, (re)create it before running the commands above:
+```bash
+docker exec wrzdj-db-1 psql -U wrzdj -d postgres -c "DROP DATABASE IF EXISTS wrzdj_issue339;" -c "CREATE DATABASE wrzdj_issue339;"
+```
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add server/app/models/llm_connector.py server/alembic/versions/050_llm_connector_monthly_token_cap.py server/tests/test_llm_quota_cap.py
+git commit -m "feat(llm): add monthly_token_cap column + migration 050"
+```
+
+---
+
+## Task 3: Add current-month usage aggregation + cap setter helpers
+
+**Files:**
+- Modify: `server/app/services/llm/connector_storage.py`
+- Test: `server/tests/test_llm_quota_cap.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `server/tests/test_llm_quota_cap.py`:
+
+```python
+from datetime import timedelta
+
+import pytest
+
+from app.core.time import utcnow
+from app.models.llm_connector import LlmCallLog
+from app.services.llm.connector_storage import (
+ current_month_token_usage,
+ set_monthly_cap,
+)
+
+
+def _log(db, connector_id, *, tokens_in, tokens_out, when=None):
+ row = LlmCallLog(
+ connector_id=connector_id,
+ purpose="test",
+ status="ok",
+ latency_ms=10,
+ tokens_in=tokens_in,
+ tokens_out=tokens_out,
+ )
+ db.add(row)
+ db.flush()
+ if when is not None:
+ row.created_at = when
+ db.commit()
+ return row
+
+
+def test_current_month_usage_sums_in_and_out(db):
+ user = _make_dj(db, username="usagedj")
+ connector = _make_connector(db, user)
+ _log(db, connector.id, tokens_in=100, tokens_out=50)
+ _log(db, connector.id, tokens_in=10, tokens_out=5)
+ assert current_month_token_usage(db, connector.id) == 165
+
+
+def test_current_month_usage_excludes_prior_months(db):
+ user = _make_dj(db, username="usagedj2")
+ connector = _make_connector(db, user)
+ # 40 days ago — previous month, must be excluded.
+ _log(db, connector.id, tokens_in=1000, tokens_out=1000, when=utcnow() - timedelta(days=40))
+ _log(db, connector.id, tokens_in=7, tokens_out=3)
+ assert current_month_token_usage(db, connector.id) == 10
+
+
+def test_current_month_usage_treats_null_tokens_as_zero(db):
+ user = _make_dj(db, username="usagedj3")
+ connector = _make_connector(db, user)
+ _log(db, connector.id, tokens_in=None, tokens_out=None)
+ _log(db, connector.id, tokens_in=5, tokens_out=None)
+ assert current_month_token_usage(db, connector.id) == 5
+
+
+def test_set_monthly_cap_accepts_positive_int(db):
+ user = _make_dj(db, username="capset")
+ connector = _make_connector(db, user)
+ set_monthly_cap(connector, 50_000)
+ assert connector.monthly_token_cap == 50_000
+
+
+def test_set_monthly_cap_accepts_none_to_clear(db):
+ user = _make_dj(db, username="capclear")
+ connector = _make_connector(db, user, monthly_token_cap=10)
+ set_monthly_cap(connector, None)
+ assert connector.monthly_token_cap is None
+
+
+def test_set_monthly_cap_rejects_negative(db):
+ user = _make_dj(db, username="capneg")
+ connector = _make_connector(db, user)
+ with pytest.raises(ValueError):
+ set_monthly_cap(connector, -1)
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v`
+Expected: FAIL — `ImportError: cannot import name 'current_month_token_usage'`
+
+- [ ] **Step 3: Implement the helpers**
+
+In `server/app/services/llm/connector_storage.py`, add a module-level helper for the month boundary and the two functions. Add near the other aggregation helpers (after `get_usage_stats`):
+
+```python
+def _calendar_month_start() -> "datetime":
+ """First instant (UTC, naive) of the current calendar month."""
+ from app.core.time import utcnow
+
+ now = utcnow()
+ return now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
+
+
+def current_month_token_usage(db: Session, connector_id: int) -> int:
+ """Sum tokens_in + tokens_out for ``connector_id`` in the current month.
+
+ Direct aggregation against the indexed ``llm_call_log.created_at`` column.
+ NULL token counts are coalesced to 0. Returns 0 when there are no rows.
+ Used by the gateway pre-flight cap check + the admin usage-vs-cap display.
+ """
+ month_start = _calendar_month_start()
+ total = db.execute(
+ select(
+ func.coalesce(func.sum(LlmCallLog.tokens_in), 0)
+ + func.coalesce(func.sum(LlmCallLog.tokens_out), 0)
+ ).where(
+ LlmCallLog.connector_id == connector_id,
+ LlmCallLog.created_at >= month_start,
+ )
+ ).scalar_one()
+ return int(total or 0)
+
+
+def set_monthly_cap(connector: LlmConnector, cap: int | None) -> LlmConnector:
+ """Set (or clear) the connector's monthly token cap. Caller commits.
+
+ ``cap=None`` clears the cap (unlimited). A non-None cap must be a
+ non-negative integer; negative values are rejected with ``ValueError``
+ (→ HTTP 400 at the API boundary).
+ """
+ if cap is not None and cap < 0:
+ raise ValueError("monthly_token_cap must be a non-negative integer or null")
+ connector.monthly_token_cap = cap
+ return connector
+```
+
+The snippet annotates `_calendar_month_start` with the string forward reference `"datetime"` so it stays valid even if `datetime` is not imported. Check the module's existing imports and add `from datetime import datetime` at the top of the module if it is missing; then change the annotation to `-> datetime:` (drop the quotes).
+
+Add both new names to the `__all__` list:
+
+```python
+ "current_month_token_usage",
+ "set_monthly_cap",
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/services/llm/connector_storage.py server/tests/test_llm_quota_cap.py
+git commit -m "feat(llm): add current-month usage aggregation + cap setter"
+```
+
+---
+
+## Task 4: Gateway pre-flight cap enforcement
+
+**Files:**
+- Modify: `server/app/services/llm/gateway.py`
+- Test: `server/tests/test_llm_quota_cap.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `server/tests/test_llm_quota_cap.py`:
+
+```python
+from unittest.mock import AsyncMock, patch
+
+from app.services.llm.adapters.openai_apikey import OpenAIApiKeyAdapter
+from app.services.llm.base import ChatRequest, ChatResponse, Message, TokenUsage
+from app.services.llm.exceptions import QuotaCapReached
+from app.services.llm.gateway import Gateway
+
+
+def _req() -> ChatRequest:
+ return ChatRequest(messages=[Message(role="user", content="hi")])
+
+
+@pytest.mark.asyncio
+async def test_dispatch_allows_when_under_cap(db):
+ user = _make_dj(db, username="undercap")
+ connector = _make_connector(db, user, monthly_token_cap=1_000)
+ _log(db, connector.id, tokens_in=100, tokens_out=100) # 200 used, under 1000
+
+ fake = ChatResponse(text="ok", tool_calls=[], stop_reason="end_turn",
+ usage=TokenUsage(prompt=5, completion=2))
+ with patch.object(OpenAIApiKeyAdapter, "chat", new=AsyncMock(return_value=fake)):
+ resp = await Gateway.dispatch(db, user, _req(), purpose="test")
+ assert resp.text == "ok"
+
+
+@pytest.mark.asyncio
+async def test_dispatch_refuses_when_cap_reached(db):
+ user = _make_dj(db, username="atcap")
+ connector = _make_connector(db, user, monthly_token_cap=200)
+ _log(db, connector.id, tokens_in=150, tokens_out=50) # 200 used, == cap
+
+ # The adapter must NOT be called — refusal is pre-flight.
+ chat_mock = AsyncMock()
+ with patch.object(OpenAIApiKeyAdapter, "chat", new=chat_mock):
+ with pytest.raises(QuotaCapReached):
+ await Gateway.dispatch(db, user, _req(), purpose="test")
+ chat_mock.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_dispatch_unlimited_when_cap_none(db):
+ user = _make_dj(db, username="nolimit")
+ connector = _make_connector(db, user, monthly_token_cap=None)
+ _log(db, connector.id, tokens_in=10_000, tokens_out=10_000)
+
+ fake = ChatResponse(text="ok", tool_calls=[], stop_reason="end_turn",
+ usage=TokenUsage(prompt=1, completion=1))
+ with patch.object(OpenAIApiKeyAdapter, "chat", new=AsyncMock(return_value=fake)):
+ resp = await Gateway.dispatch(db, user, _req(), purpose="test")
+ assert resp.text == "ok"
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -k cap -v`
+Expected: FAIL — `test_dispatch_refuses_when_cap_reached` fails because the adapter is called and no `QuotaCapReached` is raised.
+
+- [ ] **Step 3: Implement the pre-flight check**
+
+In `server/app/services/llm/gateway.py`:
+
+Add the import for the helper + exception. Update the `from app.services.llm.connector_storage import ...` line:
+
+```python
+from app.services.llm.connector_storage import (
+ audit_event,
+ current_month_token_usage,
+ log_call,
+)
+```
+
+Add `QuotaCapReached` to the exceptions import block:
+
+```python
+from app.services.llm.exceptions import (
+ AuthInvalid,
+ LlmError,
+ NoLlmConfigured,
+ ProviderUnavailable,
+ QuotaCapReached,
+ QuotaExceeded,
+ RateLimited,
+ ToolTranslationError,
+)
+```
+
+Add a module-level helper after `_fallback_trigger`:
+
+```python
+def _enforce_monthly_cap(db: Session, connector: LlmConnector) -> None:
+ """Pre-flight: refuse dispatch when the connector's monthly cap is reached.
+
+ No-op when the connector has no cap (``monthly_token_cap is None``).
+ Compares the current calendar month's summed token usage against the cap;
+ refuses when usage already meets or exceeds it. Raised BEFORE any provider
+ call, so no tokens are spent and editing the cap never disrupts an
+ already-dispatched (in-flight) call.
+
+ The error message is fixed and leaks no internals (usage totals, cap value,
+ connector id) — see issue #339 security note.
+ """
+ cap = connector.monthly_token_cap
+ if cap is None:
+ return
+ used = current_month_token_usage(db, connector.id)
+ if used >= cap:
+ raise QuotaCapReached(
+ "Your monthly token cap is reached. Contact your admin to raise it."
+ )
+```
+
+In `Gateway.dispatch`, add the pre-flight check immediately after `primary = _resolve_connector(...)` / `actor_id = ...` and before "Attempt 1":
+
+```python
+ primary = _resolve_connector(db, actor)
+ actor_id = actor.id if actor else _system_actor_id(db, primary)
+
+ # Pre-flight: refuse if the resolved connector's monthly cap is reached
+ # (issue #339). Raised before any provider call — no tokens spent.
+ _enforce_monthly_cap(db, primary)
+```
+
+Also enforce the cap on the fallback connector before the fallback attempt. In the fallback branch, after `fallback = _resolve_org_default(db)` and the `if fallback is None or fallback.id == primary.id: raise` guard, before the `audit_event(...)` write, add:
+
+```python
+ # The fallback connector may itself be capped — refuse rather than
+ # silently spending another DJ's budget.
+ _enforce_monthly_cap(db, fallback)
+```
+
+`QuotaCapReached` is a subclass of `LlmError` but is NOT in `_FALLBACK_TRIGGERS`, so `_fallback_trigger()` returns `None` for it and the primary-connector cap refusal short-circuits to `raise` (no fallback) — which is correct: a cap is not a transient/credential error.
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v`
+Expected: PASS (all cap tests)
+
+Run the full gateway suite to confirm no regression:
+Run: `.venv/bin/pytest tests/test_llm_gateway.py -v`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/services/llm/gateway.py server/tests/test_llm_quota_cap.py
+git commit -m "feat(llm): enforce monthly token cap pre-flight in gateway dispatch"
+```
+
+---
+
+## Task 5: Expose cap in schemas + admin connectors listing
+
+**Files:**
+- Modify: `server/app/schemas/llm.py`
+- Modify: `server/app/api/admin_llm.py`
+- Test: `server/tests/test_llm_api.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Add to `server/tests/test_llm_api.py` (find the admin connectors-listing test area; add a new test). First inspect the file for an existing admin connector + admin_headers fixture pattern, then add:
+
+```python
+def test_admin_connectors_listing_includes_cap_and_usage(client, db, admin_headers, dj_user):
+ # Create a connector for a DJ with a cap, and log some usage this month.
+ import json as _json
+
+ from app.models.llm_connector import LlmCallLog, LlmConnector
+
+ connector = LlmConnector(
+ user_id=dj_user.id,
+ connector_type="openai_apikey",
+ display_name="Capped",
+ status="active",
+ credentials=_json.dumps({"api_key": "sk-fake-key"}),
+ monthly_token_cap=1000,
+ )
+ db.add(connector)
+ db.commit()
+ db.refresh(connector)
+ db.add(LlmCallLog(connector_id=connector.id, purpose="test", status="ok",
+ latency_ms=5, tokens_in=120, tokens_out=80))
+ db.commit()
+
+ resp = client.get("/api/admin/llm/connectors", headers=admin_headers)
+ assert resp.status_code == 200
+ row = next(r for r in resp.json() if r["id"] == connector.id)
+ assert row["monthly_token_cap"] == 1000
+ assert row["current_month_tokens"] == 200
+```
+
+If `test_llm_api.py` has no `dj_user` fixture, create the DJ inline (mirror the local connector-creation helpers already used in that file).
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `.venv/bin/pytest tests/test_llm_api.py -k cap_and_usage -v`
+Expected: FAIL — `KeyError: 'monthly_token_cap'` or `'current_month_tokens'`
+
+- [ ] **Step 3: Update schemas**
+
+In `server/app/schemas/llm.py`:
+
+Add to `ConnectorOut` (after `last_health_check_status`):
+
+```python
+ # Admin-set monthly token cap (issue #339). None = unlimited.
+ monthly_token_cap: int | None = None
+```
+
+Add to `AdminConnectorOut` (after `dj_username`):
+
+```python
+ # Current calendar-month token usage (tokens_in + tokens_out), so the admin
+ # UI can render a usage-vs-cap progress bar without a second round-trip.
+ current_month_tokens: int = 0
+```
+
+Add a new request schema near `AdminPolicyPatch`:
+
+```python
+class AdminConnectorCapPatch(BaseModel):
+ """Admin set/clear a connector's monthly token cap (issue #339).
+
+ ``monthly_token_cap = null`` clears the cap (unlimited). A non-null value
+ must be an integer between 0 and 1_000_000_000 inclusive; ``0`` means
+ "no further calls this month".
+ """
+
+ monthly_token_cap: int | None = Field(default=None, ge=0, le=1_000_000_000)
+```
+
+- [ ] **Step 4: Populate `current_month_tokens` in the listing**
+
+In `server/app/api/admin_llm.py`:
+
+Import the helper:
+
+```python
+from app.services.llm.connector_storage import (
+ AUDIT_POLICY_CHANGED,
+ AUDIT_REVOKED_BY_ADMIN,
+ audit_event,
+ current_month_token_usage,
+ get_connector,
+ get_usage_stats,
+ get_user_label,
+ list_all_connectors,
+ revoke_connector,
+)
+```
+
+Update `_connector_to_admin_out` to accept and inject `current_month_tokens`:
+
+```python
+def _connector_to_admin_out(
+ row: LlmConnector, dj_username: str, current_month_tokens: int = 0
+) -> AdminConnectorOut:
+ return AdminConnectorOut.model_validate(
+ {
+ **{c.name: getattr(row, c.name) for c in LlmConnector.__table__.columns},
+ "dj_username": dj_username,
+ "current_month_tokens": current_month_tokens,
+ }
+ )
+```
+
+In `list_connectors_admin`, compute usage per row:
+
+```python
+ return [
+ _connector_to_admin_out(
+ r,
+ usernames.get(r.user_id) or f"user#{r.user_id}",
+ current_month_token_usage(db, r.id),
+ )
+ for r in rows
+ ]
+```
+
+Update the other `_connector_to_admin_out(...)` call sites — the final return in `revoke_connector_admin` and the new cap endpoint added in Task 6 — to pass `current_month_token_usage(db, row.id)` as the third argument.
+
+- [ ] **Step 5: Run test to verify it passes**
+
+Run: `.venv/bin/pytest tests/test_llm_api.py -k cap_and_usage -v`
+Expected: PASS
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add server/app/schemas/llm.py server/app/api/admin_llm.py server/tests/test_llm_api.py
+git commit -m "feat(llm): expose monthly cap + current-month usage in admin listing"
+```
+
+---
+
+## Task 6: Admin PATCH endpoint to set/clear a connector cap
+
+**Files:**
+- Modify: `server/app/api/admin_llm.py`
+- Test: `server/tests/test_llm_api.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Add to `server/tests/test_llm_api.py`:
+
+```python
+def test_admin_set_connector_cap(client, db, admin_headers, dj_user):
+ import json as _json
+
+ from app.models.llm_connector import LlmConnector
+
+ connector = LlmConnector(
+ user_id=dj_user.id, connector_type="openai_apikey", display_name="C",
+ status="active", credentials=_json.dumps({"api_key": "sk-fake-key"}),
+ )
+ db.add(connector)
+ db.commit()
+ db.refresh(connector)
+
+ resp = client.patch(
+ f"/api/admin/llm/connectors/{connector.id}/cap",
+ headers=admin_headers,
+ json={"monthly_token_cap": 50000},
+ )
+ assert resp.status_code == 200
+ assert resp.json()["monthly_token_cap"] == 50000
+
+ # Clear it.
+ resp = client.patch(
+ f"/api/admin/llm/connectors/{connector.id}/cap",
+ headers=admin_headers,
+ json={"monthly_token_cap": None},
+ )
+ assert resp.status_code == 200
+ assert resp.json()["monthly_token_cap"] is None
+
+
+def test_admin_set_cap_rejects_negative(client, db, admin_headers, dj_user):
+ import json as _json
+
+ from app.models.llm_connector import LlmConnector
+
+ connector = LlmConnector(
+ user_id=dj_user.id, connector_type="openai_apikey", display_name="C2",
+ status="active", credentials=_json.dumps({"api_key": "sk-fake-key"}),
+ )
+ db.add(connector)
+ db.commit()
+ db.refresh(connector)
+
+ resp = client.patch(
+ f"/api/admin/llm/connectors/{connector.id}/cap",
+ headers=admin_headers,
+ json={"monthly_token_cap": -5},
+ )
+ assert resp.status_code == 422 # Pydantic ge=0 rejection
+
+
+def test_admin_set_cap_404_for_missing_connector(client, admin_headers):
+ resp = client.patch(
+ "/api/admin/llm/connectors/999999/cap",
+ headers=admin_headers,
+ json={"monthly_token_cap": 100},
+ )
+ assert resp.status_code == 404
+
+
+def test_set_cap_requires_admin(client, db, auth_headers, test_user):
+ # A non-admin (plain DJ) must be rejected.
+ import json as _json
+
+ from app.models.llm_connector import LlmConnector
+
+ connector = LlmConnector(
+ user_id=test_user.id, connector_type="openai_apikey", display_name="C3",
+ status="active", credentials=_json.dumps({"api_key": "sk-fake-key"}),
+ )
+ db.add(connector)
+ db.commit()
+ db.refresh(connector)
+
+ resp = client.patch(
+ f"/api/admin/llm/connectors/{connector.id}/cap",
+ headers=auth_headers,
+ json={"monthly_token_cap": 100},
+ )
+ assert resp.status_code == 403
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `.venv/bin/pytest tests/test_llm_api.py -k cap -v`
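+Expected: FAIL — the new set-cap tests get 404/405 because the endpoint is not yet defined. (`-k cap` also selects tests that may already pass: the Task 5 listing test, and `test_admin_set_cap_404_for_missing_connector`, since an unknown route can also return 404. What matters is that `test_admin_set_connector_cap`, `test_admin_set_cap_rejects_negative`, and `test_set_cap_requires_admin` fail.)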
+
+- [ ] **Step 3: Add the endpoint**
+
+In `server/app/api/admin_llm.py`:
+
+Add `AdminConnectorCapPatch` to the schema imports and `set_monthly_cap` to the storage imports. No new audit constant is needed: record cap changes with the existing `AUDIT_POLICY_CHANGED` event, which is already imported. It is the closest existing lifecycle event, and reusing it avoids touching the model's audit constants and migrations. A dedicated `AUDIT_CAP_CHANGED` constant is a possible alternative, but this plan does not use it.
+
+Imports:
+
+```python
+from app.schemas.llm import (
+ AdminAuditOut,
+ AdminConnectorCapPatch,
+ AdminConnectorOut,
+ AdminPolicyOut,
+ AdminPolicyPatch,
+ AdminUsageOut,
+ AuditEventRow,
+ UsageRow,
+)
+from app.services.llm.connector_storage import (
+ AUDIT_POLICY_CHANGED,
+ AUDIT_REVOKED_BY_ADMIN,
+ audit_event,
+ current_month_token_usage,
+ get_connector,
+ get_usage_stats,
+ get_user_label,
+ list_all_connectors,
+ revoke_connector,
+ set_monthly_cap,
+)
+```
+
+Add the endpoint (place it after `revoke_connector_admin`):
+
+```python
+@router.patch("/connectors/{connector_id}/cap", response_model=AdminConnectorOut)
+@limiter.limit("30/minute")
+def set_connector_cap_admin(
+ request: FastAPIRequest,
+ connector_id: int,
+ payload: AdminConnectorCapPatch,
+ admin: User = Depends(get_current_admin),
+ db: Session = Depends(get_db),
+) -> AdminConnectorOut:
+ """Set or clear a connector's monthly token cap (admin-only, issue #339).
+
+ ``monthly_token_cap = null`` clears the cap (unlimited). The change is
+ pre-flight only: an in-flight gateway call already past its cap check is
+ unaffected. Pydantic enforces the non-negative bound (``ge=0``).
+ """
+ row = get_connector(db, connector_id)
+ if row is None:
+ raise HTTPException(status_code=404, detail="Connector not found")
+
+ try:
+ set_monthly_cap(row, payload.monthly_token_cap)
+ except ValueError as exc:
+ raise HTTPException(status_code=400, detail=str(exc)) from exc
+
+ audit_event(
+ db,
+ actor_user_id=admin.id,
+ target_connector_id=row.id,
+ event_type=AUDIT_POLICY_CHANGED,
+ )
+ db.commit()
+ db.refresh(row)
+ return _connector_to_admin_out(
+ row, get_user_label(db, row.user_id), current_month_token_usage(db, row.id)
+ )
+```
+
+Also update `revoke_connector_admin`'s final return to pass usage:
+
+```python
+ return _connector_to_admin_out(
+ row, get_user_label(db, row.user_id), current_month_token_usage(db, row.id)
+ )
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `.venv/bin/pytest tests/test_llm_api.py -k cap -v`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/api/admin_llm.py server/tests/test_llm_api.py
+git commit -m "feat(llm): admin endpoint to set/clear per-connector monthly cap"
+```
+
+---
+
+## Task 7: Surface `QuotaCapReached` as a clear DJ-facing error
+
+**Files:**
+- Modify: `server/app/api/events.py:923-988` (the `/recommendations/llm` endpoint)
+- Test: `server/tests/test_llm_recommendation_via_gateway.py` (or `test_llm_quota_cap.py`)
+
+- [ ] **Step 1: Write the failing test**
+
+Inspect `server/tests/test_llm_recommendation_via_gateway.py` for the existing event + DJ + connector fixture pattern and how `/recommendations/llm` is exercised. Add a test that pre-fills usage at/over a cap and asserts a 429 with the DJ-facing message:
+
+```python
+def test_llm_recommendation_returns_429_when_cap_reached(client, db, ...):
+ # ... set up event owned by a DJ with a capped, active connector and
+ # a connected music service (tidal/beatport token), then log usage >= cap.
+ # POST /api/events/{code}/recommendations/llm with a prompt.
+ assert resp.status_code == 429
+ assert "monthly token cap is reached" in resp.json()["detail"].lower()
+```
+
+Model this test on the existing setup in `test_llm_recommendation_via_gateway.py`. If that file's fixtures are too heavy to reuse cleanly, instead unit-test the mapping by patching `generate_recommendations_from_llm` to raise `QuotaCapReached` and asserting the endpoint returns 429 with the message.
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `.venv/bin/pytest tests/test_llm_recommendation_via_gateway.py -k cap -v`
+Expected: FAIL — endpoint returns 502 (generic) instead of 429 with the cap message.
+
+- [ ] **Step 3: Handle `QuotaCapReached` before the generic catch**
+
+In `server/app/api/events.py`, in `get_llm_recommendations`, change the try/except around `generate_recommendations_from_llm` to catch the cap error first:
+
+```python
+ from app.services.llm.exceptions import QuotaCapReached
+
+ try:
+ result = await generate_recommendations_from_llm(db, user, event, prompt_request.prompt)
+ except QuotaCapReached as exc:
+ # DJ-facing message only — no internal usage/cap details leaked.
+ raise HTTPException(status_code=429, detail=str(exc)) from exc
+ except Exception:
+ import logging
+
+ logging.getLogger(__name__).exception("LLM recommendation failed")
+ raise HTTPException(
+ status_code=502,
+ detail="LLM service error. Try again or use algorithmic recommendations.",
+ )
+```
+
+Place the `from app.services.llm.exceptions import QuotaCapReached` import with the other local imports at the top of the function (next to the existing `from app.services.recommendation...` imports).
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `.venv/bin/pytest tests/test_llm_recommendation_via_gateway.py -k cap -v`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/api/events.py server/tests/test_llm_recommendation_via_gateway.py
+git commit -m "feat(llm): surface QuotaCapReached as 429 with DJ-facing message"
+```
+
+---
+
+## Task 8: Regenerate frontend types + add api.ts method
+
+**Files:**
+- Modify: `dashboard/lib/api-types.generated.ts` (regenerated)
+- Modify: `dashboard/lib/api-types.ts` (add `LlmAdminConnectorCapPatch` alias)
+- Modify: `dashboard/lib/api.ts`
+- Test: `dashboard/lib/__tests__/api.test.ts`
+
+- [ ] **Step 1: Regenerate types from backend OpenAPI**
+
+Run from `dashboard/`:
+```bash
+npm run types:export
+npm run types:generate
+```
+Verify `AdminConnectorCapPatch` and `current_month_tokens` / `monthly_token_cap` appear in `dashboard/lib/api-types.generated.ts`.
+
+- [ ] **Step 2: Add type alias**
+
+In `dashboard/lib/api-types.ts`, near the other LLM aliases:
+
+```typescript
+export type LlmAdminConnectorCapPatch = Schemas['AdminConnectorCapPatch'];
+```
+
+- [ ] **Step 3: Write the failing test**
+
+In `dashboard/lib/__tests__/api.test.ts`, add a test mirroring the existing admin-LLM method tests (find one like `revokeAdminLlmConnector`):
+
+```typescript
+it('setAdminLlmConnectorCap PATCHes the cap endpoint', async () => {
+ const connector = { id: 7, monthly_token_cap: 5000 };
+ mockFetchOnce(connector);
+ const result = await api.setAdminLlmConnectorCap(7, 5000);
+ expect(lastFetchUrl()).toContain('/api/admin/llm/connectors/7/cap');
+ expect(lastFetchInit().method).toBe('PATCH');
+ expect(JSON.parse(lastFetchInit().body as string)).toEqual({ monthly_token_cap: 5000 });
+ expect(result).toEqual(connector);
+});
+```
+
+Adjust `mockFetchOnce`/`lastFetchUrl`/`lastFetchInit` to match the helpers already used in that test file.
+
+- [ ] **Step 4: Run test to verify it fails**
+
+Run from `dashboard/`: `npm test -- --run api.test`
+Expected: FAIL — `api.setAdminLlmConnectorCap is not a function`
+
+- [ ] **Step 5: Add the method**
+
+In `dashboard/lib/api.ts`, in the "Admin LLM policy + oversight" section (after `getAdminLlmUsage`):
+
+```typescript
+ async setAdminLlmConnectorCap(
+ id: number,
+ monthlyTokenCap: number | null,
+ ): Promise<LlmAdminConnector> {
+ return this.fetch(`/api/admin/llm/connectors/${id}/cap`, {
+ method: 'PATCH',
+ body: JSON.stringify({ monthly_token_cap: monthlyTokenCap }),
+ });
+ }
+```
+
+Add `LlmAdminConnectorCapPatch` to the imports if you reference it; the method signature above uses primitives, so an import is optional.
+
+- [ ] **Step 6: Run test to verify it passes**
+
+Run from `dashboard/`: `npm test -- --run api.test`
+Expected: PASS
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add dashboard/lib/api-types.generated.ts dashboard/lib/api-types.ts dashboard/lib/api.ts dashboard/lib/__tests__/api.test.ts
+git commit -m "feat(ai-ui): add setAdminLlmConnectorCap api client method + types"
+```
+
+---
+
+## Task 9: Admin UI — cap input + usage-vs-cap progress bar
+
+**Files:**
+- Modify: `dashboard/app/admin/ai/page.tsx`
+- Test: extend the page's test if one exists, otherwise a focused logic test for the percent helper.
+
+- [ ] **Step 1: Add a cap-percent helper + extract a small pure function (testable)**
+
+In `dashboard/app/admin/ai/page.tsx`, add near the top-level helpers (e.g. after `formatTimestamp`):
+
+```typescript
+// Percent of the monthly cap consumed. Returns null when there is no cap
+// (unlimited) so the UI can render "Unlimited" instead of a bar. Clamps to
+// 0–100 so an over-cap connector (possible: cap lowered mid-month) shows full.
+function capPercent(used: number, cap: number | null | undefined): number | null {
+ if (cap == null) return null;
+ if (cap === 0) return 100;
+ return Math.min(100, Math.max(0, Math.round((used / cap) * 100)));
+}
+```
+
+- [ ] **Step 2: Add a "Monthly cap" column to the connectors table**
+
+Add a `<th>Monthly cap</th>` to the table head (after "Result", before "Actions").
+
+In each connector `<tr>`, add a `<td>` cell that shows the current usage, an editable cap input, and a progress bar:
+
+```tsx
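+<td>
+  {/* Adapt class names/styling to match the table's existing cells. */}
+  <div>{c.current_month_tokens.toLocaleString()} tokens used</div>
+  {capPercent(c.current_month_tokens, c.monthly_token_cap) === null ? (
+    <div>Unlimited</div>
+  ) : (
+    <progress
+      max={100}
+      value={capPercent(c.current_month_tokens, c.monthly_token_cap) ?? 0}
+    />
+  )}
+  <input
+    type="number"
+    min={0}
+    step={1}
+    defaultValue={c.monthly_token_cap ?? ''}
+    placeholder="Unlimited"
+    onBlur={(e) => handleCapBlur(c, e.target.value)}
+    aria-label={`Monthly token cap for ${c.dj_username} ${c.display_name}`}
+  />
+</td>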
+```
+
+- [ ] **Step 3: Add the `handleCapBlur` handler**
+
+Add inside the component (near `handleRevoke`):
+
+```typescript
+ const handleCapBlur = async (connector: LlmAdminConnector, raw: string) => {
+ const trimmed = raw.trim();
+ // Empty input clears the cap (unlimited).
+ let next: number | null;
+ if (trimmed === '') {
+ next = null;
+ } else {
+ const parsed = parseInt(trimmed, 10);
+ if (Number.isNaN(parsed) || parsed < 0) {
+ setError('Monthly cap must be a non-negative whole number.');
+ return;
+ }
+ next = parsed;
+ }
+ // No-op when unchanged.
+ if (next === (connector.monthly_token_cap ?? null)) return;
+ try {
+ const updated = await api.setAdminLlmConnectorCap(connector.id, next);
+ setConnectors((prev) => prev.map((c) => (c.id === connector.id ? updated : c)));
+ } catch (err) {
+ setError(err instanceof Error ? err.message : 'Failed to update cap');
+ }
+ };
+```
+
+- [ ] **Step 4: Type check + lint + tests**
+
+Run from `dashboard/`:
+```bash
+npx tsc --noEmit
+npm run lint
+npm test -- --run
+git checkout next-env.d.ts 2>/dev/null || true
+```
+Expected: all green. Fix any type errors (e.g. `current_month_tokens` should be a `number` on `LlmAdminConnector` from the regenerated types).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add dashboard/app/admin/ai/page.tsx
+git commit -m "feat(ai-ui): admin cap input + usage-vs-cap progress bar per connector"
+```
+
+---
+
+## Task 10: Full local CI + finalize
+
+- [ ] **Step 1: Backend CI**
+
+From `server/`:
+```bash
+.venv/bin/ruff check .
+.venv/bin/ruff format --check .
+.venv/bin/bandit -r app -c pyproject.toml -q
+.venv/bin/pytest --tb=short -q
+```
+Fix anything red. Run `.venv/bin/ruff format .` then `.venv/bin/ruff check --fix .` if needed.
+
+- [ ] **Step 2: Alembic on isolated DB**
+
+```bash
+docker exec wrzdj-db-1 psql -U wrzdj -d postgres -c "DROP DATABASE IF EXISTS wrzdj_issue339;" -c "CREATE DATABASE wrzdj_issue339;"
+DATABASE_URL="postgresql+psycopg://wrzdj:wrzdj@localhost:5432/wrzdj_issue339" .venv/bin/alembic upgrade head
+DATABASE_URL="postgresql+psycopg://wrzdj:wrzdj@localhost:5432/wrzdj_issue339" .venv/bin/alembic check
+```
+Expected: `No new upgrade operations detected.`
+
+- [ ] **Step 3: Frontend CI**
+
+From `dashboard/`:
+```bash
+npm run lint
+npx tsc --noEmit
+npm test -- --run
+git checkout next-env.d.ts 2>/dev/null || true
+```
+
+- [ ] **Step 4: Push + PR**
+
+Use `superpowers:finishing-a-development-branch` option 2. Create the PR with `gh pr create --base epic/ai-engine`. PR body MUST include `Closes #339`, a `## Design decisions` section (direct-aggregation rationale, pre-flight-only enforcement, reuse of `AUDIT_POLICY_CHANGED`, 429 mapping), and a note that it targets `epic/ai-engine`.
+
+---
+
+## Self-Review Notes
+
+- **Spec coverage:** column (T2), aggregation (T3), pre-flight `QuotaCapReached` (T4), admin set/edit endpoint (T6), DJ-facing message (T4 msg + T7 mapping), admin UI cap input + progress bar (T9). Acceptance: cap enforced (T4), clear DJ error (T4/T7), admin edits without disrupting in-flight calls (pre-flight-only, documented T4/T6). ✓
+- **Type consistency:** `current_month_token_usage(db, connector_id)`, `set_monthly_cap(connector, cap)`, `monthly_token_cap`, `current_month_tokens`, `setAdminLlmConnectorCap(id, cap)`, `capPercent(used, cap)`, `handleCapBlur(connector, raw)` used consistently across tasks. ✓
+- **Security:** Pydantic `ge=0` + service `ValueError` guard; admin-only via `get_current_admin`; fixed DJ-facing message leaks no internals; parameterized SQLAlchemy queries only. ✓
diff --git a/docs/superpowers/plans/2026-05-28-llm-gateway-streaming.md b/docs/superpowers/plans/2026-05-28-llm-gateway-streaming.md
new file mode 100644
index 00000000..0b3b06b8
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-28-llm-gateway-streaming.md
@@ -0,0 +1,1861 @@
+# LLM Gateway Streaming Support Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add provider-agnostic streaming (`Gateway.stream`) to the LLM Gateway with native SSE for OpenAI, Anthropic, and OpenAI-compatible adapters, an authenticated SSE backend endpoint, and a minimal frontend consumer — closing GitHub issue #335.
+
+**Architecture:** A new `ChatResponseChunk` canonical model carries incremental text, partial tool-call fragments, and (on the final chunk) `stop_reason` + `usage`. The `LlmAdapter` ABC gains an `async def stream(self, request) -> AsyncIterator[ChatResponseChunk]` with a default that raises `StreamingUnsupported`. OpenAI-wire adapters (platform + compatible) parse SSE `data:` lines with incremental tool-call JSON assembly; the Anthropic adapter consumes the SDK's typed event stream (content_block_delta / input_json_delta). `Gateway.stream` mirrors `dispatch`'s connector resolution and writes a single counts-only `llm_call_log` row (plus auth/audit rows) when the stream completes or errors. A new authenticated `POST /api/llm/connectors/{id}/stream-test` endpoint emits `text/event-stream`; client disconnect cancels the upstream request via async generator cleanup. The frontend gets an `apiClient.streamConnectorTest()` consumer using `fetch` + `ReadableStream` (EventSource cannot send the Bearer header).
+
+**Tech Stack:** Python / FastAPI / `sse_starlette` (already a dep) / `httpx` async streaming / `anthropic` SDK `messages.stream()` / pytest-asyncio. Frontend: Next.js / TypeScript / `fetch` streaming.
+
+---
+
+## File Structure
+
+- **Create** `server/app/services/llm/streaming.py` — `ChatResponseChunk` model + `StreamingUnsupported` exception + shared SSE-line helpers (`iter_sse_data_lines`, OpenAI partial tool-call accumulator). One responsibility: streaming primitives shared by adapters.
+- **Modify** `server/app/services/llm/base.py` — add `stream()` to the `LlmAdapter` ABC with a non-abstract default that raises `StreamingUnsupported`; re-export `ChatResponseChunk`.
+- **Modify** `server/app/services/llm/exceptions.py` — add `StreamingUnsupported(LlmError)`.
+- **Modify** `server/app/services/llm/adapters/_httpx_openai.py` — add `stream_openai_chat(...)` async generator.
+- **Modify** `server/app/services/llm/adapters/openai_apikey.py` — implement `stream()`.
+- **Modify** `server/app/services/llm/adapters/openai_compatible.py` — implement `stream()`.
+- **Modify** `server/app/services/llm/adapters/anthropic_apikey.py` — implement `stream()`.
+- **Modify** `server/app/services/llm/gateway.py` — add `Gateway.stream(...)` + `_attempt_stream(...)` helper (additive, separate functions — no edits to existing `dispatch`/`_attempt` bodies, to minimize merge conflicts with siblings #337/#339).
+- **Modify** `server/app/api/llm.py` — add `POST /connectors/{id}/stream-test` SSE endpoint.
+- **Modify** `dashboard/lib/api.ts` — add `streamConnectorTest(id, onChunk)` consumer + a `StreamChunk` type.
+- **Modify** `dashboard/app/admin/ai/page.tsx` — wire a minimal "stream test" affordance OR document scope as plumbing-only (decision recorded in Task 11).
+- **Create** `server/tests/test_llm_streaming.py` — chunk model + adapter stream parsing (OpenAI text, OpenAI tool-call fragments, Anthropic deltas, compatible, unsupported default).
+- **Create** `server/tests/test_llm_gateway_stream.py` — gateway resolution + logging + cancellation propagation.
+- **Create** `server/tests/test_llm_stream_endpoint.py` — SSE endpoint auth + content-type + body shape.
+
+---
+
+## Task 1: `ChatResponseChunk` model + `StreamingUnsupported` exception
+
+**Files:**
+- Create: `server/app/services/llm/streaming.py`
+- Modify: `server/app/services/llm/exceptions.py`
+- Modify: `server/app/services/llm/base.py`
+- Test: `server/tests/test_llm_streaming.py`
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# server/tests/test_llm_streaming.py
+"""Tests for streaming primitives: ChatResponseChunk + SSE helpers."""
+
+from __future__ import annotations
+
+from app.services.llm.base import ChatResponseChunk, LlmAdapter
+from app.services.llm.exceptions import StreamingUnsupported
+
+
+def test_chunk_defaults_are_empty():
+ chunk = ChatResponseChunk()
+ assert chunk.text_delta == ""
+ assert chunk.tool_call_deltas == []
+ assert chunk.stop_reason is None
+ assert chunk.usage is None
+ assert chunk.done is False
+
+
+def test_chunk_final_carries_stop_reason_and_usage():
+ from app.services.llm.base import TokenUsage
+
+ chunk = ChatResponseChunk(
+ stop_reason="end_turn",
+ usage=TokenUsage(prompt=3, completion=5),
+ done=True,
+ )
+ assert chunk.done is True
+ assert chunk.stop_reason == "end_turn"
+ assert chunk.usage.completion == 5
+
+
+def test_tool_call_delta_fragment_shape():
+ from app.services.llm.base import ToolCallDelta
+
+ delta = ToolCallDelta(index=0, id="call_1", name="search", input_json_fragment='{"q":')
+ assert delta.index == 0
+ assert delta.id == "call_1"
+ assert delta.name == "search"
+ assert delta.input_json_fragment == '{"q":'
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -q`
+Expected: FAIL with `ImportError: cannot import name 'ChatResponseChunk'`
+
+- [ ] **Step 3: Add `StreamingUnsupported` to exceptions**
+
+In `server/app/services/llm/exceptions.py`, append:
+
+```python
+
+
+class StreamingUnsupported(LlmError):
+ """The resolved adapter does not implement provider-native streaming."""
+```
+
+- [ ] **Step 4: Add the chunk + delta models to `base.py` (`streaming.py` is created later)**
+
+The chunk + delta models live in `base.py` (this task adds them there) so they sit alongside `ChatResponse`. `streaming.py` holds the SSE-line helpers only and is created in Task 3. For this task, only add the models to `base.py`.
+
+In `server/app/services/llm/base.py`, after `ChatResponse`, add:
+
+```python
+class ToolCallDelta(BaseModel):
+ """A fragment of a streamed tool call.
+
+ Providers emit tool-call arguments incrementally. ``index`` groups fragments
+ belonging to the same call (OpenAI sends an array index; Anthropic uses the
+ content-block index). ``id`` / ``name`` arrive once at the start of a call;
+ ``input_json_fragment`` carries the raw, possibly-partial argument JSON text.
+ Consumers concatenate fragments per ``index`` and JSON-parse the result when
+ the stream completes.
+ """
+
+ index: int
+ id: str | None = None
+ name: str | None = None
+ input_json_fragment: str = ""
+
+
+class ChatResponseChunk(BaseModel):
+ """One incremental chunk of a streamed chat response.
+
+ Non-final chunks carry ``text_delta`` and/or ``tool_call_deltas``. The final
+ chunk sets ``done=True`` and carries the canonical ``stop_reason`` plus
+ ``usage`` (when the provider reports it). ``stop_reason``/``usage`` may be
+ ``None`` on every non-final chunk.
+ """
+
+ text_delta: str = ""
+ tool_call_deltas: list[ToolCallDelta] = Field(default_factory=list)
+ stop_reason: Literal["end_turn", "tool_use", "max_tokens", "error"] | None = None
+ usage: TokenUsage | None = None
+ done: bool = False
+```
+
+- [ ] **Step 5: Add `stream()` default to the `LlmAdapter` ABC**
+
+In `server/app/services/llm/base.py`, add this import at the top (merge with the existing imports):
+
+```python
+from collections.abc import AsyncIterator
+```
+
+Then inside `class LlmAdapter`, after `health_check`, add:
+
+```python
+ async def stream(self, request: ChatRequest) -> AsyncIterator[ChatResponseChunk]:
+ """Stream a chat response as incremental chunks.
+
+ Default raises :class:`StreamingUnsupported`. Adapters that support
+ provider-native streaming override this. The trailing ``yield`` is
+ unreachable; it exists only to make this method an async generator, so
+ the error is raised when the caller starts iterating.
+ """
+ from app.services.llm.exceptions import StreamingUnsupported
+
+ raise StreamingUnsupported(
+ f"connector_type={self.connector_type!r} does not support streaming"
+ )
+ yield # pragma: no cover (makes this an async generator)
+```
+
+- [ ] **Step 6: Run test to verify it passes**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -q`
+Expected: PASS (3 tests)
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add server/app/services/llm/base.py server/app/services/llm/exceptions.py server/tests/test_llm_streaming.py
+git commit -m "feat(llm): add ChatResponseChunk + streaming ABC default"
+```
+
+---
+
+## Task 2: Default `stream()` raises `StreamingUnsupported`
+
+**Files:**
+- Test: `server/tests/test_llm_streaming.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `server/tests/test_llm_streaming.py`:
+
+```python
+import pytest
+
+from app.services.llm.base import ChatRequest, Message
+
+
+class _BareAdapter(LlmAdapter):
+ connector_type = "bare"
+
+ async def chat(self, request): # pragma: no cover
+ raise NotImplementedError
+
+ async def health_check(self): # pragma: no cover
+ raise NotImplementedError
+
+
+@pytest.mark.asyncio
+async def test_default_stream_raises_streaming_unsupported():
+ adapter = _BareAdapter(connector=None)
+ req = ChatRequest(messages=[Message(role="user", content="hi")])
+ with pytest.raises(StreamingUnsupported):
+ async for _ in adapter.stream(req):
+ pass
+```
+
+- [ ] **Step 2: Run test to verify it passes (default already implemented in Task 1)**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py::test_default_stream_raises_streaming_unsupported -q`
+Expected: PASS
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add server/tests/test_llm_streaming.py
+git commit -m "test(llm): default adapter stream raises StreamingUnsupported"
+```
+
+---
+
+## Task 3: OpenAI partial tool-call accumulator helper
+
+**Files:**
+- Create: `server/app/services/llm/streaming.py`
+- Test: `server/tests/test_llm_streaming.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `server/tests/test_llm_streaming.py`:
+
+```python
+def test_parse_openai_stream_line_text():
+ from app.services.llm.streaming import parse_openai_stream_event
+
+ chunk = parse_openai_stream_event(
+ {"choices": [{"delta": {"content": "Hello"}, "finish_reason": None}]}
+ )
+ assert chunk is not None
+ assert chunk.text_delta == "Hello"
+ assert chunk.tool_call_deltas == []
+ assert chunk.done is False
+
+
+def test_parse_openai_stream_line_tool_call_fragment():
+ from app.services.llm.streaming import parse_openai_stream_event
+
+ chunk = parse_openai_stream_event(
+ {
+ "choices": [
+ {
+ "delta": {
+ "tool_calls": [
+ {
+ "index": 0,
+ "id": "call_1",
+ "function": {"name": "search", "arguments": '{"q":'},
+ }
+ ]
+ },
+ "finish_reason": None,
+ }
+ ]
+ }
+ )
+ assert chunk is not None
+ assert chunk.text_delta == ""
+ assert len(chunk.tool_call_deltas) == 1
+ d = chunk.tool_call_deltas[0]
+ assert d.index == 0 and d.id == "call_1" and d.name == "search"
+ assert d.input_json_fragment == '{"q":'
+
+
+def test_parse_openai_stream_line_finish():
+ from app.services.llm.streaming import parse_openai_stream_event
+
+ chunk = parse_openai_stream_event(
+ {
+ "choices": [{"delta": {}, "finish_reason": "tool_calls"}],
+ "usage": {"prompt_tokens": 7, "completion_tokens": 11},
+ }
+ )
+ assert chunk is not None
+ assert chunk.done is True
+ assert chunk.stop_reason == "tool_use"
+ assert chunk.usage is not None and chunk.usage.prompt == 7
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k parse_openai_stream -q`
+Expected: FAIL with `ModuleNotFoundError`/`ImportError` for `streaming.parse_openai_stream_event`
+
+- [ ] **Step 3: Create `streaming.py` helpers**
+
+```python
+# server/app/services/llm/streaming.py
+"""Shared streaming primitives for LLM adapters.
+
+Holds SSE-line parsing helpers reused by the OpenAI-wire adapters. The chunk /
+delta models themselves live in ``base.py`` alongside ``ChatResponse``.
+"""
+
+from __future__ import annotations
+
+from app.services.llm.base import ChatResponseChunk, ToolCallDelta, TokenUsage
+from app.services.llm.tool_translation import _normalise_finish_reason # noqa: PLC2701
+
+# Maps OpenAI streaming ``finish_reason`` values to canonical stop reasons.
+# This table is local to the streaming path; only the normaliser helper
+# (``_normalise_finish_reason``) is shared with ``tool_translation``. How that
+# helper treats values missing from this table is defined there, not here.
+_FINISH_REASON_OPENAI = {
+ "stop": "end_turn",
+ "tool_calls": "tool_use",
+ "function_call": "tool_use",
+ "length": "max_tokens",
+}
+
+
+def parse_openai_stream_event(payload: dict) -> ChatResponseChunk | None:
+ """Translate one parsed OpenAI streaming JSON object into a chunk.
+
+ Returns ``None`` for payloads carrying no usable signal (e.g. the initial
+ role-only delta). The final event sets ``done=True`` with the mapped
+ ``stop_reason`` and (when present) token usage.
+ """
+ choices = payload.get("choices") or []
+ choice = choices[0] if choices else {}
+ delta = choice.get("delta") or {}
+
+ text_delta = delta.get("content") or ""
+
+ tool_call_deltas: list[ToolCallDelta] = []
+ for tc in delta.get("tool_calls") or []:
+ if not isinstance(tc, dict):
+ continue
+ fn = tc.get("function") or {}
+ tool_call_deltas.append(
+ ToolCallDelta(
+ index=int(tc.get("index", 0)),
+ id=tc.get("id"),
+ name=(fn.get("name") if isinstance(fn, dict) else None),
+ input_json_fragment=(
+ (fn.get("arguments") or "") if isinstance(fn, dict) else ""
+ ),
+ )
+ )
+
+ finish_reason = choice.get("finish_reason")
+ usage_payload = payload.get("usage") or {}
+
+ done = finish_reason is not None
+ stop_reason = None
+ usage = None
+ if done:
+ stop_reason = _normalise_finish_reason(finish_reason, _FINISH_REASON_OPENAI)
+ if usage_payload:
+ usage = TokenUsage(
+ prompt=int(usage_payload.get("prompt_tokens", 0)),
+ completion=int(usage_payload.get("completion_tokens", 0)),
+ )
+
+ if not text_delta and not tool_call_deltas and not done:
+ return None
+
+ return ChatResponseChunk(
+ text_delta=text_delta,
+ tool_call_deltas=tool_call_deltas,
+ stop_reason=stop_reason,
+ usage=usage,
+ done=done,
+ )
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k parse_openai_stream -q`
+Expected: PASS (3 tests)
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/services/llm/streaming.py server/tests/test_llm_streaming.py
+git commit -m "feat(llm): OpenAI streaming event → ChatResponseChunk parser"
+```
+
+---
+
+## Task 4: `stream_openai_chat` async generator (httpx)
+
+**Files:**
+- Modify: `server/app/services/llm/adapters/_httpx_openai.py`
+- Test: `server/tests/test_llm_streaming.py`
+
+- [ ] **Step 1: Write the failing test (mock httpx streaming response)**
+
+Append to `server/tests/test_llm_streaming.py`:
+
+```python
+class _FakeStreamResponse:
+ """Minimal stand-in for httpx streaming response."""
+
+ def __init__(self, lines: list[str], status_code: int = 200):
+ self._lines = lines
+ self.status_code = status_code
+ self.headers = {}
+
+ async def aiter_lines(self):
+ for line in self._lines:
+ yield line
+
+ async def aread(self):
+ return b""
+
+
+class _FakeStreamClient:
+ def __init__(self, response):
+ self._response = response
+
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, *exc):
+ return False
+
+ def stream(self, method, url, **kwargs):
+ client = self
+
+ class _Ctx:
+ async def __aenter__(self_inner):
+ return client._response
+
+ async def __aexit__(self_inner, *exc):
+ return False
+
+ return _Ctx()
+
+
+@pytest.mark.asyncio
+async def test_stream_openai_chat_yields_text_then_final(monkeypatch):
+ from app.services.llm.adapters import _httpx_openai
+
+ sse_lines = [
+ 'data: {"choices":[{"delta":{"role":"assistant"},"finish_reason":null}]}',
+ 'data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":null}]}',
+ 'data: {"choices":[{"delta":{"content":" there"},"finish_reason":null}]}',
+ 'data: {"choices":[{"delta":{},"finish_reason":"stop"}],'
+ '"usage":{"prompt_tokens":4,"completion_tokens":2}}',
+ "data: [DONE]",
+ ]
+ fake_resp = _FakeStreamResponse(sse_lines)
+ monkeypatch.setattr(
+ _httpx_openai.httpx, "AsyncClient", lambda *a, **k: _FakeStreamClient(fake_resp)
+ )
+
+ req = ChatRequest(messages=[Message(role="user", content="hi")], model="gpt-x")
+ chunks = []
+ async for c in _httpx_openai.stream_openai_chat(
+ base_url="https://api.openai.com/v1",
+ api_key="sk-test",
+ request=req,
+ fallback_model="gpt-x",
+ ):
+ chunks.append(c)
+
+ text = "".join(c.text_delta for c in chunks)
+ assert text == "Hi there"
+ assert chunks[-1].done is True
+ assert chunks[-1].stop_reason == "end_turn"
+ assert chunks[-1].usage.prompt == 4
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py::test_stream_openai_chat_yields_text_then_final -q`
+Expected: FAIL — `stream_openai_chat` not defined
+
+- [ ] **Step 3: Implement `stream_openai_chat` in `_httpx_openai.py`**
+
+Add imports near the top of `server/app/services/llm/adapters/_httpx_openai.py`:
+
+```python
+from collections.abc import AsyncIterator
+
+from app.services.llm.base import ChatResponseChunk
+from app.services.llm.exceptions import AuthInvalid, QuotaExceeded, RateLimited
+from app.services.llm.streaming import parse_openai_stream_event
+```
+
+(Merge with the existing import block; `ProviderUnavailable` / `ToolTranslationError` are already imported.)
+
+Add this function after `call_openai_chat`:
+
+```python
+def _map_stream_status(status_code: int) -> None:
+ """Raise the canonical typed error for a non-2xx streaming status."""
+ if status_code in (401, 403):
+ raise AuthInvalid(f"Auth failed (HTTP {status_code})")
+ if status_code == 402:
+ raise QuotaExceeded("Quota or billing failure (HTTP 402)")
+ if status_code == 429:
+ raise RateLimited("Rate limited (HTTP 429)")
+ if 500 <= status_code < 600:
+ raise ProviderUnavailable(f"Upstream error (HTTP {status_code})")
+ raise ToolTranslationError(f"Upstream rejected request (HTTP {status_code})")
+
+
+async def stream_openai_chat(
+ *,
+ base_url: str,
+ api_key: str | None,
+ request: ChatRequest,
+ fallback_model: str | None,
+ extra_headers: dict | None = None,
+ max_tokens_field: str = "max_tokens",
+) -> AsyncIterator[ChatResponseChunk]:
+ """Issue a streaming Chat Completions request, yielding canonical chunks.
+
+ Cancellation: if the consumer stops iterating (e.g. SSE client disconnect),
+ the ``async with client.stream(...)`` context exits and httpx closes the
+ upstream connection, cancelling the provider request. Errors are mapped to
+ canonical typed exceptions before the first chunk; mid-stream network drops
+ surface as ``ProviderUnavailable``.
+ """
+ model = request.model or fallback_model
+ if not model:
+ raise ToolTranslationError(
+ "model is required (set ChatRequest.model or LlmConnector.model_hint)"
+ )
+
+ endpoint = _build_chat_endpoint(base_url)
+ headers: dict[str, str] = {
+ "Content-Type": "application/json",
+ "Accept": "text/event-stream",
+ }
+ if api_key:
+ headers["Authorization"] = f"Bearer {api_key}" # nosec B106
+ if extra_headers:
+ headers.update(extra_headers)
+
+ timeout = request.timeout_seconds or DEFAULT_TIMEOUT_SECONDS
+ timeout = min(max(timeout, 1.0), MAX_TIMEOUT_SECONDS)
+
+ payload = _build_payload(request, model, max_tokens_field=max_tokens_field)
+ payload["stream"] = True
+ # Ask OpenAI to include usage in the terminal stream event.
+ payload["stream_options"] = {"include_usage": True}
+
+ try:
+ async with httpx.AsyncClient(timeout=timeout) as client:
+ async with client.stream(
+ "POST", endpoint, json=payload, headers=headers
+ ) as resp:
+ if resp.status_code >= 300:
+ # Drain the (small) error body so the connection releases.
+ await resp.aread()
+ _map_stream_status(resp.status_code)
+ async for line in resp.aiter_lines():
+ if not line or not line.startswith("data:"):
+ continue
+ data = line[len("data:") :].strip()
+ if data == "[DONE]":
+ break
+ try:
+ obj = json.loads(data)
+ except json.JSONDecodeError:
+ # Tolerate keepalive/comment lines.
+ continue
+ chunk = parse_openai_stream_event(obj)
+ if chunk is not None:
+ yield chunk
+ except httpx.TimeoutException as exc:
+ raise ProviderUnavailable("Upstream timeout") from exc
+ except httpx.HTTPError as exc:
+ raise ProviderUnavailable("Upstream network error") from exc
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py::test_stream_openai_chat_yields_text_then_final -q`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/services/llm/adapters/_httpx_openai.py server/tests/test_llm_streaming.py
+git commit -m "feat(llm): httpx OpenAI-wire streaming generator"
+```
+
+---
+
+## Task 5: OpenAI Platform + OpenAI-compatible adapter `stream()`
+
+**Files:**
+- Modify: `server/app/services/llm/adapters/openai_apikey.py`
+- Modify: `server/app/services/llm/adapters/openai_compatible.py`
+- Test: `server/tests/test_llm_streaming.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `server/tests/test_llm_streaming.py`:
+
+```python
+@pytest.mark.asyncio
+async def test_openai_apikey_adapter_stream(monkeypatch):
+ import json as _json
+
+ from app.models.llm_connector import LlmConnector
+ from app.services.llm.adapters import openai_apikey
+ from app.services.llm.base import ChatResponseChunk
+
+ captured = {}
+
+ async def fake_stream(**kwargs):
+ captured.update(kwargs)
+ yield ChatResponseChunk(text_delta="ok", done=False)
+ yield ChatResponseChunk(stop_reason="end_turn", done=True)
+
+ monkeypatch.setattr(openai_apikey, "stream_openai_chat", fake_stream)
+
+ connector = LlmConnector(
+ user_id=1,
+ connector_type="openai_apikey",
+ display_name="x",
+ status="active",
+ credentials=_json.dumps({"api_key": "sk-test"}),
+ model_hint="gpt-x",
+ )
+ adapter = openai_apikey.OpenAIApiKeyAdapter(connector)
+ req = ChatRequest(messages=[Message(role="user", content="hi")])
+ chunks = [c async for c in adapter.stream(req)]
+ assert [c.text_delta for c in chunks] == ["ok", ""]
+ assert chunks[-1].done is True
+ assert captured["max_tokens_field"] == "max_completion_tokens"
+ assert captured["api_key"] == "sk-test"
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py::test_openai_apikey_adapter_stream -q`
+Expected: FAIL — adapter `stream` falls through to default `StreamingUnsupported`
+
+- [ ] **Step 3: Implement `stream()` in `openai_apikey.py`**
+
+Add to imports:
+
+```python
+from collections.abc import AsyncIterator
+
+from app.services.llm.adapters._httpx_openai import (
+ build_healthcheck_request,
+ call_openai_chat,
+ stream_openai_chat,
+)
+from app.services.llm.base import ChatResponseChunk
+```
+
+(Merge with the existing `_httpx_openai` import — add `stream_openai_chat`.)
+
+Add the method to `OpenAIApiKeyAdapter` (after `health_check`):
+
+```python
+ async def stream(self, request: ChatRequest) -> AsyncIterator[ChatResponseChunk]:
+ api_key = self._extract_api_key()
+ async for chunk in stream_openai_chat(
+ base_url=OPENAI_BASE_URL,
+ api_key=api_key,
+ request=request,
+ fallback_model=self.connector.model_hint or DEFAULT_MODEL,
+ max_tokens_field=_MAX_TOKENS_FIELD,
+ ):
+ yield chunk
+```
+
+- [ ] **Step 4: Implement `stream()` in `openai_compatible.py`**
+
+Add to imports:
+
+```python
+from collections.abc import AsyncIterator
+
+from app.services.llm.adapters._httpx_openai import (
+ build_healthcheck_request,
+ call_openai_chat,
+ stream_openai_chat,
+)
+from app.services.llm.base import ChatResponseChunk
+```
+
+Add the method to `OpenAICompatibleAdapter` (after `health_check`):
+
+```python
+ async def stream(self, request: ChatRequest) -> AsyncIterator[ChatResponseChunk]:
+ base_url, bearer = self._extract_credentials()
+ async for chunk in stream_openai_chat(
+ base_url=base_url,
+ api_key=bearer,
+ request=request,
+ fallback_model=self.connector.model_hint or DEFAULT_MODEL,
+ ):
+ yield chunk
+```
+
+- [ ] **Step 5: Add a compatible-adapter stream test**
+
+Append to `server/tests/test_llm_streaming.py`:
+
+```python
+@pytest.mark.asyncio
+async def test_openai_compatible_adapter_stream(monkeypatch):
+ import json as _json
+
+ from app.models.llm_connector import LlmConnector
+ from app.services.llm.adapters import openai_compatible
+ from app.services.llm.base import ChatResponseChunk
+
+ async def fake_stream(**kwargs):
+ assert kwargs["base_url"] == "http://127.0.0.1:1234/v1"
+ yield ChatResponseChunk(text_delta="hey", done=False)
+ yield ChatResponseChunk(stop_reason="end_turn", done=True)
+
+ monkeypatch.setattr(openai_compatible, "stream_openai_chat", fake_stream)
+
+ connector = LlmConnector(
+ user_id=1,
+ connector_type="openai_compatible",
+ display_name="local",
+ status="active",
+ credentials=_json.dumps({"base_url": "http://127.0.0.1:1234/v1"}),
+ model_hint="local-model",
+ )
+ adapter = openai_compatible.OpenAICompatibleAdapter(connector)
+ req = ChatRequest(messages=[Message(role="user", content="hi")])
+ chunks = [c async for c in adapter.stream(req)]
+ assert "".join(c.text_delta for c in chunks) == "hey"
+ assert chunks[-1].done is True
+```
+
+- [ ] **Step 6: Run tests to verify they pass**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k "openai_apikey_adapter_stream or openai_compatible_adapter_stream" -q`
+Expected: PASS (2 tests)
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add server/app/services/llm/adapters/openai_apikey.py server/app/services/llm/adapters/openai_compatible.py server/tests/test_llm_streaming.py
+git commit -m "feat(llm): streaming for OpenAI platform + compatible adapters"
+```
+
+---
+
+## Task 6: Anthropic adapter `stream()`
+
+**Files:**
+- Modify: `server/app/services/llm/adapters/anthropic_apikey.py`
+- Test: `server/tests/test_llm_streaming.py`
+
+- [ ] **Step 1: Write the failing test (fake SDK event stream)**
+
+Append to `server/tests/test_llm_streaming.py`:
+
+```python
+class _FakeEvent:
+ """Stand-in for an anthropic SDK stream event (attribute access)."""
+
+ def __init__(self, **kw):
+ for k, v in kw.items():
+ setattr(self, k, v)
+
+
+class _FakeAnthropicStream:
+ def __init__(self, events):
+ self._events = events
+
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, *exc):
+ return False
+
+ async def __aiter__(self):
+ for e in self._events:
+ yield e
+
+
+def _anthropic_text_events():
+ return [
+ _FakeEvent(type="message_start"),
+ _FakeEvent(
+ type="content_block_start",
+ index=0,
+ content_block=_FakeEvent(type="text", text=""),
+ ),
+ _FakeEvent(
+ type="content_block_delta",
+ index=0,
+ delta=_FakeEvent(type="text_delta", text="Hel"),
+ ),
+ _FakeEvent(
+ type="content_block_delta",
+ index=0,
+ delta=_FakeEvent(type="text_delta", text="lo"),
+ ),
+ _FakeEvent(type="content_block_stop", index=0),
+ _FakeEvent(
+ type="message_delta",
+ delta=_FakeEvent(stop_reason="end_turn"),
+ usage=_FakeEvent(output_tokens=5),
+ ),
+ _FakeEvent(type="message_stop"),
+ ]
+
+
+@pytest.mark.asyncio
+async def test_anthropic_adapter_stream_text(monkeypatch):
+ import json as _json
+
+ from app.models.llm_connector import LlmConnector
+ from app.services.llm.adapters import anthropic_apikey
+
+ class _FakeMessages:
+ def stream(self, **kwargs):
+ return _FakeAnthropicStream(_anthropic_text_events())
+
+ class _FakeClient:
+ def __init__(self, *a, **k):
+ self.messages = _FakeMessages()
+
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, *exc):
+ return False
+
+ monkeypatch.setattr(anthropic_apikey, "AsyncAnthropic", _FakeClient)
+
+ connector = LlmConnector(
+ user_id=1,
+ connector_type="anthropic_apikey",
+ display_name="claude",
+ status="active",
+ credentials=_json.dumps({"api_key": "sk-ant-test"}),
+ model_hint="claude-x",
+ )
+ adapter = anthropic_apikey.AnthropicApiKeyAdapter(connector)
+ req = ChatRequest(messages=[Message(role="user", content="hi")])
+ chunks = [c async for c in adapter.stream(req)]
+ assert "".join(c.text_delta for c in chunks) == "Hello"
+ assert chunks[-1].done is True
+ assert chunks[-1].stop_reason == "end_turn"
+ assert chunks[-1].usage.completion == 5
+
+
+def _anthropic_tool_events():
+ return [
+ _FakeEvent(type="message_start"),
+ _FakeEvent(
+ type="content_block_start",
+ index=0,
+ content_block=_FakeEvent(type="tool_use", id="toolu_1", name="search"),
+ ),
+ _FakeEvent(
+ type="content_block_delta",
+ index=0,
+ delta=_FakeEvent(type="input_json_delta", partial_json='{"q":'),
+ ),
+ _FakeEvent(
+ type="content_block_delta",
+ index=0,
+ delta=_FakeEvent(type="input_json_delta", partial_json='"house"}'),
+ ),
+ _FakeEvent(type="content_block_stop", index=0),
+ _FakeEvent(
+ type="message_delta",
+ delta=_FakeEvent(stop_reason="tool_use"),
+ usage=_FakeEvent(output_tokens=9),
+ ),
+ _FakeEvent(type="message_stop"),
+ ]
+
+
+@pytest.mark.asyncio
+async def test_anthropic_adapter_stream_tool_use(monkeypatch):
+ import json as _json
+
+ from app.models.llm_connector import LlmConnector
+ from app.services.llm.adapters import anthropic_apikey
+
+ class _FakeMessages:
+ def stream(self, **kwargs):
+ return _FakeAnthropicStream(_anthropic_tool_events())
+
+ class _FakeClient:
+ def __init__(self, *a, **k):
+ self.messages = _FakeMessages()
+
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, *exc):
+ return False
+
+ monkeypatch.setattr(anthropic_apikey, "AsyncAnthropic", _FakeClient)
+
+ connector = LlmConnector(
+ user_id=1,
+ connector_type="anthropic_apikey",
+ display_name="claude",
+ status="active",
+ credentials=_json.dumps({"api_key": "sk-ant-test"}),
+ model_hint="claude-x",
+ )
+ adapter = anthropic_apikey.AnthropicApiKeyAdapter(connector)
+ req = ChatRequest(messages=[Message(role="user", content="hi")])
+ chunks = [c async for c in adapter.stream(req)]
+
+ # Reassemble tool-call fragments by index.
+ frags = [d for c in chunks for d in c.tool_call_deltas]
+ assert frags[0].id == "toolu_1" and frags[0].name == "search"
+ joined = "".join(d.input_json_fragment for d in frags)
+ assert _json.loads(joined) == {"q": "house"}
+ assert chunks[-1].done is True
+ assert chunks[-1].stop_reason == "tool_use"
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k anthropic_adapter_stream -q`
+Expected: FAIL — `StreamingUnsupported`
+
+- [ ] **Step 3: Implement `stream()` in `anthropic_apikey.py`**
+
+Add to imports (merge with existing):
+
+```python
+from collections.abc import AsyncIterator
+
+from app.services.llm.base import (
+ ChatRequest,
+ ChatResponse,
+ ChatResponseChunk,
+ LlmAdapter,
+ Message,
+ TokenUsage,
+ ToolCallDelta,
+)
+from app.services.llm.exceptions import (
+ AuthInvalid,
+ ProviderUnavailable,
+ QuotaExceeded,
+ RateLimited,
+ ToolTranslationError,
+)
+```
+
+Add a module-level finish-reason map near `DEFAULT_MODEL`:
+
+```python
+_STREAM_FINISH_REASON = {
+ "end_turn": "end_turn",
+ "stop_sequence": "end_turn",
+ "tool_use": "tool_use",
+ "max_tokens": "max_tokens",
+}
+```
+
+Add the method (after `health_check`):
+
+```python
+ async def stream(self, request: ChatRequest) -> AsyncIterator[ChatResponseChunk]:
+ model = request.model or self.connector.model_hint or DEFAULT_MODEL
+ max_tokens = request.max_tokens or DEFAULT_MAX_TOKENS
+ timeout = min(
+ max(request.timeout_seconds or DEFAULT_TIMEOUT_SECONDS, 1.0),
+ MAX_TIMEOUT_SECONDS,
+ )
+
+ anthropic_messages = to_anthropic_messages(request.messages)
+ tools, choice = to_anthropic_tools(request.tools, request.force_tool)
+
+ kwargs: dict[str, Any] = {
+ "model": model,
+ "max_tokens": max_tokens,
+ "messages": anthropic_messages,
+ }
+ if request.system:
+ kwargs["system"] = request.system
+ if request.temperature is not None:
+ kwargs["temperature"] = request.temperature
+ if tools:
+ kwargs["tools"] = tools
+ if choice is not None:
+ kwargs["tool_choice"] = choice
+
+ # Per-content-block index → tool id/name (sent once at block start).
+ block_index_to_tool: dict[int, str] = {}
+ stop_reason: str | None = None
+ output_tokens: int | None = None
+
+ try:
+ async with self._client(timeout=timeout) as client:
+ async with client.messages.stream(**kwargs) as stream:
+ async for event in stream:
+ etype = getattr(event, "type", None)
+ if etype == "content_block_start":
+ block = getattr(event, "content_block", None)
+ if getattr(block, "type", None) == "tool_use":
+ idx = int(getattr(event, "index", 0))
+ tool_id = getattr(block, "id", None)
+ name = getattr(block, "name", None)
+ block_index_to_tool[idx] = name or ""
+ yield ChatResponseChunk(
+ tool_call_deltas=[
+ ToolCallDelta(index=idx, id=tool_id, name=name)
+ ]
+ )
+ elif etype == "content_block_delta":
+ delta = getattr(event, "delta", None)
+ dtype = getattr(delta, "type", None)
+ if dtype == "text_delta":
+ yield ChatResponseChunk(
+ text_delta=getattr(delta, "text", "") or ""
+ )
+ elif dtype == "input_json_delta":
+ idx = int(getattr(event, "index", 0))
+ yield ChatResponseChunk(
+ tool_call_deltas=[
+ ToolCallDelta(
+ index=idx,
+ input_json_fragment=getattr(
+ delta, "partial_json", ""
+ )
+ or "",
+ )
+ ]
+ )
+ elif etype == "message_delta":
+ delta = getattr(event, "delta", None)
+ sr = getattr(delta, "stop_reason", None)
+ if sr is not None:
+ stop_reason = sr
+ usage = getattr(event, "usage", None)
+ if usage is not None:
+ ot = getattr(usage, "output_tokens", None)
+ if ot is not None:
+ output_tokens = int(ot)
+ except APITimeoutError as exc:
+ raise ProviderUnavailable("Upstream timeout") from exc
+ except APIConnectionError as exc:
+ raise ProviderUnavailable("Upstream network error") from exc
+ except APIStatusError as exc:
+ self._raise_for_status(exc)
+ except APIError as exc:
+ raise ProviderUnavailable(
+ f"Anthropic API error: {type(exc).__name__}"
+ ) from exc
+
+ canonical_stop = _STREAM_FINISH_REASON.get(stop_reason or "", "end_turn")
+ if block_index_to_tool and canonical_stop != "tool_use":
+ canonical_stop = "tool_use"
+ final_usage = (
+ TokenUsage(prompt=0, completion=output_tokens)
+ if output_tokens is not None
+ else None
+ )
+ yield ChatResponseChunk(
+ stop_reason=canonical_stop,
+ usage=final_usage,
+ done=True,
+ )
+```
+
+Note: Anthropic streams `output_tokens` in `message_delta` but `input_tokens` only in `message_start.usage`. For the counts-only call log this completion count is sufficient; prompt is recorded as 0 when unavailable. (Documented as a design decision.)
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k anthropic_adapter_stream -q`
+Expected: PASS (2 tests)
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/services/llm/adapters/anthropic_apikey.py server/tests/test_llm_streaming.py
+git commit -m "feat(llm): Anthropic provider-native streaming (text + tool_use deltas)"
+```
+
+---
+
+## Task 7: `Gateway.stream` + `_attempt_stream` with counts-only logging
+
+**Files:**
+- Modify: `server/app/services/llm/gateway.py`
+- Test: `server/tests/test_llm_gateway_stream.py`
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# server/tests/test_llm_gateway_stream.py
+"""Tests for Gateway.stream — resolution mirrors dispatch, counts-only logging."""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from app.models.llm_connector import LlmCallLog, LlmConnector
+from app.models.user import User
+from app.services.auth import get_password_hash
+from app.services.llm.adapters.openai_apikey import OpenAIApiKeyAdapter
+from app.services.llm.base import ChatRequest, ChatResponseChunk, Message, TokenUsage
+from app.services.llm.exceptions import NoLlmConfigured, ProviderUnavailable
+from app.services.llm.gateway import Gateway
+
+
+@pytest.fixture
+def dj_user(db) -> User:
+ user = User(
+ username="streamdj",
+ password_hash=get_password_hash("password123"),
+ role="dj",
+ )
+ db.add(user)
+ db.commit()
+ db.refresh(user)
+ return user
+
+
+def _make_connector(db, user, **kw) -> LlmConnector:
+ row = LlmConnector(
+ user_id=user.id,
+ connector_type=kw.get("connector_type", "openai_apikey"),
+ display_name=kw.get("display_name", "Test"),
+ status=kw.get("status", "active"),
+ credentials=json.dumps({"api_key": "sk-fake"}),
+ model_hint="gpt-5-mini",
+ )
+ db.add(row)
+ db.commit()
+ db.refresh(row)
+ return row
+
+
+def _fake_stream(chunks):
+ async def _gen(self, request):
+ for c in chunks:
+ yield c
+
+ return _gen
+
+
+@pytest.mark.asyncio
+async def test_stream_no_actor_no_default_raises(db):
+ req = ChatRequest(messages=[Message(role="user", content="hi")])
+ with pytest.raises(NoLlmConfigured):
+ async for _ in Gateway.stream(db, None, req, purpose="test"):
+ pass
+
+
+@pytest.mark.asyncio
+async def test_stream_dispatches_and_logs_counts_only(db, dj_user, monkeypatch):
+ connector = _make_connector(db, dj_user)
+ chunks = [
+ ChatResponseChunk(text_delta="Hel"),
+ ChatResponseChunk(text_delta="lo"),
+ ChatResponseChunk(
+ stop_reason="end_turn",
+ usage=TokenUsage(prompt=4, completion=2),
+ done=True,
+ ),
+ ]
+ monkeypatch.setattr(OpenAIApiKeyAdapter, "stream", _fake_stream(chunks))
+
+ req = ChatRequest(messages=[Message(role="user", content="hi")])
+ out = [c async for c in Gateway.stream(db, dj_user, req, purpose="recommendation")]
+ assert "".join(c.text_delta for c in out) == "Hello"
+
+ log = db.query(LlmCallLog).filter(LlmCallLog.connector_id == connector.id).one()
+ assert log.status == "ok"
+ assert log.purpose == "recommendation"
+ assert log.tokens_in == 4
+ assert log.tokens_out == 2
+ db.refresh(connector)
+ assert connector.last_used_at is not None
+
+
+@pytest.mark.asyncio
+async def test_stream_error_logs_provider_unavailable(db, dj_user, monkeypatch):
+ connector = _make_connector(db, dj_user)
+
+ async def _boom(self, request):
+ raise ProviderUnavailable("down")
+ yield # pragma: no cover
+
+ monkeypatch.setattr(OpenAIApiKeyAdapter, "stream", _boom)
+
+ req = ChatRequest(messages=[Message(role="user", content="hi")])
+ with pytest.raises(ProviderUnavailable):
+ async for _ in Gateway.stream(db, dj_user, req, purpose="test"):
+ pass
+
+ log = db.query(LlmCallLog).filter(LlmCallLog.connector_id == connector.id).one()
+ assert log.status == "provider_unavailable"
+
+
+@pytest.mark.asyncio
+async def test_stream_consumer_cancel_logs_and_propagates(db, dj_user, monkeypatch):
+ """Consumer stops early (client disconnect) → GeneratorExit, log written once."""
+ connector = _make_connector(db, dj_user)
+
+ async def _infinite(self, request):
+ i = 0
+ while True:
+ yield ChatResponseChunk(text_delta=str(i))
+ i += 1
+
+ monkeypatch.setattr(OpenAIApiKeyAdapter, "stream", _infinite)
+
+ req = ChatRequest(messages=[Message(role="user", content="hi")])
+ agen = Gateway.stream(db, dj_user, req, purpose="test")
+ first = await agen.__anext__()
+ assert first.text_delta == "0"
+ await agen.aclose() # simulate client disconnect
+
+ log = db.query(LlmCallLog).filter(LlmCallLog.connector_id == connector.id).one()
+ assert log.status in ("ok", "cancelled")
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_gateway_stream.py -q`
+Expected: FAIL — `Gateway.stream` not defined
+
+- [ ] **Step 3: Implement `Gateway.stream` + `_attempt_stream`**
+
+Add imports at the top of `server/app/services/llm/gateway.py` (merge):
+
+```python
+from collections.abc import AsyncIterator
+
+from app.services.llm.base import ChatRequest, ChatResponse, ChatResponseChunk
+```
+
+Add a `stream` staticmethod inside `class Gateway` (after `dispatch`):
+
+```python
+ @staticmethod
+ async def stream(
+ db: Session,
+ actor: User | None,
+ request: ChatRequest,
+ *,
+ purpose: str,
+ ) -> AsyncIterator[ChatResponseChunk]:
+ """Stream a chat response, mirroring ``dispatch`` resolution + logging.
+
+ Resolution is identical to ``dispatch`` (per-DJ default → MRU → org
+ default). Logging differs only in timing: a single counts-only
+ ``llm_call_log`` row is written when the stream finishes (success),
+ errors, or is cancelled by the consumer (client disconnect → the async
+ generator is closed and ``GeneratorExit`` fires the ``finally``).
+
+ Auto-fallback (``fallback_policy``) is intentionally NOT applied to
+ streaming: chunks have already been delivered to the consumer by the
+ time a mid-stream error surfaces, so transparently restarting on another
+ connector would corrupt the output. Streaming always fails fast.
+ """
+ primary = _resolve_connector(db, actor)
+ actor_id = actor.id if actor else _system_actor_id(db, primary)
+ async for chunk in _attempt_stream(
+ db, primary, request, purpose=purpose, actor_id=actor_id
+ ):
+ yield chunk
+```
+
+Add the module-level `_attempt_stream` async generator (after `_attempt`):
+
+```python
+async def _attempt_stream(
+ db: Session,
+ connector: LlmConnector,
+ request: ChatRequest,
+ *,
+ purpose: str,
+ actor_id: int,
+) -> AsyncIterator[ChatResponseChunk]:
+ """Run a single adapter stream, logging exactly one outcome row.
+
+ The call log is written in a ``finally`` so it fires on success, on a typed
+ error, AND on consumer cancellation (``GeneratorExit`` raised into the
+ generator when the SSE client disconnects). The status reflects which path
+ fired; counts come only from a terminal chunk's ``usage`` (never content).
+ """
+ adapter_cls = get_adapter_class(connector.connector_type)
+ adapter = adapter_cls(connector)
+
+ started = monotonic()
+ status = "ok"
+ error_code: str | None = None
+ tokens_in: int | None = None
+ tokens_out: int | None = None
+ auth_failed = False
+
+ try:
+ async for chunk in adapter.stream(request):
+ if chunk.usage is not None:
+ tokens_in = chunk.usage.prompt
+ tokens_out = chunk.usage.completion
+ yield chunk
+ except GeneratorExit:
+ # Consumer disconnected — record as cancelled and re-raise so the
+ # adapter's own finally/cleanup closes the upstream connection.
+ status = "cancelled"
+ error_code = "client_disconnect"
+ raise
+ except AuthInvalid:
+ status = "auth_invalid"
+ error_code = "401"
+ auth_failed = True
+ raise
+ except RateLimited as exc:
+ status = "rate_limited"
+ error_code = str(exc.retry_after_seconds or "")
+ raise
+ except QuotaExceeded:
+ status = "quota_exceeded"
+ error_code = "402"
+ raise
+ except ProviderUnavailable as exc:
+ status = "provider_unavailable"
+ error_code = type(exc).__name__
+ raise
+ except ToolTranslationError:
+ status = "tool_translation_error"
+ error_code = "translation"
+ raise
+ except LlmError:
+ status = "error"
+ error_code = "llm_error"
+ raise
+ finally:
+ latency_ms = int((monotonic() - started) * 1000)
+ if status == "ok":
+ connector.last_used_at = utcnow()
+ connector.last_error = None
+ if auth_failed:
+ connector.status = STATUS_AUTH_INVALID
+ connector.last_error = "auth_invalid"
+ log_call(
+ db,
+ connector_id=connector.id,
+ purpose=purpose,
+ status=status,
+ latency_ms=latency_ms,
+ tokens_in=tokens_in if status == "ok" else None,
+ tokens_out=tokens_out if status == "ok" else None,
+ error_code=error_code,
+ )
+ if auth_failed:
+ audit_event(
+ db,
+ actor_user_id=actor_id,
+ target_connector_id=connector.id,
+ event_type=AUDIT_AUTH_INVALID_OBSERVED,
+ )
+ db.commit()
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_gateway_stream.py -q`
+Expected: PASS (4 tests)
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/services/llm/gateway.py server/tests/test_llm_gateway_stream.py
+git commit -m "feat(llm): Gateway.stream with counts-only logging + cancellation"
+```
+
+---
+
+## Task 8: SSE backend endpoint `POST /api/llm/connectors/{id}/stream-test`
+
+**Files:**
+- Modify: `server/app/api/llm.py`
+- Test: `server/tests/test_llm_stream_endpoint.py`
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# server/tests/test_llm_stream_endpoint.py
+"""SSE stream-test endpoint: auth, content-type, body shape, ownership."""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from app.models.llm_connector import LlmConnector
+from app.services.llm.adapters.openai_apikey import OpenAIApiKeyAdapter
+from app.services.llm.base import ChatResponseChunk, TokenUsage
+
+
+def _make_connector(db, user) -> LlmConnector:
+ row = LlmConnector(
+ user_id=user.id,
+ connector_type="openai_apikey",
+ display_name="Test",
+ status="active",
+ credentials=json.dumps({"api_key": "sk-fake"}),
+ model_hint="gpt-5-mini",
+ )
+ db.add(row)
+ db.commit()
+ db.refresh(row)
+ return row
+
+
+def test_stream_test_requires_auth(client, db, test_user):
+ connector = _make_connector(db, test_user)
+ resp = client.post(f"/api/llm/connectors/{connector.id}/stream-test")
+ assert resp.status_code == 401
+
+
+def test_stream_test_404_for_unowned(client, db, test_user, auth_headers):
+ # Connector owned by a different user.
+ from app.models.user import User
+ from app.services.auth import get_password_hash
+
+ other = User(username="other", password_hash=get_password_hash("x123456789"), role="dj")
+ db.add(other)
+ db.commit()
+ db.refresh(other)
+ connector = _make_connector(db, other)
+ resp = client.post(
+ f"/api/llm/connectors/{connector.id}/stream-test", headers=auth_headers
+ )
+ assert resp.status_code == 404
+
+
+def test_stream_test_streams_chunks(client, db, test_user, auth_headers, monkeypatch):
+ connector = _make_connector(db, test_user)
+
+ async def _fake_stream(self, request):
+ yield ChatResponseChunk(text_delta="Hi")
+ yield ChatResponseChunk(text_delta=" there")
+ yield ChatResponseChunk(
+ stop_reason="end_turn", usage=TokenUsage(prompt=2, completion=2), done=True
+ )
+
+ monkeypatch.setattr(OpenAIApiKeyAdapter, "stream", _fake_stream)
+
+ resp = client.post(
+ f"/api/llm/connectors/{connector.id}/stream-test", headers=auth_headers
+ )
+ assert resp.status_code == 200
+ assert resp.headers["content-type"].startswith("text/event-stream")
+ body = resp.text
+ # Each SSE event line starts with "data:". Reconstruct the JSON payloads.
+ payloads = [
+ json.loads(line[len("data:") :].strip())
+ for line in body.splitlines()
+ if line.startswith("data:")
+ ]
+ text = "".join(p.get("text_delta", "") for p in payloads)
+ assert "Hi there" in text
+ assert any(p.get("done") for p in payloads)
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_stream_endpoint.py -q`
+Expected: FAIL — 404/405 (endpoint missing)
+
+- [ ] **Step 3: Implement the SSE endpoint in `llm.py`**
+
+Add imports (merge with existing):
+
+```python
+import json as _json
+
+from sse_starlette.sse import EventSourceResponse
+
+from app.models.user import User
+from app.services.llm.base import ChatRequest, Message
+from app.services.llm.exceptions import LlmError, NoLlmConfigured
+from app.services.llm.gateway import Gateway
+```
+
+Add the endpoint (place after `test_connector`):
+
+```python
+# A short, fixed prompt for the streaming health probe. Streams a single
+# sentence so the DJ sees tokens arrive in real time, exercising the full
+# resolve → adapter.stream → SSE path end-to-end.
+_STREAM_TEST_PROMPT = "Reply with one short friendly sentence confirming you are online."
+
+
+@router.post("/connectors/{connector_id}/stream-test")
+@limiter.limit("10/minute")
+async def stream_test_connector(
+ request: FastAPIRequest,
+ connector_id: int,
+ user: User = Depends(get_current_active_user),
+ db: Session = Depends(get_db),
+) -> EventSourceResponse:
+ """Stream a short sentence through the connector as ``text/event-stream``.
+
+ Validates ownership up front (404 for connectors the DJ doesn't own — never
+ leaks existence). Each SSE ``data:`` frame is a JSON ``ChatResponseChunk``.
+ On a typed gateway error, a terminal ``event: error`` frame is emitted with a
+ sanitised code (never the upstream payload), then the stream ends. Client
+ disconnect cancels the upstream provider request (the gateway generator's
+ ``finally`` writes the counts-only call log + closes the adapter).
+
+ Args:
+ request: The incoming request object (not read in this body).
+ connector_id: Id of the connector named in the URL.
+ user: The authenticated, active user making the call.
+ db: Request-scoped database session.
+
+ Returns:
+ An ``EventSourceResponse`` whose body is produced lazily by ``_publisher``.
+ """
+ row = _get_owned_connector_or_404(db, connector_id, user.id)
+
+ chat_request = ChatRequest(
+ messages=[Message(role="user", content=_STREAM_TEST_PROMPT)],
+ max_tokens=64,
+ temperature=0.0,
+ model=row.model_hint or None,
+ )
+
+ async def _publisher():
+ try:
+ # NOTE(review): Gateway.stream is given ``user``, not ``row``. The gateway
+ # resolves its connector from the actor (``_resolve_connector(db, actor)``),
+ # so when the DJ has several connectors this may stream through a different
+ # one than ``connector_id`` while still using ``row.model_hint`` — confirm
+ # this is intended, or stream through ``row`` directly.
+ # NOTE(review): ``db`` is a request-scoped dependency but is used here while
+ # the response body streams — confirm the session is still open at that point.
+ async for chunk in Gateway.stream(
+ db, user, chat_request, purpose="stream_test"
+ ):
+ yield {"data": _json.dumps(chunk.model_dump())}
+ except NoLlmConfigured:
+ yield {"event": "error", "data": _json.dumps({"code": "no_connector"})}
+ except LlmError as exc:
+ # Map to a sanitised, stable code — never echo the provider message.
+ code = type(exc).__name__
+ logger.info("stream-test failed for connector %s: %s", connector_id, code)
+ yield {"event": "error", "data": _json.dumps({"code": code})}
+
+ return EventSourceResponse(
+ _publisher(),
+ media_type="text/event-stream",
+ headers={"X-Accel-Buffering": "no"},
+ )
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_stream_endpoint.py -q`
+Expected: PASS (3 tests)
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/api/llm.py server/tests/test_llm_stream_endpoint.py
+git commit -m "feat(api): authenticated SSE stream-test endpoint for connectors"
+```
+
+---
+
+## Task 9: Backend CI green (ruff / format / bandit / full pytest)
+
+**Files:** none new — fix-ups only.
+
+- [ ] **Step 1: Auto-format + lint-fix**
+
+Run: `cd server && .venv/bin/ruff format . && .venv/bin/ruff check --fix .`
+
+- [ ] **Step 2: Lint check**
+
+Run: `cd server && .venv/bin/ruff check . && .venv/bin/ruff format --check .`
+Expected: no errors. If `_normalise_finish_reason` import triggers a private-import lint (PLC2701), keep the `# noqa` already added in Task 3, or inline a local copy of the 4-line mapping function into `streaming.py` to avoid importing a private name.
+
+- [ ] **Step 3: Bandit**
+
+Run: `cd server && .venv/bin/bandit -r app -c pyproject.toml -q`
+Expected: no new findings (the `# nosec B106` on the Authorization header is preserved).
+
+- [ ] **Step 4: Full backend test suite + coverage gate**
+
+Run: `cd server && .venv/bin/pytest --tb=short -q`
+Expected: PASS, coverage ≥ gate. If new streaming files drag coverage, the dedicated stream tests above should cover them; add targeted tests for any uncovered branch the report flags.
+
+- [ ] **Step 5: Commit any fix-ups**
+
+```bash
+git add -A
+git commit -m "chore(llm): backend lint/format/coverage fix-ups for streaming"
+```
+
+---
+
+## Task 10: Frontend SSE consumer `streamConnectorTest`
+
+**Files:**
+- Modify: `dashboard/lib/api.ts`
+- Test: `dashboard/lib/__tests__/api.test.ts` (append)
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `dashboard/lib/__tests__/api.test.ts` (match the file's existing import + setup style):
+
+```typescript
+describe('streamConnectorTest', () => {
+  it('parses SSE data frames and invokes onChunk per frame', async () => {
+    // Three complete frames: two text deltas and the terminal chunk.
+    const sse =
+      'data: {"text_delta":"Hi","done":false}\n\n' +
+      'data: {"text_delta":" there","done":false}\n\n' +
+      'data: {"text_delta":"","stop_reason":"end_turn","done":true}\n\n';
+    const encoder = new TextEncoder();
+    const stream = new ReadableStream<Uint8Array>({
+      start(controller) {
+        controller.enqueue(encoder.encode(sse));
+        controller.close();
+      },
+    });
+    const fetchMock = vi.fn().mockResolvedValue(
+      new Response(stream, {
+        status: 200,
+        headers: { 'Content-Type': 'text/event-stream' },
+      }),
+    );
+    vi.stubGlobal('fetch', fetchMock);
+
+    try {
+      apiClient.setToken('jwt-token');
+      const chunks: Array<{ text_delta?: string; done?: boolean }> = [];
+      await apiClient.streamConnectorTest(7, (c) => chunks.push(c));
+
+      expect(chunks.map((c) => c.text_delta).join('')).toBe('Hi there');
+      expect(chunks.at(-1)?.done).toBe(true);
+      // Auth header present.
+      const init = fetchMock.mock.calls[0][1] as RequestInit;
+      const headers = new Headers(init.headers);
+      expect(headers.get('Authorization')).toBe('Bearer jwt-token');
+    } finally {
+      // Restore the real fetch even when an assertion above throws, so a
+      // failure here cannot leak the stub into later tests.
+      vi.unstubAllGlobals();
+    }
+  });
+});
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd dashboard && npm test -- --run lib/__tests__/api.test.ts`
+Expected: FAIL — `apiClient.streamConnectorTest is not a function`
+
+- [ ] **Step 3: Add the type + method to `api.ts`**
+
+Add near the other LLM types (search for `LlmConnectorTestResult`):
+
+```typescript
+/**
+ * One parsed SSE `data:` frame from the connector stream-test endpoint.
+ *
+ * Every field is optional because each frame carries only what changed.
+ */
+export interface LlmStreamChunk {
+ /** Incremental text to append to the output so far. */
+ text_delta?: string;
+ /** Partial tool-call data; `index` identifies which call a fragment belongs to. */
+ tool_call_deltas?: Array<{
+ index: number;
+ id?: string | null;
+ name?: string | null;
+ input_json_fragment?: string;
+ }>;
+ /** Why generation stopped. */
+ stop_reason?: 'end_turn' | 'tool_use' | 'max_tokens' | 'error' | null;
+ /** Prompt and completion token counts. */
+ usage?: { prompt: number; completion: number } | null;
+ /** True on the terminal chunk of the stream. */
+ done?: boolean;
+}
+```
+
+Add the method to the `ApiClient` class (near `testLlmConnector`):
+
+```typescript
+  /**
+   * Stream a short health-check sentence through a connector via SSE.
+   *
+   * Uses fetch + ReadableStream rather than EventSource because EventSource
+   * cannot send the Authorization header this authenticated endpoint requires.
+   * Pass an AbortSignal to cancel — aborting closes the connection, which the
+   * backend treats as a client disconnect and cancels the upstream provider
+   * request.
+   *
+   * Frames may use LF, CRLF or CR line endings; all three are valid SSE and
+   * server libraries differ in which one they emit. An `event: error` frame
+   * is surfaced as a rejected promise instead of being passed to `onChunk`.
+   *
+   * @param id - Connector id to test.
+   * @param onChunk - Invoked for every parsed `data:` frame of a normal event.
+   *   Exceptions thrown by the callback propagate to the caller.
+   * @param signal - Optional AbortSignal used to cancel the request.
+   * @throws ApiError when the HTTP response is not OK, has no body, or the
+   *   stream delivers an `event: error` frame.
+   */
+  async streamConnectorTest(
+    id: number,
+    onChunk: (chunk: LlmStreamChunk) => void,
+    signal?: AbortSignal,
+  ): Promise<void> {
+    const headers = new Headers({ Accept: 'text/event-stream' });
+    if (this.token) headers.set('Authorization', `Bearer ${this.token}`);
+
+    const response = await fetch(
+      `${getApiUrl()}/api/llm/connectors/${id}/stream-test`,
+      { method: 'POST', headers, signal },
+    );
+    if (!response.ok || !response.body) {
+      if (response.status === 401 && this.onUnauthorized) this.onUnauthorized();
+      throw new ApiError('Stream test failed', response.status);
+    }
+
+    // A blank line ends a frame; accept every line terminator SSE allows.
+    const frameSeparator = /\r\n\r\n|\n\n|\r\r/;
+    const lineSeparator = /\r\n|\n|\r/;
+
+    // Parse one complete frame and hand its payload(s) to the caller.
+    const dispatchFrame = (frame: string): void => {
+      const lines = frame.split(lineSeparator);
+      const isErrorEvent = lines.some(
+        (line) =>
+          line.startsWith('event:') &&
+          line.slice('event:'.length).trim() === 'error',
+      );
+      for (const line of lines) {
+        if (!line.startsWith('data:')) continue;
+        const data = line.slice('data:'.length).trim();
+        if (!data || data === '[DONE]') continue;
+
+        let payload: unknown;
+        let parsed = true;
+        try {
+          payload = JSON.parse(data);
+        } catch {
+          parsed = false;
+        }
+
+        if (isErrorEvent) {
+          // The backend sends a sanitised `{ code }` object on failure.
+          const code =
+            parsed &&
+            typeof payload === 'object' &&
+            payload !== null &&
+            'code' in payload
+              ? String((payload as { code: unknown }).code)
+              : 'unknown';
+          throw new ApiError(`Stream test failed: ${code}`, response.status);
+        }
+        // Ignore unparseable keepalive frames.
+        if (!parsed) continue;
+        onChunk(payload as LlmStreamChunk);
+      }
+    };
+
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+    let completed = false;
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        buffer += decoder.decode(value, { stream: true });
+        // Re-scan the whole buffer so a separator split across two network
+        // chunks is still recognised once its second half arrives.
+        let boundary: RegExpExecArray | null;
+        while ((boundary = frameSeparator.exec(buffer)) !== null) {
+          const frame = buffer.slice(0, boundary.index);
+          buffer = buffer.slice(boundary.index + boundary[0].length);
+          dispatchFrame(frame);
+        }
+      }
+      completed = true;
+    } finally {
+      if (!completed) {
+        // Abnormal exit (error frame, callback threw, abort): stop reading so
+        // the connection is released instead of left half-consumed.
+        await reader.cancel().catch(() => undefined);
+      }
+      reader.releaseLock();
+    }
+  }
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd dashboard && npm test -- --run lib/__tests__/api.test.ts`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add dashboard/lib/api.ts dashboard/lib/__tests__/api.test.ts
+git commit -m "feat(ai-ui): SSE stream consumer for connector stream-test"
+```
+
+---
+
+## Task 11: Minimal UI consumer wiring (admin/ai stream test) + scope note
+
+**Files:**
+- Modify: `dashboard/app/admin/ai/page.tsx`
+
+**Decision:** The recommendation flow is a backend background pipeline that returns a final JSON payload to the UI (not a live token feed), so retrofitting it to SSE would be a large, risky change outside this issue's intent. Per the issue's "use reasonable judgment on scope and document it", the frontend consumer is the reusable `apiClient.streamConnectorTest` plumbing (Task 10) plus a minimal live "Stream test" affordance on the existing AI settings surface. The recommendation UI migration to SSE is explicitly deferred (future set-builder UI, §11.6) and noted in the PR body.
+
+- [ ] **Step 1: Read the admin/ai page to find the connector row / actions area**
+
+Run: `cd dashboard && grep -n "testLlmConnector\|Test\|connector" app/admin/ai/page.tsx | head -30`
+
+- [ ] **Step 2: Add a "Stream test" button that appends streamed text into local state**
+
+Add (adapt names to the file's existing component structure — this is the behavior to wire, not a verbatim drop-in):
+
+```tsx
+// Local state near the component's other useState hooks:
+const [streamText, setStreamText] = useState('');
+// Connector currently streaming, or null when idle.
+const [streaming, setStreaming] = useState<number | null>(null);
+// Connector whose output is displayed. Kept separate from `streaming` so the
+// result (or the failure message) stays visible after the stream finishes.
+const [streamOutputFor, setStreamOutputFor] = useState<number | null>(null);
+
+/** Run the streaming health probe for one connector and show its output live. */
+async function handleStreamTest(connectorId: number) {
+  setStreamText('');
+  setStreamOutputFor(connectorId);
+  setStreaming(connectorId);
+  try {
+    await apiClient.streamConnectorTest(connectorId, (chunk) => {
+      const delta = chunk.text_delta;
+      if (delta) setStreamText((prev) => prev + delta);
+    });
+  } catch {
+    setStreamText('(stream test failed)');
+  } finally {
+    setStreaming(null);
+  }
+}
+```
+
+And in the per-connector action area, next to the existing test button:
+
+```tsx
+<button
+  type="button"
+  onClick={() => void handleStreamTest(connector.id)}
+  disabled={streaming !== null}
+>
+  {streaming === connector.id ? 'Streaming…' : 'Stream test'}
+</button>
+{streamOutputFor === connector.id && streamText && (
+  <pre aria-live="polite">{streamText}</pre>
+)}
+```
+
+- [ ] **Step 3: Type-check + existing page tests**
+
+Run: `cd dashboard && npx tsc --noEmit`
+Run: `cd dashboard && npm test -- --run app/admin/ai`
+Expected: PASS. If the admin/ai page has snapshot/DOM tests that assert exact button sets, update those fixtures to include the new button.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git checkout dashboard/next-env.d.ts 2>/dev/null || true
+git add dashboard/app/admin/ai/page.tsx
+git commit -m "feat(ai-ui): minimal live stream-test affordance on AI settings"
+```
+
+---
+
+## Task 12: Full local CI sweep + finishing the branch
+
+**Files:** none new.
+
+- [ ] **Step 1: Backend CI**
+
+Run from `server/`:
+```bash
+.venv/bin/ruff check .
+.venv/bin/ruff format --check .
+.venv/bin/bandit -r app -c pyproject.toml -q
+.venv/bin/pytest --tb=short -q
+```
+Expected: all green, coverage gate satisfied.
+
+- [ ] **Step 2: Frontend CI**
+
+Run from `dashboard/`:
+```bash
+npm run lint
+npx tsc --noEmit
+npm test -- --run
+```
+Expected: all green. Then `git checkout dashboard/next-env.d.ts` if auto-modified.
+
+- [ ] **Step 3: Confirm no Alembic migration was introduced**
+
+Run: `cd server && git diff --name-only origin/epic/ai-engine...HEAD | grep alembic || echo "no migrations — correct"`
+Expected: `no migrations — correct` (streaming requires no schema change).
+
+- [ ] **Step 4: Use superpowers:finishing-a-development-branch (option 2: Push + PR)**
+
+Create the PR with `gh pr create --base epic/ai-engine`. PR body must include `Closes #335`, a `## Design decisions` section, and a note that the PR targets `epic/ai-engine`.
+
+---
+
+## Self-Review
+
+**Spec coverage (issue #335 acceptance criteria):**
+- `Gateway.stream(...) -> AsyncIterator[ChatResponseChunk]` → Task 7. ✅
+- `ChatResponseChunk` carries incremental text + partial tool_calls + final stop_reason + usage → Task 1 (model), Tasks 3/4/6 (population). ✅
+- Each adapter implements provider-native streaming (OpenAI, Anthropic, OpenAI-compatible) → Tasks 5, 6. ✅
+- Non-streaming adapters degrade gracefully (`StreamingUnsupported`) → Tasks 1, 2. ✅
+- SSE backend endpoint (text/event-stream) → Task 8. ✅
+- Tool-use mid-stream parses across providers (OpenAI partial JSON, Anthropic delta blocks) → Task 3 (OpenAI tool frags), Task 6 (`input_json_delta`). ✅
+- Cancellation propagates upstream (frontend disconnect → adapter cancels upstream) → Task 7 (`GeneratorExit` → adapter `async with` cleanup closes httpx/SDK stream), Task 10 (`AbortSignal`). ✅
+- Counts-only call log + audit consistency with non-stream path → Task 7 `_attempt_stream`. ✅
+- Frontend consumer → Tasks 10 (plumbing) + 11 (minimal UI, recommendation-migration deferral documented). ✅
+
+**Placeholder scan:** No TBD/TODO. Frontend Task 11 step 2 is explicitly behavior-to-wire (adapt to existing component) because the exact JSX scaffold depends on the live file — the implementer reads it in step 1.
+
+**Type consistency:** `ChatResponseChunk` fields (`text_delta`, `tool_call_deltas`, `stop_reason`, `usage`, `done`) and `ToolCallDelta` fields (`index`, `id`, `name`, `input_json_fragment`) are used identically in base.py, streaming.py, adapters, gateway, endpoint, and frontend type. `stream_openai_chat` signature matches its callers in both OpenAI adapters. `Gateway.stream` / `_attempt_stream` signatures match `dispatch` / `_attempt`.
diff --git a/docs/superpowers/plans/2026-05-28-per-feature-connector-preference.md b/docs/superpowers/plans/2026-05-28-per-feature-connector-preference.md
new file mode 100644
index 00000000..58fc2f36
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-28-per-feature-connector-preference.md
@@ -0,0 +1,1314 @@
+# Per-Feature Connector Preference Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Let each DJ pin a specific LLM connector to a specific agentic feature (e.g. recommendation → connector A, set_builder → connector B), with graceful fallback when the pinned connector is gone or auth-invalid.
+
+**Architecture:** A new `LlmFeaturePreference` table maps `(user_id, feature) → connector_id` with a UNIQUE constraint. `Gateway.dispatch` already receives `purpose` (the feature key), so resolution gains a new first step: look up the DJ's pinned connector for `purpose`, use it if active, else fall through to the existing chain (per-DJ default → MRU → org default → `NoLlmConfigured`). New `/api/llm/feature-preferences` endpoints (set/clear/list) are scoped to the current DJ and validate connector ownership + feature against an allowlist. The DJ AI settings UI gains a "Per-feature defaults" section.
+
+**Tech Stack:** FastAPI, SQLAlchemy 2.0, Alembic, slowapi, Pydantic v2, Next.js/React 19/TypeScript, vitest.
+
+---
+
+## File Structure
+
+**Backend (create):**
+- `server/app/models/llm_feature_preference.py` — the new model + feature allowlist constants.
+- `server/alembic/versions/050_llm_feature_preference.py` — migration (down_revision = `049`).
+- `server/tests/test_llm_feature_preference.py` — model + gateway resolution + endpoint tests.
+
+**Backend (modify):**
+- `server/app/models/__init__.py` — register `LlmFeaturePreference`.
+- `server/app/services/llm/connector_storage.py` — feature-preference CRUD helpers.
+- `server/app/services/llm/gateway.py` — add feature-preference as the first resolution step.
+- `server/app/api/llm.py` — set/clear/list feature-preference endpoints.
+- `server/app/schemas/llm.py` — request/response schemas + known-feature constant.
+
+**Frontend (modify):**
+- `dashboard/lib/api.ts` — `listFeaturePreferences`, `setFeaturePreference`, `clearFeaturePreference`.
+- `dashboard/components/AiProvidersSection.tsx` — "Per-feature defaults" section.
+- `dashboard/lib/api-types.ts` — re-export the new generated schema types.
+- `dashboard/lib/api-types.generated.ts` — regenerated from OpenAPI (via `npm run types:export && npm run types:generate`).
+
+**Design decisions (locked in):**
+- Feature key reuses the gateway `purpose` string. Known features allowlist: `{"recommendation", "set_builder"}`. `recommendation` is the only `purpose` in use today; `set_builder` is named in the issue spec for an upcoming feature. The allowlist is defined once, as `KNOWN_FEATURES` in `models/llm_feature_preference.py`, and re-exported from `schemas/llm.py`, where the API validation imports it.
+- The endpoint surface is `POST /api/llm/feature-preferences` (upsert set), `DELETE /api/llm/feature-preferences/{feature}` (clear), `GET /api/llm/feature-preferences` (list). Upsert semantics keep "set" and "change" as one operation (the UNIQUE constraint makes change == replace).
+- Ownership: setting a preference validates the connector belongs to the current DJ (404 if not, mirroring the existing connector-ownership 404 convention so another DJ's connector existence is never leaked).
+- Graceful fallback: gateway resolution skips a pinned preference whose connector is deleted (FK row gone) or whose status != `active`. No exception — falls through to the next resolution step.
+- We do NOT add a frontend "set inactive connector" guard beyond what the picker offers; the gateway already skips inactive pins, and the API rejects pinning a non-active connector with 400 (mirrors the per-DJ default endpoint), so a DJ can't silently break their own routing.
+
+---
+
+## Task 1: LlmFeaturePreference model + feature allowlist
+
+**Files:**
+- Create: `server/app/models/llm_feature_preference.py`
+- Modify: `server/app/models/__init__.py`
+- Test: `server/tests/test_llm_feature_preference.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Create `server/tests/test_llm_feature_preference.py`:
+
+```python
+"""Tests for per-feature connector preference (issue #337)."""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+from sqlalchemy.exc import IntegrityError
+
+from app.models.llm_connector import LlmConnector
+from app.models.llm_feature_preference import KNOWN_FEATURES, LlmFeaturePreference
+from app.models.user import User
+from app.services.auth import get_password_hash
+
+
+@pytest.fixture
+def dj_user(db) -> User:
+ user = User(
+ username="prefdj",
+ password_hash=get_password_hash("password123"),
+ role="dj",
+ )
+ db.add(user)
+ db.commit()
+ db.refresh(user)
+ return user
+
+
+def _make_connector(db, user, *, display_name="Pref connector", status="active"):
+ row = LlmConnector(
+ user_id=user.id,
+ connector_type="openai_apikey",
+ display_name=display_name,
+ status=status,
+ credentials=json.dumps({"api_key": "sk-fake-key"}),
+ model_hint="gpt-5-mini",
+ )
+ db.add(row)
+ db.commit()
+ db.refresh(row)
+ return row
+
+
+def test_known_features_contains_recommendation_and_set_builder():
+ assert "recommendation" in KNOWN_FEATURES
+ assert "set_builder" in KNOWN_FEATURES
+
+
+def test_unique_constraint_one_pref_per_user_feature(db, dj_user):
+ c1 = _make_connector(db, dj_user, display_name="A")
+ c2 = _make_connector(db, dj_user, display_name="B")
+ db.add(
+ LlmFeaturePreference(user_id=dj_user.id, feature="recommendation", connector_id=c1.id)
+ )
+ db.commit()
+ db.add(
+ LlmFeaturePreference(user_id=dj_user.id, feature="recommendation", connector_id=c2.id)
+ )
+ with pytest.raises(IntegrityError):
+ db.commit()
+ db.rollback()
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q`
+Expected: FAIL with `ModuleNotFoundError: No module named 'app.models.llm_feature_preference'`
+
+- [ ] **Step 3: Write the model**
+
+Create `server/app/models/llm_feature_preference.py`:
+
+```python
+"""Per-feature connector preference — pins a DJ's connector to a feature.
+
+A DJ can pin the recommendation engine to one connector and the set-builder
+to another. The gateway consults this table first (keyed by ``purpose``)
+before falling back to the per-DJ default / MRU / org-default chain.
+
+See issue #337, spec §11.8.
+"""
+
+from datetime import datetime
+
+from sqlalchemy import DateTime, ForeignKey, Integer, String, UniqueConstraint, func
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.models.base import Base
+
+# Allowlist of feature keys a DJ may pin. These mirror the gateway ``purpose``
+# strings. ``recommendation`` is the only purpose dispatched today;
+# ``set_builder`` is reserved for the upcoming set-builder feature (issue spec
+# §11.8). Validation of API input against this set lives in ``schemas/llm.py``
+# (KNOWN_FEATURES is re-exported there to keep a single source of truth).
+KNOWN_FEATURES = frozenset({"recommendation", "set_builder"})
+
+
+class LlmFeaturePreference(Base):
+ """Maps ``(user_id, feature)`` to a pinned ``connector_id``.
+
+ At most one row per ``(user_id, feature)`` — enforced by a UNIQUE
+ constraint. Deleting the connector cascades (ON DELETE CASCADE) so a stale
+ preference never points at a missing connector.
+ """
+
+ __tablename__ = "llm_feature_preferences"
+
+ # Surrogate primary key.
+ id: Mapped[int] = mapped_column(Integer, primary_key=True)
+ # Owning DJ; the row is removed when the user is deleted.
+ user_id: Mapped[int] = mapped_column(
+ ForeignKey("users.id", ondelete="CASCADE"), index=True, nullable=False
+ )
+ # Feature key, at most 40 characters. The column itself does not enforce
+ # membership in KNOWN_FEATURES.
+ feature: Mapped[str] = mapped_column(String(40), nullable=False)
+ # Pinned connector; the row is removed when the connector is deleted.
+ connector_id: Mapped[int] = mapped_column(
+ ForeignKey("llm_connectors.id", ondelete="CASCADE"), index=True, nullable=False
+ )
+ # Set by the database when the row is inserted.
+ created_at: Mapped[datetime] = mapped_column(
+ DateTime, nullable=False, server_default=func.now()
+ )
+
+ # One pin per (user, feature); setting a new pin replaces the existing row.
+ __table_args__ = (
+ UniqueConstraint("user_id", "feature", name="uq_llm_feature_pref_user_feature"),
+ )
+```
+
+- [ ] **Step 4: Register the model**
+
+Modify `server/app/models/__init__.py` — add the import after the `llm_connector` import line and the name to `__all__` (alphabetical-ish, keep grouped with other Llm names):
+
+```python
+from app.models.llm_connector import LlmAuditEvent, LlmCallLog, LlmConnector
+from app.models.llm_feature_preference import LlmFeaturePreference
+```
+
+And add `"LlmFeaturePreference",` to the `__all__` list (right after `"LlmConnector",`).
+
+- [ ] **Step 5: Run test to verify it passes**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q`
+Expected: PASS (2 tests)
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add server/app/models/llm_feature_preference.py server/app/models/__init__.py server/tests/test_llm_feature_preference.py
+git commit -m "feat(llm): add LlmFeaturePreference model + feature allowlist"
+```
+
+---
+
+## Task 2: Alembic migration
+
+**Files:**
+- Create: `server/alembic/versions/050_llm_feature_preference.py`
+
+- [ ] **Step 1: Write the migration**
+
+Create `server/alembic/versions/050_llm_feature_preference.py`:
+
+```python
+"""Add llm_feature_preferences table.
+
+Revision ID: 050
+Revises: 049
+Create Date: 2026-05-28
+
+Per-feature connector preference (issue #337). Maps ``(user_id, feature)`` to a
+pinned ``connector_id`` with a UNIQUE constraint so a DJ has at most one pinned
+connector per feature. Both FKs cascade on delete so a deleted user or
+connector never leaves a dangling preference.
+"""
+
+import sqlalchemy as sa
+
+from alembic import op
+
+revision: str = "050"
+down_revision: str | None = "049"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ """Create ``llm_feature_preferences`` and its two lookup indexes."""
+ op.create_table(
+ "llm_feature_preferences",
+ sa.Column("id", sa.Integer(), nullable=False),
+ sa.Column("user_id", sa.Integer(), nullable=False),
+ sa.Column("feature", sa.String(length=40), nullable=False),
+ sa.Column("connector_id", sa.Integer(), nullable=False),
+ sa.Column(
+ "created_at",
+ sa.DateTime(),
+ server_default=sa.func.now(),
+ nullable=False,
+ ),
+ # Both foreign keys cascade so deleting a user or connector removes its pins.
+ sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
+ sa.ForeignKeyConstraint(["connector_id"], ["llm_connectors.id"], ondelete="CASCADE"),
+ sa.PrimaryKeyConstraint("id"),
+ # At most one pinned connector per (user, feature).
+ sa.UniqueConstraint("user_id", "feature", name="uq_llm_feature_pref_user_feature"),
+ )
+ # Index names match what ``index=True`` on the model columns generates, so
+ # ``alembic check`` reports no drift.
+ op.create_index(
+ "ix_llm_feature_preferences_user_id",
+ "llm_feature_preferences",
+ ["user_id"],
+ )
+ op.create_index(
+ "ix_llm_feature_preferences_connector_id",
+ "llm_feature_preferences",
+ ["connector_id"],
+ )
+
+
+def downgrade() -> None:
+ op.drop_index("ix_llm_feature_preferences_connector_id", table_name="llm_feature_preferences")
+ op.drop_index("ix_llm_feature_preferences_user_id", table_name="llm_feature_preferences")
+ op.drop_table("llm_feature_preferences")
+```
+
+- [ ] **Step 2: Run migration + drift check**
+
+Run: `cd server && .venv/bin/alembic upgrade head && .venv/bin/alembic check`
+Expected: `upgrade` runs cleanly to revision `050`, and `alembic check` prints `No new upgrade operations detected.`
+
+If `alembic check` reports drift, reconcile the migration columns/indexes with the model (`index=True` on `user_id` and `connector_id` matches the two `create_index` calls).
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add server/alembic/versions/050_llm_feature_preference.py
+git commit -m "feat(llm): migration 050 for llm_feature_preferences"
+```
+
+---
+
+## Task 3: connector_storage CRUD helpers
+
+**Files:**
+- Modify: `server/app/services/llm/connector_storage.py`
+- Test: `server/tests/test_llm_feature_preference.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `server/tests/test_llm_feature_preference.py`:
+
+```python
+def test_set_feature_preference_upserts(db, dj_user):
+ from app.services.llm.connector_storage import (
+ get_feature_preferences_for_user,
+ set_feature_preference,
+ )
+
+ c1 = _make_connector(db, dj_user, display_name="A")
+ c2 = _make_connector(db, dj_user, display_name="B")
+
+ set_feature_preference(db, user_id=dj_user.id, feature="recommendation", connector_id=c1.id)
+ db.commit()
+ prefs = get_feature_preferences_for_user(db, dj_user.id)
+ assert {p.feature: p.connector_id for p in prefs} == {"recommendation": c1.id}
+
+ # Re-set the same feature → replace, not duplicate.
+ set_feature_preference(db, user_id=dj_user.id, feature="recommendation", connector_id=c2.id)
+ db.commit()
+ prefs = get_feature_preferences_for_user(db, dj_user.id)
+ assert {p.feature: p.connector_id for p in prefs} == {"recommendation": c2.id}
+
+
+def test_clear_feature_preference_removes_row(db, dj_user):
+ from app.services.llm.connector_storage import (
+ clear_feature_preference,
+ get_feature_preferences_for_user,
+ set_feature_preference,
+ )
+
+ c1 = _make_connector(db, dj_user, display_name="A")
+ set_feature_preference(db, user_id=dj_user.id, feature="recommendation", connector_id=c1.id)
+ db.commit()
+
+ removed = clear_feature_preference(db, user_id=dj_user.id, feature="recommendation")
+ db.commit()
+ assert removed is True
+ assert get_feature_preferences_for_user(db, dj_user.id) == []
+
+ # Clearing a non-existent preference is a no-op (returns False).
+ assert clear_feature_preference(db, user_id=dj_user.id, feature="recommendation") is False
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q`
+Expected: FAIL with `ImportError: cannot import name 'set_feature_preference'`
+
+- [ ] **Step 3: Add the helpers**
+
+In `server/app/services/llm/connector_storage.py`, the new model lives in a different module from `app.models.llm_connector`, so it cannot be added to the existing `from app.models.llm_connector import (...)` block. Add a separate import alongside the other top-of-file imports:
+
+```python
+from app.models.llm_feature_preference import LlmFeaturePreference
+```
+
+Then add these functions (place them after `unset_default_for_user`):
+
+```python
+def get_feature_preferences_for_user(db: Session, user_id: int) -> list[LlmFeaturePreference]:
+ """Return all of a DJ's per-feature connector pins."""
+ return (
+ db.query(LlmFeaturePreference)
+ .filter(LlmFeaturePreference.user_id == user_id)
+ .order_by(LlmFeaturePreference.feature.asc())
+ .all()
+ )
+
+
+def get_feature_preference(
+ db: Session, *, user_id: int, feature: str
+) -> LlmFeaturePreference | None:
+ """Return the DJ's pin for ``feature``, or ``None`` if unset."""
+ return (
+ db.query(LlmFeaturePreference)
+ .filter(
+ LlmFeaturePreference.user_id == user_id,
+ LlmFeaturePreference.feature == feature,
+ )
+ .one_or_none()
+ )
+
+
+def set_feature_preference(
+ db: Session, *, user_id: int, feature: str, connector_id: int
+) -> LlmFeaturePreference:
+ """Upsert the DJ's pin for ``feature`` → ``connector_id``. Caller commits.
+
+ Replace-in-place when a row already exists so the UNIQUE constraint on
+ ``(user_id, feature)`` is never violated.
+ """
+ existing = get_feature_preference(db, user_id=user_id, feature=feature)
+ if existing is not None:
+ existing.connector_id = connector_id
+ db.flush()
+ return existing
+ row = LlmFeaturePreference(user_id=user_id, feature=feature, connector_id=connector_id)
+ db.add(row)
+ db.flush()
+ return row
+
+
+def clear_feature_preference(db: Session, *, user_id: int, feature: str) -> bool:
+ """Delete the DJ's pin for ``feature``. Returns True iff a row was removed.
+
+ Caller commits.
+ """
+ existing = get_feature_preference(db, user_id=user_id, feature=feature)
+ if existing is None:
+ return False
+ db.delete(existing)
+ db.flush()
+ return True
+```
+
+Add the four function names to the `__all__` list alphabetically:
+`"clear_feature_preference",`, `"get_feature_preference",`, `"get_feature_preferences_for_user",`, `"set_feature_preference",`.
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/services/llm/connector_storage.py server/tests/test_llm_feature_preference.py
+git commit -m "feat(llm): feature-preference CRUD helpers in connector_storage"
+```
+
+---
+
+## Task 4: Gateway resolution — feature preference first
+
+**Files:**
+- Modify: `server/app/services/llm/gateway.py`
+- Test: `server/tests/test_llm_feature_preference.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `server/tests/test_llm_feature_preference.py`:
+
+```python
+from unittest.mock import AsyncMock, patch # noqa: E402 (grouped with gateway tests)
+
+from app.services.llm.adapters.openai_apikey import OpenAIApiKeyAdapter # noqa: E402
+from app.services.llm.base import ChatRequest, ChatResponse, Message, TokenUsage # noqa: E402
+from app.services.llm.gateway import Gateway # noqa: E402
+
+
+def _ok_response() -> ChatResponse:
+ return ChatResponse(
+ text="ok", tool_calls=[], stop_reason="end_turn", usage=TokenUsage(prompt=1, completion=1)
+ )
+
+
+@pytest.mark.asyncio
+async def test_gateway_prefers_feature_pin_over_default(db, dj_user):
+ from app.services.llm.connector_storage import set_default_for_user, set_feature_preference
+
+ pinned = _make_connector(db, dj_user, display_name="pinned")
+ other = _make_connector(db, dj_user, display_name="default")
+ set_default_for_user(db, connector=other) # per-DJ default points elsewhere
+ set_feature_preference(
+ db, user_id=dj_user.id, feature="recommendation", connector_id=pinned.id
+ )
+ db.commit()
+
+ captured = {}
+
+ async def fake_chat(self, request): # noqa: ANN001
+ captured["connector_id"] = self.connector.id
+ return _ok_response()
+
+ with patch.object(OpenAIApiKeyAdapter, "chat", new=fake_chat):
+ await Gateway.dispatch(
+ db,
+ dj_user,
+ ChatRequest(messages=[Message(role="user", content="hi")]),
+ purpose="recommendation",
+ )
+ assert captured["connector_id"] == pinned.id
+
+
+@pytest.mark.asyncio
+async def test_gateway_falls_back_when_pinned_connector_auth_invalid(db, dj_user):
+ from app.services.llm.connector_storage import set_default_for_user, set_feature_preference
+
+ pinned = _make_connector(db, dj_user, display_name="pinned", status="auth_invalid")
+ fallback = _make_connector(db, dj_user, display_name="fallback")
+ set_default_for_user(db, connector=fallback)
+ set_feature_preference(
+ db, user_id=dj_user.id, feature="recommendation", connector_id=pinned.id
+ )
+ db.commit()
+
+ captured = {}
+
+ async def fake_chat(self, request): # noqa: ANN001
+ captured["connector_id"] = self.connector.id
+ return _ok_response()
+
+ with patch.object(OpenAIApiKeyAdapter, "chat", new=fake_chat):
+ await Gateway.dispatch(
+ db,
+ dj_user,
+ ChatRequest(messages=[Message(role="user", content="hi")]),
+ purpose="recommendation",
+ )
+ # Skips the auth_invalid pin, falls through to the per-DJ default.
+ assert captured["connector_id"] == fallback.id
+
+
+@pytest.mark.asyncio
+async def test_gateway_ignores_pin_for_unknown_feature(db, dj_user):
+ """A pin set for one feature must not leak into another purpose."""
+ from app.services.llm.connector_storage import set_feature_preference
+
+ pinned = _make_connector(db, dj_user, display_name="pinned")
+ mru = _make_connector(db, dj_user, display_name="mru")
+ set_feature_preference(
+ db, user_id=dj_user.id, feature="recommendation", connector_id=pinned.id
+ )
+ db.commit()
+
+ captured = {}
+
+ async def fake_chat(self, request): # noqa: ANN001
+ captured["connector_id"] = self.connector.id
+ return _ok_response()
+
+ with patch.object(OpenAIApiKeyAdapter, "chat", new=fake_chat):
+ await Gateway.dispatch(
+ db,
+ dj_user,
+ ChatRequest(messages=[Message(role="user", content="hi")]),
+ purpose="set_builder",
+ )
+ # No pin for set_builder → MRU resolution (most recently created here is `mru`).
+ assert captured["connector_id"] == mru.id
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q -k gateway`
+Expected: FAIL — the pin is ignored because `_resolve_connector` doesn't know about `purpose`.
+
+- [ ] **Step 3: Thread purpose into resolution**
+
+In `server/app/services/llm/gateway.py`:
+
+Add the storage import near the existing imports:
+
+```python
+from app.services.llm.connector_storage import audit_event, get_feature_preference, log_call
+```
+
+(modify the existing `from app.services.llm.connector_storage import audit_event, log_call` line)
+
+In `Gateway.dispatch`, change the resolve call to pass `purpose`:
+
+```python
+ primary = _resolve_connector(db, actor, purpose=purpose)
+```
+
+Update `_resolve_connector`'s signature and add the feature-preference step as the FIRST check inside the `if actor is not None:` block:
+
+```python
+def _resolve_connector(db: Session, actor: User | None, *, purpose: str) -> LlmConnector:
+ if actor is not None:
+ # 0. Per-feature pin (issue #337) takes precedence over the per-DJ
+ # default and MRU. Skipped gracefully when the pinned connector was
+ # deleted (FK row gone) or is no longer active, so a stale/broken
+ # pin never silently breaks the DJ — resolution falls through.
+ pref = get_feature_preference(db, user_id=actor.id, feature=purpose)
+ if pref is not None:
+ pinned = db.get(LlmConnector, pref.connector_id)
+ if (
+ pinned is not None
+ and pinned.user_id == actor.id
+ and pinned.status == STATUS_ACTIVE
+ ):
+ return pinned
+
+ # Per-DJ explicit default takes precedence over MRU (issue #336).
+ ...
+```
+
+(Leave the rest of `_resolve_connector` unchanged — the `pinned` default block, the MRU block, the org-default fallback.)
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q -k gateway`
+Expected: PASS
+
+Then run the full gateway suite to confirm no regression:
+Run: `cd server && .venv/bin/pytest tests/test_llm_gateway.py tests/test_llm_default_connector.py -q`
+Expected: PASS
+
+- [ ] **Step 5: Update gateway module docstring**
+
+In `server/app/services/llm/gateway.py`, update the "Resolution order" docstring at the top to list the feature-preference step first:
+
+```
+Resolution order:
+1. If ``actor`` is not ``None``:
+ a. The DJ's per-feature pin for ``purpose`` if set and the pinned connector
+ is active (``LlmFeaturePreference`` — issue #337).
+ b. Else: the DJ's explicit default active connector if one is pinned
+ (``LlmConnector.is_default = True``) — issue #336.
+ c. Else: most-recently-used active connector for the DJ.
+2. Else: ``SystemSettings.llm_default_connector_id`` if set and active.
+3. Else: raise :class:`NoLlmConfigured`.
+```
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add server/app/services/llm/gateway.py server/tests/test_llm_feature_preference.py
+git commit -m "feat(llm): gateway resolves per-feature pin first, falls back gracefully"
+```
+
+---
+
+## Task 5: API schemas
+
+**Files:**
+- Modify: `server/app/schemas/llm.py`
+
+- [ ] **Step 1: Add the schemas + feature literal**
+
+In `server/app/schemas/llm.py`, near the top (after `from typing import Literal`), add the `KNOWN_FEATURES` import, a sorted tuple of its values, and a hand-maintained `Literal` alias:
+
+```python
+from app.models.llm_feature_preference import KNOWN_FEATURES
+
+# Sorted tuple so the OpenAPI enum + frontend list are deterministic.
+KNOWN_FEATURE_VALUES: tuple[str, ...] = tuple(sorted(KNOWN_FEATURES))
+FeatureKey = Literal["recommendation", "set_builder"]
+```
+
+At the end of the file add:
+
+```python
+class FeaturePreferenceOut(BaseModel):
+ """A single per-feature connector pin."""
+
+ model_config = ConfigDict(from_attributes=True)
+
+ feature: FeatureKey
+ connector_id: int
+
+
+class FeaturePreferencesListOut(BaseModel):
+ """All of a DJ's per-feature pins + the catalogue of pinnable features."""
+
+ preferences: list[FeaturePreferenceOut]
+ known_features: list[FeatureKey]
+
+
+class FeaturePreferenceSet(BaseModel):
+ """Set/change a per-feature pin. Upsert — replaces any existing pin."""
+
+ feature: FeatureKey
+ connector_id: int = Field(..., ge=1)
+```
+
+> Note: `FeatureKey` is hand-maintained to match `KNOWN_FEATURES` (Pydantic `Literal` can't be built from a runtime frozenset and still emit a static OpenAPI enum). The model docstring in `llm_feature_preference.py` flags that both must stay in sync; a test in Task 7 asserts they match.
+
+- [ ] **Step 2: Verify it imports**
+
+Run: `cd server && .venv/bin/python -c "from app.schemas.llm import FeaturePreferenceSet, FeaturePreferencesListOut, KNOWN_FEATURE_VALUES; print(KNOWN_FEATURE_VALUES)"`
+Expected: prints `('recommendation', 'set_builder')`
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add server/app/schemas/llm.py
+git commit -m "feat(llm): feature-preference API schemas"
+```
+
+---
+
+## Task 6: API endpoints
+
+**Files:**
+- Modify: `server/app/api/llm.py`
+- Test: `server/tests/test_llm_feature_preference.py`
+
+- [ ] **Step 1: Write the failing test**
+
+Append to `server/tests/test_llm_feature_preference.py`:
+
+```python
+from fastapi.testclient import TestClient # noqa: E402
+
+
+def _login(client: TestClient, username: str, password: str) -> dict[str, str]:
+ resp = client.post("/api/auth/login", data={"username": username, "password": password})
+ assert resp.status_code == 200, resp.json()
+ return {"Authorization": f"Bearer {resp.json()['access_token']}"}
+
+
+def test_set_list_clear_feature_preference_endpoints(client, db, test_user, auth_headers):
+ c = _make_connector(db, test_user, display_name="Endpoint connector")
+
+ # Set
+ resp = client.post(
+ "/api/llm/feature-preferences",
+ json={"feature": "recommendation", "connector_id": c.id},
+ headers=auth_headers,
+ )
+ assert resp.status_code == 200, resp.json()
+ body = resp.json()
+ assert {p["feature"]: p["connector_id"] for p in body["preferences"]} == {
+ "recommendation": c.id
+ }
+ assert "set_builder" in body["known_features"]
+
+ # List
+ resp = client.get("/api/llm/feature-preferences", headers=auth_headers)
+ assert resp.status_code == 200
+ assert resp.json()["preferences"][0]["connector_id"] == c.id
+
+ # Clear
+ resp = client.delete("/api/llm/feature-preferences/recommendation", headers=auth_headers)
+ assert resp.status_code == 200
+ assert resp.json()["preferences"] == []
+
+
+def test_set_feature_preference_rejects_unknown_feature(client, db, test_user, auth_headers):
+ c = _make_connector(db, test_user, display_name="X")
+ resp = client.post(
+ "/api/llm/feature-preferences",
+ json={"feature": "totally_made_up", "connector_id": c.id},
+ headers=auth_headers,
+ )
+ assert resp.status_code == 422 # Pydantic Literal rejects it
+
+
+def test_set_feature_preference_rejects_other_djs_connector(
+ client, db, test_user, auth_headers
+):
+ # Another DJ owns this connector.
+ other = User(
+ username="otherdj", password_hash=get_password_hash("password123"), role="dj"
+ )
+ db.add(other)
+ db.commit()
+ db.refresh(other)
+ foreign = _make_connector(db, other, display_name="Not yours")
+
+ resp = client.post(
+ "/api/llm/feature-preferences",
+ json={"feature": "recommendation", "connector_id": foreign.id},
+ headers=auth_headers,
+ )
+ assert resp.status_code == 404 # ownership not leaked
+
+
+def test_set_feature_preference_rejects_inactive_connector(
+ client, db, test_user, auth_headers
+):
+ c = _make_connector(db, test_user, display_name="Broken", status="auth_invalid")
+ resp = client.post(
+ "/api/llm/feature-preferences",
+ json={"feature": "recommendation", "connector_id": c.id},
+ headers=auth_headers,
+ )
+ assert resp.status_code == 400
+
+
+def test_clear_unknown_feature_returns_422(client, auth_headers):
+ resp = client.delete("/api/llm/feature-preferences/bogus", headers=auth_headers)
+ assert resp.status_code == 422
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q -k endpoint`
+Expected: FAIL with 404 (route not found) on the first POST.
+
+- [ ] **Step 3: Add the endpoints**
+
+In `server/app/api/llm.py`:
+
+Add to the schema import block:
+
+```python
+from app.schemas.llm import (
+ ConnectorCreate,
+ ConnectorCredentialsRotate,
+ ConnectorOut,
+ ConnectorPatch,
+ ConnectorTestResult,
+ DjPolicyOut,
+ FeatureKey,
+ FeaturePreferenceSet,
+ FeaturePreferencesListOut,
+)
+```
+
+Add to the connector_storage import block:
+
+```python
+from app.services.llm.connector_storage import (
+ ...existing names...,
+ clear_feature_preference,
+ get_feature_preferences_for_user,
+ set_feature_preference,
+)
+```
+
+Add a small helper near `_get_owned_connector_or_404`:
+
+```python
+def _feature_prefs_response(db: Session, user_id: int) -> FeaturePreferencesListOut:
+ """Build the list response: the DJ's current pins + the pinnable catalogue."""
+ from app.schemas.llm import KNOWN_FEATURE_VALUES, FeaturePreferenceOut
+
+ rows = get_feature_preferences_for_user(db, user_id)
+ return FeaturePreferencesListOut(
+ preferences=[FeaturePreferenceOut.model_validate(r) for r in rows],
+ known_features=list(KNOWN_FEATURE_VALUES), # type: ignore[arg-type]
+ )
+```
+
+Add the three endpoints (place after the unset-default endpoint, before the delete-connector endpoint):
+
+```python
+@router.get("/feature-preferences", response_model=FeaturePreferencesListOut)
+@limiter.limit("60/minute")
+def list_feature_preferences(
+ request: FastAPIRequest,
+ user: User = Depends(get_current_active_user),
+ db: Session = Depends(get_db),
+) -> FeaturePreferencesListOut:
+ """List the DJ's per-feature connector pins (issue #337)."""
+ return _feature_prefs_response(db, user.id)
+
+
+@router.post(
+ "/feature-preferences",
+ response_model=FeaturePreferencesListOut,
+ responses={
+ 400: {"description": "Connector is not active and cannot be pinned."},
+ 404: {"description": "Connector not found for current user."},
+ },
+)
+@limiter.limit("30/minute")
+def set_feature_preference_endpoint(
+ request: FastAPIRequest,
+ payload: FeaturePreferenceSet,
+ user: User = Depends(get_current_active_user),
+ db: Session = Depends(get_db),
+) -> FeaturePreferencesListOut:
+ """Pin (or re-pin) a connector to a feature for the current DJ.
+
+ Validates connector ownership server-side (404 for IDs the DJ doesn't own,
+ so another DJ's connector existence is never leaked) and rejects pinning a
+ non-active connector (400) — the gateway would skip it anyway, so silently
+ accepting it is a footgun.
+ """
+ row = _get_owned_connector_or_404(db, payload.connector_id, user.id)
+ if row.status != "active":
+ raise HTTPException(
+ status_code=400,
+ detail="Only an active connector can be pinned to a feature",
+ )
+ set_feature_preference(
+ db, user_id=user.id, feature=payload.feature, connector_id=row.id
+ )
+ db.commit()
+ return _feature_prefs_response(db, user.id)
+
+
+@router.delete("/feature-preferences/{feature}", response_model=FeaturePreferencesListOut)
+@limiter.limit("30/minute")
+def clear_feature_preference_endpoint(
+ request: FastAPIRequest,
+ feature: FeatureKey,
+ user: User = Depends(get_current_active_user),
+ db: Session = Depends(get_db),
+) -> FeaturePreferencesListOut:
+ """Clear the DJ's pin for ``feature`` (no-op if unset). Returns the new list."""
+ clear_feature_preference(db, user_id=user.id, feature=feature)
+ db.commit()
+ return _feature_prefs_response(db, user.id)
+```
+
+> Path-param `feature: FeatureKey` makes FastAPI return 422 for unknown features automatically.
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q -k "endpoint or feature"`
+Expected: PASS
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add server/app/api/llm.py server/tests/test_llm_feature_preference.py
+git commit -m "feat(llm): set/clear/list feature-preference endpoints"
+```
+
+---
+
+## Task 7: Consistency guard + full backend CI
+
+**Files:**
+- Test: `server/tests/test_llm_feature_preference.py`
+
+- [ ] **Step 1: Add a guard test that FeatureKey == KNOWN_FEATURES**
+
+Append to `server/tests/test_llm_feature_preference.py`:
+
+```python
+def test_feature_key_literal_matches_known_features():
+ """FeatureKey (the OpenAPI enum) must stay in sync with KNOWN_FEATURES."""
+ import typing
+
+ from app.schemas.llm import FeatureKey
+
+ literal_values = set(typing.get_args(FeatureKey))
+ assert literal_values == set(KNOWN_FEATURES)
+```
+
+- [ ] **Step 2: Run the full new test file**
+
+Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -q`
+Expected: PASS (all tests)
+
+- [ ] **Step 3: Run full backend CI**
+
+```bash
+cd server
+.venv/bin/ruff check .
+.venv/bin/ruff format --check .
+.venv/bin/bandit -r app -c pyproject.toml -q
+.venv/bin/alembic upgrade head && .venv/bin/alembic check
+.venv/bin/pytest --tb=short -q
+```
+
+Expected: ruff clean, bandit clean, alembic check clean, pytest passes with coverage ≥ gate. Fix any failures before committing.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add server/tests/test_llm_feature_preference.py
+git commit -m "test(llm): guard FeatureKey/KNOWN_FEATURES sync"
+```
+
+---
+
+## Task 8: Frontend — regenerate types + api.ts methods
+
+**Files:**
+- Modify: `dashboard/lib/api-types.generated.ts` (regenerated), `dashboard/lib/api-types.ts`, `dashboard/lib/api.ts`
+
+- [ ] **Step 1: Regenerate OpenAPI types**
+
+```bash
+cd dashboard
+npm run types:export
+npm run types:generate
+git checkout ../dashboard/next-env.d.ts 2>/dev/null || true
+```
+
+Expected: `lib/api-types.generated.ts` now contains `FeaturePreferenceOut`, `FeaturePreferencesListOut`, `FeaturePreferenceSet` schemas.
+
+- [ ] **Step 2: Re-export the new types**
+
+In `dashboard/lib/api-types.ts`, in the LLM gateway block, add:
+
+```typescript
+export type LlmFeaturePreference = Schemas['FeaturePreferenceOut'];
+export type LlmFeaturePreferences = Schemas['FeaturePreferencesListOut'];
+export type LlmFeaturePreferenceSet = Schemas['FeaturePreferenceSet'];
+export type LlmFeatureKey = Schemas['FeaturePreferenceOut']['feature'];
+```
+
+- [ ] **Step 3: Add api.ts methods**
+
+In `dashboard/lib/api.ts`, add the type imports to the existing LLM import + re-export blocks:
+`LlmFeaturePreferences`, `LlmFeaturePreferenceSet`, `LlmFeatureKey`.
+
+Then add methods after `unsetLlmConnectorDefault`:
+
+```typescript
+ async listLlmFeaturePreferences(): Promise<LlmFeaturePreferences> {
+ return this.fetch('/api/llm/feature-preferences');
+ }
+
+ async setLlmFeaturePreference(
+ data: LlmFeaturePreferenceSet,
+ ): Promise<LlmFeaturePreferences> {
+ return this.fetch('/api/llm/feature-preferences', {
+ method: 'POST',
+ body: JSON.stringify(data),
+ });
+ }
+
+ async clearLlmFeaturePreference(
+ feature: LlmFeatureKey,
+ ): Promise<LlmFeaturePreferences> {
+ return this.fetch(`/api/llm/feature-preferences/${feature}`, {
+ method: 'DELETE',
+ });
+ }
+```
+
+- [ ] **Step 4: Type-check**
+
+Run: `cd dashboard && npx tsc --noEmit`
+Expected: no errors.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add dashboard/lib/api-types.generated.ts dashboard/lib/api-types.ts dashboard/lib/api.ts server/openapi.json
+git commit -m "feat(ai-ui): api client methods + types for feature preferences"
+```
+
+---
+
+## Task 9: Frontend — "Per-feature defaults" section
+
+**Files:**
+- Modify: `dashboard/components/AiProvidersSection.tsx`
+- Test: `dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx` (create)
+
+- [ ] **Step 1: Write the failing test**
+
+Check first whether a test file already exists for this component:
+Run: `ls dashboard/components/__tests__/ 2>/dev/null | grep -i aiprovider || ls dashboard/**/__tests__/ 2>/dev/null`
+
+Create `dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx`:
+
+```tsx
+import { render, screen, waitFor, fireEvent } from '@testing-library/react';
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+
+import AiProvidersSection from '../AiProvidersSection';
+import { api } from '@/lib/api';
+
+vi.mock('@/lib/api', () => ({
+ api: {
+ listLlmConnectors: vi.fn(),
+ getLlmPolicy: vi.fn(),
+ listOpenRouterModels: vi.fn(),
+ listLlmFeaturePreferences: vi.fn(),
+ setLlmFeaturePreference: vi.fn(),
+ clearLlmFeaturePreference: vi.fn(),
+ },
+}));
+
+const connector = {
+ id: 1,
+ user_id: 1,
+ connector_type: 'openai_apikey',
+ display_name: 'My OpenAI',
+ status: 'active',
+ base_url_plain: null,
+ model_hint: null,
+ created_at: '2026-01-01T00:00:00Z',
+ updated_at: '2026-01-01T00:00:00Z',
+ last_used_at: null,
+ last_error: null,
+ is_default: false,
+ last_health_check_at: null,
+ last_health_check_status: null,
+};
+
+beforeEach(() => {
+ vi.clearAllMocks();
+ (api.listLlmConnectors as any).mockResolvedValue([connector]);
+ (api.getLlmPolicy as any).mockResolvedValue({
+ llm_apikey_connectors_enabled: true,
+ llm_compatible_connector_enabled: true,
+ allowed_connector_types: ['openai_apikey'],
+ });
+ (api.listLlmFeaturePreferences as any).mockResolvedValue({
+ preferences: [],
+ known_features: ['recommendation', 'set_builder'],
+ });
+});
+
+describe('AiProvidersSection per-feature defaults', () => {
+ it('renders a picker per known feature and sets a pin', async () => {
+ (api.setLlmFeaturePreference as any).mockResolvedValue({
+ preferences: [{ feature: 'recommendation', connector_id: 1 }],
+ known_features: ['recommendation', 'set_builder'],
+ });
+
+ render(<AiProvidersSection />);
+
+ await waitFor(() => expect(screen.getByText(/Per-feature defaults/i)).toBeInTheDocument());
+
+ const select = screen.getByLabelText(/recommendation/i) as HTMLSelectElement;
+ fireEvent.change(select, { target: { value: '1' } });
+
+ await waitFor(() =>
+ expect(api.setLlmFeaturePreference).toHaveBeenCalledWith({
+ feature: 'recommendation',
+ connector_id: 1,
+ }),
+ );
+ });
+
+ it('clears a pin when "Use account default" is selected', async () => {
+ (api.listLlmFeaturePreferences as any).mockResolvedValue({
+ preferences: [{ feature: 'recommendation', connector_id: 1 }],
+ known_features: ['recommendation', 'set_builder'],
+ });
+ (api.clearLlmFeaturePreference as any).mockResolvedValue({
+ preferences: [],
+ known_features: ['recommendation', 'set_builder'],
+ });
+
+ render(<AiProvidersSection />);
+ await waitFor(() => expect(screen.getByText(/Per-feature defaults/i)).toBeInTheDocument());
+
+ const select = screen.getByLabelText(/recommendation/i) as HTMLSelectElement;
+ fireEvent.change(select, { target: { value: '' } });
+
+ await waitFor(() =>
+ expect(api.clearLlmFeaturePreference).toHaveBeenCalledWith('recommendation'),
+ );
+ });
+});
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd dashboard && npm test -- --run AiProvidersSection.featurePrefs`
+Expected: FAIL — no "Per-feature defaults" section yet.
+
+- [ ] **Step 3: Implement the section**
+
+In `dashboard/components/AiProvidersSection.tsx`:
+
+Add to the type import block:
+
+```typescript
+import type {
+ AIModelInfo,
+ LlmConnector,
+ LlmConnectorCreate,
+ LlmConnectorType,
+ LlmDjPolicy,
+ LlmFeaturePreferences,
+ LlmFeatureKey,
+} from '@/lib/api-types';
+```
+
+Add a human-readable feature label map near `CONNECTOR_TYPE_LABELS`:
+
+```typescript
+const FEATURE_LABELS: Record<LlmFeatureKey, string> = {
+ recommendation: 'Recommendations',
+ set_builder: 'Set builder',
+};
+```
+
+Add state inside the component (next to the other `useState` hooks):
+
+```typescript
+ const [featurePrefs, setFeaturePrefs] = useState<LlmFeaturePreferences | null>(null);
+```
+
+Add `api.listLlmFeaturePreferences()` to the initial `Promise.all`, wrapped in a `fetchFeaturePrefsSoft()` helper alongside the existing `fetchPolicySoft()`:
+
+```typescript
+ Promise.all([api.listLlmConnectors(), fetchPolicySoft(), fetchFeaturePrefsSoft()])
+ .then(([rows, p, prefs]) => {
+ if (!active) return;
+ setConnectors(rows);
+ setPolicy(p);
+ setFeaturePrefs(prefs);
+ })
+```
+
+Add handlers near `handleUnsetDefault`:
+
+```typescript
+ const handleFeaturePrefChange = async (feature: LlmFeatureKey, value: string) => {
+ try {
+ const updated =
+ value === ''
+ ? await api.clearLlmFeaturePreference(feature)
+ : await api.setLlmFeaturePreference({
+ feature,
+ connector_id: Number(value),
+ });
+ setFeaturePrefs(updated);
+ setError('');
+ } catch (err) {
+ setError(err instanceof Error ? err.message : 'Failed to update feature default');
+ }
+ };
+```
+
+Add the section JSX after the "Connected providers" `<section>` (before the "Add provider" section). Only render it when there is at least one active connector to pin:
+
+```tsx
+ {featurePrefs && featurePrefs.known_features.length > 0 && (
+
+
Per-feature defaults
+
+ Pin a specific provider to each AI feature. Unpinned features use your account
+ default (or most-recently-used) connector. Inactive connectors are skipped
+ automatically.
+