diff --git a/.env.example b/.env.example index 865d3de1..0b3be8d3 100644 --- a/.env.example +++ b/.env.example @@ -118,6 +118,20 @@ BOOTSTRAP_ADMIN_PASSWORD=your-secure-password # TURNSTILE_SITE_KEY=your-site-key # TURNSTILE_SECRET_KEY=your-secret-key +# ============================================================================= +# LLM / AI providers +# ============================================================================= +# LLM credentials are managed per-DJ via the gateway connector system +# (admin: /admin/ai, DJ: /account, formerly /settings/ai) — there is NO env-var credential path. +# The recommendation engine routes every call through the gateway, which +# resolves the actor DJ's connector (or the org default). +# +# Historical note: the one-shot Alembic data migration (046_admin_ai_oauth) +# reads ANTHROPIC_API_KEY *once* on first upgrade, converting it into a +# system-default "anthropic_apikey" connector. Once that migration has run on a +# deploy, the env var is no longer consumed at runtime and can be dropped. The +# legacy env-var fallback in the recommendation engine was removed in #343. + # ============================================================================= # Frontend (Next.js) # ============================================================================= diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bcbc48f8..fa20bef7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,7 +65,17 @@ jobs: # PYSEC-2025-183 (pyjwt 2.10.1+ weak encryption, DISPUTED) - no fix released, the # pyjwt maintainers contest the advisory. We already pin pyjwt to the # latest available (2.12.1). Revisit when an upstream fix lands. - run: pip-audit --ignore-vuln CVE-2024-23342 --ignore-vuln CVE-2026-3219 --ignore-vuln CVE-2026-6357 --ignore-vuln PYSEC-2025-183 + # MAL-2026-4750 (fastapi 0.136.3 "malicious code", WITHDRAWN by OSV 2026-05-26) - + # False positive. 
0.136.3 is an official tiangolo release; the flagged + # dependency 'fastar' is a legitimate Rust-tar-bindings package + # (published Oct 2025, predates the release) and is pulled ONLY via + # fastapi's [standard] extra, which we do NOT install (we use plain + # fastapi + uvicorn[standard]) - so it never enters our dependency tree. + # We deliberately stay on 0.136.3 for its underscore-header rejection + # (PR #15589) and SSE field validation (PR #15588). OSV withdrew the + # advisory; pip-audit's feed still serves it. REMOVE this ignore once + # the withdrawn entry is purged from the feed. + run: pip-audit --ignore-vuln CVE-2024-23342 --ignore-vuln CVE-2026-3219 --ignore-vuln CVE-2026-6357 --ignore-vuln PYSEC-2025-183 --ignore-vuln MAL-2026-4750 - name: Run tests with coverage env: diff --git a/CLAUDE.md b/CLAUDE.md index e5895382..2758f1c2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -66,7 +66,7 @@ NEXT_PUBLIC_API_URL="http://LAN_IP:8000" npm run dev - Encryption: `TOKEN_ENCRYPTION_KEY` (Fernet, 44 chars base64) — required in production for OAuth token encryption - Beatport: `BEATPORT_CLIENT_ID`, `BEATPORT_CLIENT_SECRET`, `BEATPORT_REDIRECT_URI`, `BEATPORT_AUTH_BASE_URL` - Soundcharts: `SOUNDCHARTS_APP_ID`, `SOUNDCHARTS_API_KEY` (song discovery for recommendations) -- Anthropic (LLM recommendations): `ANTHROPIC_API_KEY`, `ANTHROPIC_MODEL` (default: `claude-haiku-4-5-20251001`), `ANTHROPIC_MAX_TOKENS`, `ANTHROPIC_TIMEOUT_SECONDS` +- Anthropic (LLM recommendations): credentials live in the LLM Gateway connector system — there is **no env-var credential path**. The one-shot Alembic migration `046_admin_ai_oauth` reads `ANTHROPIC_API_KEY` *once* on first upgrade to seed a connector; the legacy env-var fallback in the recommendation engine was removed in #343. `ANTHROPIC_MODEL` (default: `claude-haiku-4-5-20251001`) is retained only as the default model-name label on recommendation responses and for the admin AI-settings/model-listing endpoints. 
The `ANTHROPIC_MAX_TOKENS` / `ANTHROPIC_TIMEOUT_SECONDS` settings were removed. ## Running CI Checks Locally @@ -312,13 +312,32 @@ REJECTED → NEW (re-open) - `server/app/services/track_normalizer.py` — track normalization & remix detection - `server/app/services/version_filter.py` — filters unwanted versions (karaoke, demo) with fuzzy matching +### LLM Gateway (provider-agnostic dispatch) +- `server/app/services/llm/` — connector-based dispatch usable by any agentic feature: + - `gateway.py` — `Gateway.dispatch(db, actor, request, *, purpose)` resolves a connector (per-DJ MRU → org default → raise `NoLlmConfigured`) and routes through the matching adapter. Logs every call to `llm_call_log` (counts only — never prompt/completion content) and writes a `llm_audit_event` row for credential lifecycle events. + - `base.py` — canonical `ChatRequest` / `ChatResponse` / `ToolSpec` / `LlmAdapter` ABC + - `registry.py` — connector_type → adapter class lookup; auto-registers all adapters on import + - `tool_translation.py` — JSON-Schema ToolSpec ↔ per-provider tool/function shape + response parsers + - `url_validator.py` — validates custom OpenAI-compatible base URLs (HTTPS any host; HTTP loopback + RFC1918 only) + - `connector_storage.py` — CRUD + validation + audit/call logging helpers + - `exceptions.py` — `AuthInvalid` / `RateLimited` / `QuotaExceeded` / `ProviderUnavailable` / `ToolTranslationError` / `NoLlmConfigured` + - `adapters/openai_apikey.py` — OpenAI Platform API-key adapter (httpx-based) + - `adapters/openai_compatible.py` — Custom OpenAI-compatible endpoint (Hermes Agent, Ollama, vLLM, LMStudio) + - `adapters/anthropic_apikey.py` — Anthropic API-key adapter (uses the `anthropic` SDK) +- Models: `LlmConnector` (encrypted credentials via `EncryptedText`), `LlmCallLog`, `LlmAuditEvent` +- Admin endpoints (`/api/admin/llm/*`): connector policy, force-revoke, usage rollup +- DJ endpoints (`/api/llm/connectors`): list/create/rotate/test/delete (rate-limited, scoped 
to current user) +- Admin UI: `/admin/ai` (policy + per-DJ table + usage) +- DJ UI: the "AI / Model providers" section on `/account` (relocated from `/settings/ai` in #357; connect/test/delete; includes Hermes onboarding for ChatGPT subscription path) +- The recommendation engine routes through the gateway (`actor = event.created_by`, `purpose = "recommendation"`); `call_llm` now **requires** a `db` session — the legacy direct-Anthropic env-var fallback was removed in #343 (the connector system is the sole credential source). + ### Recommendation Engine - `server/app/services/recommendation/` — multi-stage pipeline: - `service.py` — orchestrator: profile analysis → search → scoring → deduplication - `enrichment.py` — fills missing BPM/key/genre from Beatport/MusicBrainz/Tidal (for recommendations; request-level enrichment is in `sync/orchestrator.py`) - `scorer.py` — multi-dimensional scoring: BPM compatibility, harmonic mixing, genre affinity, artist diversity penalties - `camelot.py` — harmonic mixing wheel (Camelot key compatibility, half-time/double-time BPM) - - `llm_client.py` — Claude Haiku integration (6/min rate limit, forced tool_use schema for structured JSON) + - `llm_client.py` — gateway-backed query generation (forced `tool_use` schema for structured JSON; requires `db` — the legacy direct-Anthropic env-var fallback was removed in #343) - `llm_hooks.py` — structured response models for LLM queries - `template.py` — playlist-based template recommendations (DJ picks a Tidal/Beatport playlist as "vibe" source) - `mb_verify.py` — MusicBrainz artist verification to detect AI-generated filler tracks (cached in DB) diff --git a/dashboard/app/(dj)/account/__tests__/page.test.tsx b/dashboard/app/(dj)/account/__tests__/page.test.tsx index 0832b937..7c79abe2 100644 --- a/dashboard/app/(dj)/account/__tests__/page.test.tsx +++ b/dashboard/app/(dj)/account/__tests__/page.test.tsx @@ -27,6 +27,13 @@ const { mockGetMe, mockChangePassword, mockRequestEmailChange, mockUpdateMyPrefe changePassword: (...args: unknown[]) => 
changePassword(...args), requestEmailChange: (...args: unknown[]) => requestEmailChange(...args), updateMyPreferences: (...args: unknown[]) => updateMyPreferences(...args), + // The AI providers section (relocated from /settings/ai, #357) mounts + // inside the account page. Stub its API surface so the section can render + // without network access. getLlmPolicy rejects → fail-closed (no extra UI). + // These live on the shared mockApi object so vi.spyOn(mockApi, ...) in + // individual tests still rebinds the same reference the page calls. + listLlmConnectors: () => Promise.resolve([]), + getLlmPolicy: () => Promise.reject(new Error('forbidden')), }, }; }); @@ -58,6 +65,13 @@ describe('AccountPage', () => { }); }); + it('renders the relocated AI / Model providers section', async () => { + render(); + await waitFor(() => { + expect(screen.getByText('AI / Model providers')).toBeInTheDocument(); + }); + }); + it('submits password change with correct payload', async () => { mockChangePassword.mockResolvedValue({ status: 'ok', message: 'Updated' }); render(); diff --git a/dashboard/app/(dj)/account/page.tsx b/dashboard/app/(dj)/account/page.tsx index 4191d22c..0c497b5f 100644 --- a/dashboard/app/(dj)/account/page.tsx +++ b/dashboard/app/(dj)/account/page.tsx @@ -6,6 +6,7 @@ import { useRouter } from 'next/navigation'; import { useAuth } from '@/lib/auth'; import { api } from '@/lib/api'; +import AiProvidersSection from '@/components/AiProvidersSection'; export default function AccountPage() { const router = useRouter(); @@ -115,7 +116,7 @@ export default function AccountPage() { if (isLoading || !isAuthenticated) return null; return ( -
+
← Dashboard @@ -223,6 +224,10 @@ export default function AccountPage() { )}
+
+ +
+

Guest Experience

); } diff --git a/dashboard/components/AiProvidersSection.tsx b/dashboard/components/AiProvidersSection.tsx new file mode 100644 index 00000000..956e41d8 --- /dev/null +++ b/dashboard/components/AiProvidersSection.tsx @@ -0,0 +1,803 @@ +'use client'; + +import { useEffect, useMemo, useState } from 'react'; + +import { api } from '@/lib/api'; +import type { + AIModelInfo, + LlmConnector, + LlmConnectorCreate, + LlmConnectorType, + LlmDjPolicy, + LlmFeatureKey, + LlmFeaturePreferences, +} from '@/lib/api-types'; + +const CONNECTOR_TYPE_LABELS: Record = { + openai_apikey: 'OpenAI API key', + anthropic_apikey: 'Anthropic API key', + openrouter_apikey: 'OpenRouter API key', + xai_apikey: 'xAI Grok API key', + gemini_apikey: 'Google Gemini API key', + openai_compatible: 'Custom OpenAI-compatible endpoint', + bedrock: 'AWS Bedrock', + azure_openai: 'Azure OpenAI', +}; + +const STATUS_LABELS: Record = { + active: { text: 'Active', color: 'var(--color-success)' }, + auth_invalid: { text: 'Auth invalid', color: 'var(--color-danger)' }, + disabled: { text: 'Disabled', color: 'var(--text-secondary)' }, +}; + +// Human-readable labels for the pinnable agentic features (issue #337). Falls +// back to the raw feature key for any feature the backend adds before the UI +// learns its label. +const FEATURE_LABELS: Record = { + recommendation: 'Recommendations', + set_builder: 'Set builder', +}; + +// Provider-specific input placeholders. Missing entries fall back to the +// per-field default below (openai_apikey for the key, openai_compatible for +// the model hint), preserving the previous nested-ternary behavior. +const API_KEY_PLACEHOLDERS: Partial> = { + anthropic_apikey: 'sk-ant-…', + openrouter_apikey: 'sk-or-…', + xai_apikey: 'xai-…', + gemini_apikey: 'AIza…', +}; +const API_KEY_PLACEHOLDER_DEFAULT = 'sk-proj-… / sk-…'; + +const MODEL_HINT_PLACEHOLDERS: Partial> = { + anthropic_apikey: 'claude-haiku-4-5-20251001', + openai_apikey: 'gpt-5-mini', + openrouter_apikey: 'e.g. 
openai/gpt-4o-mini', + xai_apikey: 'grok-3-mini', + gemini_apikey: 'gemini-2.5-flash', +}; +const MODEL_HINT_PLACEHOLDER_DEFAULT = 'e.g. llama3'; + +interface FormState { + open: boolean; + connector_type: LlmConnectorType; + display_name: string; + api_key: string; + base_url: string; + bearer: string; + model_hint: string; + aws_access_key_id: string; + aws_secret_access_key: string; + aws_region: string; + aws_model_id: string; + azure_resource_name: string; + azure_deployment_name: string; + azure_api_version: string; +} + +const EMPTY_FORM: FormState = { + open: false, + connector_type: 'openai_apikey', + display_name: '', + api_key: '', + base_url: '', + bearer: '', + model_hint: '', + aws_access_key_id: '', + aws_secret_access_key: '', + aws_region: '', + aws_model_id: '', + azure_resource_name: '', + azure_deployment_name: '', + azure_api_version: '', +}; + +/** + * DJ-facing AI connector management UI (connect / test / delete, model hint, + * Hermes onboarding). Relocated from the standalone `/settings/ai` route into + * the `/account` page (issue #357). The component assumes the parent already + * enforces authentication — it does no auth gating of its own. + * + * Fail-closed behavior is preserved: when the DJ-scoped policy endpoint can't + * be read, NO provider types are offered rather than leaking every type. 
+ */ +export default function AiProvidersSection() { + const [policy, setPolicy] = useState(null); + const [connectors, setConnectors] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(''); + const [form, setForm] = useState(EMPTY_FORM); + const [submitting, setSubmitting] = useState(false); + const [submitMessage, setSubmitMessage] = useState(''); + const [submitError, setSubmitError] = useState(''); + const [testStateById, setTestStateById] = useState>({}); + // Live streamed text per connector for the "Stream test" button, plus the id + // currently streaming (drives the disabled state + label). + const [streamTextById, setStreamTextById] = useState>({}); + const [streamingId, setStreamingId] = useState(null); + const [openrouterModels, setOpenrouterModels] = useState([]); + const [openrouterModelsLoaded, setOpenrouterModelsLoaded] = useState(false); + const [featurePrefs, setFeaturePrefs] = useState(null); + + useEffect(() => { + let active = true; + setLoading(true); + setError(''); + Promise.all([api.listLlmConnectors(), fetchPolicySoft(), fetchFeaturePrefsSoft()]) + .then(([rows, p, prefs]) => { + if (!active) return; + setConnectors(rows); + setPolicy(p); + setFeaturePrefs(prefs); + }) + .catch((err) => { + if (!active) return; + setError(err instanceof Error ? err.message : 'Failed to load'); + }) + .finally(() => { + if (active) setLoading(false); + }); + return () => { + active = false; + }; + }, []); + + // Lazily fetch the OpenRouter model catalogue the first time a DJ opens the + // form on the OpenRouter type. Best-effort: an empty list (or a failed fetch) + // simply falls back to the free-text model input. Fetched once per mount. 
+ const wantsOpenrouterModels = form.open && form.connector_type === 'openrouter_apikey'; + useEffect(() => { + if (!wantsOpenrouterModels || openrouterModelsLoaded) return; + setOpenrouterModelsLoaded(true); + api + .listOpenRouterModels() + .then((res) => setOpenrouterModels(res.models)) + .catch(() => { + // Swallow — the dropdown gracefully degrades to free-text entry. + }); + }, [wantsOpenrouterModels, openrouterModelsLoaded]); + + const allowedTypes = useMemo(() => { + // Fail closed: when the policy can't be read, offer no providers rather than + // surfacing every type and letting the DJ pick one the admin disabled (the + // create call would 403). The server is the source of truth for the set. + if (!policy) return []; + return policy.allowed_connector_types as LlmConnectorType[]; + }, [policy]); + + // onChange factory for the plain string form fields — every text input/select + // updates exactly one FormState key with the raw value. connector_type stays + // inline because it needs a cast to LlmConnectorType. + const handleField = + (key: Exclude) => + (e: React.ChangeEvent) => + setForm((f) => ({ ...f, [key]: e.target.value })); + + const handleOpenForm = () => { + if (allowedTypes.length === 0) { + setSubmitError('Connector creation is currently disabled by admin policy.'); + setSubmitMessage(''); + return; + } + setForm({ ...EMPTY_FORM, open: true, connector_type: allowedTypes[0] }); + setSubmitMessage(''); + setSubmitError(''); + }; + + const handleCancel = () => { + setForm(EMPTY_FORM); + setSubmitError(''); + }; + + const handleCreate = async (e: React.FormEvent) => { + e.preventDefault(); + setSubmitting(true); + setSubmitMessage(''); + setSubmitError(''); + const isCompatible = form.connector_type === 'openai_compatible'; + const isBedrock = form.connector_type === 'bedrock'; + const isAzure = form.connector_type === 'azure_openai'; + // API-key providers: everything that isn't openai_compatible or bedrock. 
+ // Azure also carries an api_key (plus its azure_* fields). + const isApiKey = !isCompatible && !isBedrock; + const payload: LlmConnectorCreate = { + connector_type: form.connector_type, + display_name: form.display_name, + // Bedrock has no model_hint field (it uses aws_model_id); never post a + // stale hint left over from a prior connector-type selection. + model_hint: isBedrock ? null : form.model_hint || null, + api_key: isApiKey ? form.api_key : null, + base_url: isCompatible ? form.base_url : null, + bearer: isCompatible ? form.bearer || null : null, + aws_access_key_id: isBedrock ? form.aws_access_key_id : null, + aws_secret_access_key: isBedrock ? form.aws_secret_access_key : null, + aws_region: isBedrock ? form.aws_region : null, + aws_model_id: isBedrock ? form.aws_model_id : null, + azure_resource_name: isAzure ? form.azure_resource_name : null, + azure_deployment_name: isAzure ? form.azure_deployment_name : null, + azure_api_version: isAzure ? form.azure_api_version : null, + }; + try { + const created = await api.createLlmConnector(payload); + setConnectors((prev) => [created, ...prev]); + setForm(EMPTY_FORM); + setSubmitMessage(`Created "${created.display_name}". Run "Test" to verify it works.`); + } catch (err) { + setSubmitError( + err instanceof Error ? err.message : 'Create failed (check your inputs)', + ); + } finally { + setSubmitting(false); + } + }; + + const handleTest = async (id: number) => { + setTestStateById((s) => ({ ...s, [id]: 'Testing…' })); + try { + const result = await api.testLlmConnector(id); + setTestStateById((s) => ({ + ...s, + [id]: result.ok ? 'OK' : `Failed: ${result.error_code ?? 'unknown'}`, + })); + // Refresh the row so updated status renders + const fresh = await api.listLlmConnectors(); + setConnectors(fresh); + } catch (err) { + setTestStateById((s) => ({ + ...s, + [id]: err instanceof Error ? 
err.message : 'Test failed', + })); + } + }; + + const handleStreamTest = async (id: number) => { + setStreamTextById((s) => ({ ...s, [id]: '' })); + setStreamingId(id); + try { + await api.streamConnectorTest(id, (chunk) => { + if (chunk.text_delta) { + setStreamTextById((s) => ({ ...s, [id]: (s[id] ?? '') + chunk.text_delta })); + } + }); + } catch (err) { + setStreamTextById((s) => ({ + ...s, + [id]: err instanceof Error ? `(stream test failed: ${err.message})` : '(stream test failed)', + })); + } finally { + setStreamingId(null); + } + }; + + const handleDelete = async (id: number) => { + if (!window.confirm('Delete this connector? This cannot be undone.')) return; + try { + await api.deleteLlmConnector(id); + setConnectors((prev) => prev.filter((c) => c.id !== id)); + } catch (err) { + setError(err instanceof Error ? err.message : 'Delete failed'); + } + }; + + // Set / unset the per-DJ explicit default (issue #336). Optimistic update on + // the full list keeps the radio state consistent (exactly one row is default + // at any time) without waiting for a refetch. + const handleSetDefault = async (id: number) => { + try { + const updated = await api.setLlmConnectorDefault(id); + setConnectors((prev) => + prev.map((c) => + c.id === updated.id + ? updated + : c.user_id === updated.user_id + ? { ...c, is_default: false } + : c, + ), + ); + } catch (err) { + setError(err instanceof Error ? err.message : 'Failed to set default'); + } + }; + + const handleUnsetDefault = async (id: number) => { + try { + const updated = await api.unsetLlmConnectorDefault(id); + setConnectors((prev) => prev.map((c) => (c.id === updated.id ? updated : c))); + } catch (err) { + setError(err instanceof Error ? err.message : 'Failed to clear default'); + } + }; + + // Per-feature pin (issue #337). An empty select value clears the pin (use the + // account default); any connector id sets/replaces it. The endpoint returns + // the full updated list, so we store it verbatim. 
+ const handleFeaturePrefChange = async (feature: LlmFeatureKey, value: string) => { + try { + const updated = + value === '' + ? await api.clearLlmFeaturePreference(feature) + : await api.setLlmFeaturePreference({ feature, connector_id: Number(value) }); + setFeaturePrefs(updated); + setError(''); + } catch (err) { + setError(err instanceof Error ? err.message : 'Failed to update feature default'); + } + }; + + return ( +
+

+ AI / Model providers +

+ +

+ Connect your own LLM provider so AI-assisted features (recommendations, etc.) bill to + your account. Credentials are encrypted at rest. Calls consume your account's API or + subscription quota directly. +

+ + {loading &&
Loading…
} + {error &&
{error}
} + {submitMessage && ( +
{submitMessage}
+ )} + {submitError && ( +
{submitError}
+ )} + +
+

Connected providers

+ {connectors.length === 0 && !loading && ( +

No connectors yet.

+ )} + {connectors.map((c) => { + const status = STATUS_LABELS[c.status] ?? { text: c.status, color: 'var(--text-secondary)' }; + // Pin / unpin is only meaningful for active connectors — the gateway + // skips inactive defaults, so don't let the DJ pin a row that + // resolution would silently bypass. + const canPin = c.status === 'active'; + const radioId = `connector-default-${c.id}`; + return ( +
+
+
+
+
{c.display_name}
+ {c.is_default && ( + + Default + + )} +
+
+ {CONNECTOR_TYPE_LABELS[c.connector_type as LlmConnectorType] ?? c.connector_type} + {c.model_hint ? ` · ${c.model_hint}` : ''} + {c.base_url_plain ? ` · ${c.base_url_plain}` : ''} +
+
+ {status.text} + {testStateById[c.id] ? ` · ${testStateById[c.id]}` : ''} +
+ {/* Radio for "Set as default" — exactly one connector per DJ may be pinned. */} + +
+
+ + + +
+ {streamTextById[c.id] !== undefined && streamTextById[c.id] !== '' && ( +
+ {streamTextById[c.id]} +
+ )} +
+
+ ); + })} +
+ + {featurePrefs && featurePrefs.known_features.length > 0 && ( +
+

Per-feature defaults

+

+ Pin a specific provider to each AI feature. Unpinned features use your account + default (or most-recently-used) connector. Inactive connectors are skipped + automatically. +

+ {featurePrefs.known_features.map((feature) => { + const current = + featurePrefs.preferences.find((p) => p.feature === feature)?.connector_id ?? ''; + const selectId = `feature-pref-${feature}`; + const activeConnectors = connectors.filter((c) => c.status === 'active'); + return ( +
+ + +
+ ); + })} +
+ )} + +
+ {allowedTypes.length === 0 && !form.open && !loading && ( +

+ Connector creation is currently disabled by admin policy. +

+ )} + {allowedTypes.length > 0 && !form.open && ( + + )} + {form.open && ( +
+

Add provider

+ +
+ + +
+ +
+ + +
+ + {form.connector_type === 'bedrock' ? ( + <> +
+ + +
+
+ + +
+
+ + +
+
+ + +

+ Calls are signed with AWS SigV4 and billed to your AWS account. + Claude (anthropic.*) and Llama (meta.*) + model families are supported. +

+
+ + ) : form.connector_type === 'azure_openai' ? ( + <> +
+ + +
+
+ + +

+ The resource subdomain in{' '} + https://<resource>.openai.azure.com. +

+
+
+ + +
+
+ + +
+ + ) : form.connector_type !== 'openai_compatible' ? ( +
+ + +
+ ) : ( + <> +
+ + +

+ HTTPS is required for public hosts. HTTP is only allowed for loopback ( + 127.0.0.1, localhost) and private (RFC1918) IPs. +

+
+
+ + +
+
+ + Want to use your ChatGPT Plus / Pro subscription? + +

+ Install{' '} + + Hermes Agent + + , run hermes proxy, and paste the URL it prints below. Your + ChatGPT account never leaves your machine — WrzDJ only talks to your local + Hermes proxy. +

+
+ + )} + + {form.connector_type !== 'bedrock' && ( +
+ + {form.connector_type === 'openrouter_apikey' && openrouterModels.length > 0 ? ( + <> + +

+ Each model routes through OpenRouter and bills your account at that model's + OpenRouter rate (see openrouter.ai/models for per-token pricing). +

+ + ) : ( + + )} +
+ )} + +
+ + +
+
+ )} +
+
+ ); +} + +async function fetchPolicySoft(): Promise { + // Read the DJ-scoped policy endpoint. On any failure we return null and the + // UI fails *closed* (no providers offered) — see `allowedTypes`. This avoids + // showing a DJ a provider the admin disabled, only to have the create call + // reject it with a 403. + try { + return await api.getLlmPolicy(); + } catch { + return null; + } +} + +async function fetchFeaturePrefsSoft(): Promise { + // Read the DJ's per-feature pins. On any failure we return null and the + // "Per-feature defaults" section is simply hidden — it's an enhancement, not + // load-bearing, so a transient error must not break the whole page. + try { + return await api.listLlmFeaturePreferences(); + } catch { + return null; + } +} diff --git a/dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx b/dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx new file mode 100644 index 00000000..cbabb2d3 --- /dev/null +++ b/dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx @@ -0,0 +1,124 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { render, screen, waitFor, fireEvent } from '@testing-library/react'; + +import AiProvidersSection from '../AiProvidersSection'; +import { api } from '@/lib/api'; +import type { LlmConnector } from '@/lib/api-types'; + +const NOW = new Date().toISOString(); + +function makeConnector(overrides: Partial = {}): LlmConnector { + return { + id: 1, + user_id: 42, + connector_type: 'openai_apikey', + display_name: 'My OpenAI', + status: 'active', + base_url_plain: null, + model_hint: null, + created_at: NOW, + updated_at: NOW, + last_used_at: null, + last_error: null, + is_default: false, + last_health_check_at: null, + last_health_check_status: null, + monthly_token_cap: null, + ...overrides, + }; +} + +describe('AiProvidersSection per-feature defaults', () => { + beforeEach(() => { + vi.restoreAllMocks(); + vi.spyOn(api, 
'listLlmConnectors').mockResolvedValue([makeConnector()]); + vi.spyOn(api, 'getLlmPolicy').mockResolvedValue({ + llm_apikey_connectors_enabled: true, + llm_compatible_connector_enabled: true, + allowed_connector_types: ['openai_apikey'], + }); + vi.spyOn(api, 'listLlmFeaturePreferences').mockResolvedValue({ + preferences: [], + known_features: ['recommendation', 'set_builder'], + }); + }); + + it('renders a picker per known feature and sets a pin', async () => { + const setSpy = vi.spyOn(api, 'setLlmFeaturePreference').mockResolvedValue({ + preferences: [{ feature: 'recommendation', connector_id: 1 }], + known_features: ['recommendation', 'set_builder'], + }); + + render(); + + await waitFor(() => + expect(screen.getByText('Per-feature defaults')).toBeInTheDocument(), + ); + + // One picker per known feature. + expect(screen.getByLabelText('Recommendations')).toBeInTheDocument(); + expect(screen.getByLabelText('Set builder')).toBeInTheDocument(); + + const select = screen.getByLabelText('Recommendations') as HTMLSelectElement; + fireEvent.change(select, { target: { value: '1' } }); + + await waitFor(() => + expect(setSpy).toHaveBeenCalledWith({ + feature: 'recommendation', + connector_id: 1, + }), + ); + }); + + it('clears a pin when "Use account default" is selected', async () => { + vi.spyOn(api, 'listLlmFeaturePreferences').mockResolvedValue({ + preferences: [{ feature: 'recommendation', connector_id: 1 }], + known_features: ['recommendation', 'set_builder'], + }); + const clearSpy = vi.spyOn(api, 'clearLlmFeaturePreference').mockResolvedValue({ + preferences: [], + known_features: ['recommendation', 'set_builder'], + }); + + render(); + await waitFor(() => + expect(screen.getByText('Per-feature defaults')).toBeInTheDocument(), + ); + + const select = screen.getByLabelText('Recommendations') as HTMLSelectElement; + // The current pin should be reflected as the selected value. 
+ expect(select.value).toBe('1'); + + fireEvent.change(select, { target: { value: '' } }); + + await waitFor(() => expect(clearSpy).toHaveBeenCalledWith('recommendation')); + }); + + it('hides the section when the preferences fetch fails (fail soft)', async () => { + vi.spyOn(api, 'listLlmFeaturePreferences').mockRejectedValue(new Error('boom')); + + render(); + + // The connectors list still renders… + await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument()); + // …but the per-feature section is absent. + expect(screen.queryByText('Per-feature defaults')).not.toBeInTheDocument(); + }); + + it('only offers active connectors in the picker', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([ + makeConnector({ id: 1, display_name: 'Active one', status: 'active' }), + makeConnector({ id: 2, display_name: 'Broken one', status: 'auth_invalid' }), + ]); + + render(); + await waitFor(() => + expect(screen.getByText('Per-feature defaults')).toBeInTheDocument(), + ); + + const select = screen.getByLabelText('Recommendations') as HTMLSelectElement; + const optionLabels = Array.from(select.options).map((o) => o.textContent); + expect(optionLabels).toContain('Active one'); + expect(optionLabels).not.toContain('Broken one'); + }); +}); diff --git a/dashboard/components/__tests__/AiProvidersSection.test.tsx b/dashboard/components/__tests__/AiProvidersSection.test.tsx new file mode 100644 index 00000000..749bd886 --- /dev/null +++ b/dashboard/components/__tests__/AiProvidersSection.test.tsx @@ -0,0 +1,471 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { render, screen, waitFor, fireEvent } from '@testing-library/react'; + +import AiProvidersSection from '../AiProvidersSection'; +import { api } from '@/lib/api'; +import type { LlmConnector, LlmConnectorType, LlmDjPolicy } from '@/lib/api-types'; + +const ALL_APIKEY_TYPES: LlmConnectorType[] = [ + 'openai_apikey', + 'anthropic_apikey', + 'openrouter_apikey', + 
'xai_apikey', + 'bedrock', + 'azure_openai', + 'gemini_apikey', +]; + +// Build a DJ policy payload. `allowed_connector_types` is what the server +// computes from the two toggles; the section renders exactly this set. +function makePolicy( + apikeyEnabled: boolean, + compatibleEnabled: boolean, +): LlmDjPolicy { + const allowed: LlmConnectorType[] = []; + if (apikeyEnabled) allowed.push(...ALL_APIKEY_TYPES); + if (compatibleEnabled) allowed.push('openai_compatible'); + return { + llm_apikey_connectors_enabled: apikeyEnabled, + llm_compatible_connector_enabled: compatibleEnabled, + allowed_connector_types: allowed, + }; +} + +const NOW = new Date().toISOString(); + +function makeConnector(overrides: Partial = {}): LlmConnector { + return { + id: 1, + user_id: 42, + connector_type: 'openai_apikey', + display_name: 'My OpenAI', + status: 'active', + base_url_plain: null, + model_hint: 'gpt-5-mini', + created_at: NOW, + updated_at: NOW, + last_used_at: null, + last_error: null, + is_default: false, + last_health_check_at: null, + last_health_check_status: null, + monthly_token_cap: null, + ...overrides, + }; +} + +describe('AiProvidersSection', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('renders the section heading', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden')); + + render(); + + expect(screen.getByText('AI / Model providers')).toBeInTheDocument(); + }); + + it('lists existing connectors', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([ + makeConnector({ display_name: 'My OpenAI' }), + makeConnector({ + id: 2, + connector_type: 'anthropic_apikey', + display_name: 'My Claude', + model_hint: 'claude-haiku', + }), + ]); + vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden')); + + render(); + + await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument()); + expect(screen.getByText('My 
Claude')).toBeInTheDocument(); + }); + + it('respects admin policy when filtering allowed connector types', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(false, true)); + + render(); + + await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument()); + fireEvent.click(screen.getByText('+ Add provider')); + + // Provider dropdown should only contain the openai_compatible option + const select = screen.getByLabelText('Provider') as HTMLSelectElement; + const optionValues = Array.from(select.options).map((o) => o.value); + expect(optionValues).toEqual(['openai_compatible']); + }); + + it('reads the DJ-scoped policy endpoint (not the admin one)', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + const adminPolicySpy = vi + .spyOn(api, 'getAdminLlmPolicy') + .mockRejectedValue(new Error('should not be called')); + const policySpy = vi + .spyOn(api, 'getLlmPolicy') + .mockResolvedValue(makePolicy(true, true)); + + render(); + + await waitFor(() => expect(policySpy).toHaveBeenCalled()); + expect(adminPolicySpy).not.toHaveBeenCalled(); + }); + + it('fails closed: hides all provider types when policy fetch fails', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + // Simulate the DJ policy endpoint being unavailable. + vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('unavailable')); + + render(); + + // No "+ Add provider" button — the picker is hidden entirely. 
+ await waitFor(() => + expect( + screen.getByText('Connector creation is currently disabled by admin policy.'), + ).toBeInTheDocument(), + ); + expect(screen.queryByText('+ Add provider')).not.toBeInTheDocument(); + expect(screen.queryByLabelText('Provider')).not.toBeInTheDocument(); + }); + + it('fails closed: only api-key types when compatible is disabled (no leak of all)', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, false)); + + render(); + + await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument()); + fireEvent.click(screen.getByText('+ Add provider')); + + const select = screen.getByLabelText('Provider') as HTMLSelectElement; + const optionValues = Array.from(select.options).map((o) => o.value); + expect(optionValues).not.toContain('openai_compatible'); + expect(optionValues).toContain('openai_apikey'); + }); + + it('offers Azure OpenAI and reveals its config fields', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, true)); + + render(); + + await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument()); + fireEvent.click(screen.getByText('+ Add provider')); + + const select = screen.getByLabelText('Provider') as HTMLSelectElement; + const optionValues = Array.from(select.options).map((o) => o.value); + expect(optionValues).toContain('azure_openai'); + + // Switching to Azure surfaces the resource/deployment/api-version inputs. 
+ fireEvent.change(select, { target: { value: 'azure_openai' } }); + expect(screen.getByLabelText('API key')).toBeInTheDocument(); + expect(screen.getByLabelText('Resource name')).toBeInTheDocument(); + expect(screen.getByLabelText('Deployment name')).toBeInTheDocument(); + expect(screen.getByLabelText('API version')).toBeInTheDocument(); + }); + + it('sends Azure config fields on create', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, true)); + const createSpy = vi + .spyOn(api, 'createLlmConnector') + .mockResolvedValue(makeConnector({ connector_type: 'azure_openai' })); + + render(); + + await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument()); + fireEvent.click(screen.getByText('+ Add provider')); + + fireEvent.change(screen.getByLabelText('Provider'), { + target: { value: 'azure_openai' }, + }); + fireEvent.change(screen.getByLabelText('Display name'), { + target: { value: 'Venue Azure' }, + }); + fireEvent.change(screen.getByLabelText('API key'), { + target: { value: 'azure-secret' }, + }); + fireEvent.change(screen.getByLabelText('Resource name'), { + target: { value: 'venue-co' }, + }); + fireEvent.change(screen.getByLabelText('Deployment name'), { + target: { value: 'gpt4o-prod' }, + }); + fireEvent.change(screen.getByLabelText('API version'), { + target: { value: '2024-06-01' }, + }); + + fireEvent.click(screen.getByRole('button', { name: 'Save' })); + + await waitFor(() => expect(createSpy).toHaveBeenCalled()); + expect(createSpy).toHaveBeenCalledWith( + expect.objectContaining({ + connector_type: 'azure_openai', + api_key: 'azure-secret', + azure_resource_name: 'venue-co', + azure_deployment_name: 'gpt4o-prod', + azure_api_version: '2024-06-01', + }), + ); + }); + + it('offers AWS Bedrock when api-key connectors are enabled', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + vi.spyOn(api, 
'getLlmPolicy').mockResolvedValue(makePolicy(true, false)); + + render(); + + await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument()); + fireEvent.click(screen.getByText('+ Add provider')); + + const select = screen.getByLabelText('Provider') as HTMLSelectElement; + const optionValues = Array.from(select.options).map((o) => o.value); + expect(optionValues).toContain('bedrock'); + expect(optionValues).not.toContain('openai_compatible'); + + // Selecting Bedrock reveals the four AWS credential inputs. + fireEvent.change(select, { target: { value: 'bedrock' } }); + expect(screen.getByLabelText('AWS access key ID')).toBeInTheDocument(); + expect(screen.getByLabelText('AWS secret access key')).toBeInTheDocument(); + expect(screen.getByLabelText('AWS region')).toBeInTheDocument(); + expect(screen.getByLabelText('Bedrock model ID')).toBeInTheDocument(); + }); + + it('runs Test and surfaces the result', async () => { + const row = makeConnector(); + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([row]); + vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, true)); + const testSpy = vi.spyOn(api, 'testLlmConnector').mockResolvedValue({ + ok: true, + error_code: null, + message: null, + }); + // The refresh after Test re-lists connectors + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([row]); + + render(); + + await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument()); + fireEvent.click(screen.getByRole('button', { name: 'Test' })); + await waitFor(() => { + expect(testSpy).toHaveBeenCalledWith(1); + }); + }); + + it('runs Stream test and renders the streamed text live', async () => { + const row = makeConnector(); + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([row]); + vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, true)); + const streamSpy = vi + .spyOn(api, 'streamConnectorTest') + .mockImplementation(async (_id, onChunk) => { + onChunk({ text_delta: 'Online' }); + 
onChunk({ text_delta: ' and ready', done: false }); + onChunk({ stop_reason: 'end_turn', done: true }); + }); + + render(); + + await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument()); + fireEvent.click(screen.getByRole('button', { name: 'Stream test' })); + await waitFor(() => { + expect(streamSpy).toHaveBeenCalledWith(1, expect.any(Function)); + }); + await waitFor(() => + expect(screen.getByText('Online and ready')).toBeInTheDocument(), + ); + }); + + it('offers OpenRouter and fetches its model dropdown', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, false)); + const modelsSpy = vi.spyOn(api, 'listOpenRouterModels').mockResolvedValue({ + models: [ + { id: 'openai/gpt-4o-mini', name: 'GPT-4o mini' }, + { id: 'anthropic/claude-3.5-sonnet', name: 'Claude 3.5 Sonnet' }, + ], + }); + + render(); + + await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument()); + fireEvent.click(screen.getByText('+ Add provider')); + + const select = screen.getByLabelText('Provider') as HTMLSelectElement; + const optionValues = Array.from(select.options).map((o) => o.value); + expect(optionValues).toContain('openrouter_apikey'); + + // Switch to OpenRouter — the model catalogue should be fetched and rendered. + fireEvent.change(select, { target: { value: 'openrouter_apikey' } }); + await waitFor(() => expect(modelsSpy).toHaveBeenCalled()); + + // The dropdown options appear once the (async) fetch resolves. 
+ await screen.findByRole('option', { name: /GPT-4o mini/ }); + const modelSelect = screen.getByLabelText('Model (optional)') as HTMLSelectElement; + const modelValues = Array.from(modelSelect.options).map((o) => o.value); + expect(modelValues).toContain('openai/gpt-4o-mini'); + expect(modelValues).toContain('anthropic/claude-3.5-sonnet'); + }); + + it('creates an OpenRouter connector with the selected model', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([]); + vi.spyOn(api, 'getLlmPolicy').mockResolvedValue(makePolicy(true, false)); + vi.spyOn(api, 'listOpenRouterModels').mockResolvedValue({ + models: [{ id: 'openai/gpt-4o-mini', name: 'GPT-4o mini' }], + }); + const createSpy = vi.spyOn(api, 'createLlmConnector').mockResolvedValue( + makeConnector({ + connector_type: 'openrouter_apikey', + display_name: 'My OpenRouter', + model_hint: 'openai/gpt-4o-mini', + }), + ); + + render(); + await waitFor(() => expect(screen.getByText('+ Add provider')).toBeInTheDocument()); + fireEvent.click(screen.getByText('+ Add provider')); + + fireEvent.change(screen.getByLabelText('Provider'), { + target: { value: 'openrouter_apikey' }, + }); + fireEvent.change(screen.getByLabelText('Display name'), { + target: { value: 'My OpenRouter' }, + }); + fireEvent.change(screen.getByLabelText('API key'), { + target: { value: 'sk-or-v1-1234567890abcdef1234567890abcdef' }, + }); + + await screen.findByRole('option', { name: /GPT-4o mini/ }); + const modelSelect = screen.getByLabelText('Model (optional)') as HTMLSelectElement; + fireEvent.change(modelSelect, { target: { value: 'openai/gpt-4o-mini' } }); + + fireEvent.click(screen.getByRole('button', { name: 'Save' })); + + await waitFor(() => expect(createSpy).toHaveBeenCalled()); + expect(createSpy).toHaveBeenCalledWith( + expect.objectContaining({ + connector_type: 'openrouter_apikey', + display_name: 'My OpenRouter', + api_key: 'sk-or-v1-1234567890abcdef1234567890abcdef', + base_url: null, + bearer: null, + 
model_hint: 'openai/gpt-4o-mini', + }), + ); + }); + + // ---------- per-DJ default (issue #336) ---------- + + it('shows the Default badge on the pinned connector', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([ + makeConnector({ id: 1, display_name: 'Pinned', is_default: true }), + makeConnector({ id: 2, display_name: 'Other', is_default: false }), + ]); + vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden')); + + render(); + + await waitFor(() => expect(screen.getByText('Pinned')).toBeInTheDocument()); + // The badge is rendered next to the display name. + expect(screen.getByText('Default')).toBeInTheDocument(); + }); + + it('clicking the radio on an unpinned connector calls setDefault', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([ + makeConnector({ id: 1, display_name: 'A', is_default: true }), + makeConnector({ id: 2, display_name: 'B', is_default: false }), + ]); + vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden')); + const setSpy = vi + .spyOn(api, 'setLlmConnectorDefault') + .mockResolvedValue( + makeConnector({ id: 2, display_name: 'B', is_default: true }), + ); + + render(); + + await waitFor(() => expect(screen.getByText('B')).toBeInTheDocument()); + // The radio for connector B is unchecked; click to pin it. 
+ const radioB = screen.getByLabelText('Set as default'); + fireEvent.click(radioB); + + await waitFor(() => expect(setSpy).toHaveBeenCalledWith(2)); + }); + + it('clicking Unpin on the pinned connector calls unsetDefault', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([ + makeConnector({ id: 1, display_name: 'A', is_default: true }), + ]); + vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden')); + const unsetSpy = vi + .spyOn(api, 'unsetLlmConnectorDefault') + .mockResolvedValue(makeConnector({ id: 1, display_name: 'A', is_default: false })); + + render(); + + await waitFor(() => expect(screen.getByText('A')).toBeInTheDocument()); + fireEvent.click(screen.getByRole('button', { name: 'Unpin' })); + + await waitFor(() => expect(unsetSpy).toHaveBeenCalledWith(1)); + }); + + it('disables the radio on inactive connectors', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([ + makeConnector({ + id: 1, + display_name: 'Broken', + status: 'auth_invalid', + is_default: false, + }), + ]); + vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden')); + + render(); + + await waitFor(() => expect(screen.getByText('Broken')).toBeInTheDocument()); + const radio = screen.getByLabelText('Set as default') as HTMLInputElement; + expect(radio).toBeDisabled(); + }); + + it('optimistically clears the previous default when pinning a new one', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([ + makeConnector({ id: 1, user_id: 42, display_name: 'A', is_default: true }), + makeConnector({ id: 2, user_id: 42, display_name: 'B', is_default: false }), + ]); + vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('forbidden')); + vi.spyOn(api, 'setLlmConnectorDefault').mockResolvedValue( + makeConnector({ id: 2, user_id: 42, display_name: 'B', is_default: true }), + ); + + render(); + + await waitFor(() => expect(screen.getByText('B')).toBeInTheDocument()); + 
fireEvent.click(screen.getByLabelText('Set as default')); + + // After the optimistic update, the Default badge should sit next to B, not A. + await waitFor(() => { + const badge = screen.getByText('Default'); + // Badge is right beside the display name — walk up to the card. + const card = badge.closest('.card'); + expect(card?.textContent).toContain('B'); + }); + }); + + it('deletes after confirmation', async () => { + vi.spyOn(api, 'listLlmConnectors').mockResolvedValue([makeConnector()]); + vi.spyOn(api, 'getLlmPolicy').mockRejectedValue(new Error('nope')); + const delSpy = vi.spyOn(api, 'deleteLlmConnector').mockResolvedValue(); + vi.spyOn(window, 'confirm').mockReturnValue(true); + + render(); + + await waitFor(() => expect(screen.getByText('My OpenAI')).toBeInTheDocument()); + fireEvent.click(screen.getByRole('button', { name: 'Delete' })); + await waitFor(() => expect(delSpy).toHaveBeenCalledWith(1)); + }); +}); diff --git a/dashboard/lib/__tests__/api.test.ts b/dashboard/lib/__tests__/api.test.ts index b7126f09..655bbe1c 100644 --- a/dashboard/lib/__tests__/api.test.ts +++ b/dashboard/lib/__tests__/api.test.ts @@ -520,6 +520,127 @@ describe('ApiClient', () => { }); }); + describe('LLM Gateway API', () => { + beforeEach(() => { + api.setToken('test-token'); + }); + + it('lists per-DJ connectors', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => [ + { + id: 1, + user_id: 42, + connector_type: 'openai_apikey', + display_name: 'My OpenAI', + status: 'active', + base_url_plain: null, + model_hint: 'gpt-5-mini', + created_at: '2026-01-01T00:00:00Z', + updated_at: '2026-01-01T00:00:00Z', + last_used_at: null, + last_error: null, + }, + ], + }); + + const result = await api.listLlmConnectors(); + expect(result).toHaveLength(1); + expect(result[0].connector_type).toBe('openai_apikey'); + }); + + it('creates a connector via POST', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + id: 2, + user_id: 
42, + connector_type: 'openai_compatible', + display_name: 'Hermes', + status: 'active', + base_url_plain: 'http://127.0.0.1:11434/v1', + model_hint: null, + created_at: '2026-01-01T00:00:00Z', + updated_at: '2026-01-01T00:00:00Z', + last_used_at: null, + last_error: null, + }), + }); + + const result = await api.createLlmConnector({ + connector_type: 'openai_compatible', + display_name: 'Hermes', + base_url: 'http://127.0.0.1:11434/v1', + bearer: null, + api_key: null, + model_hint: null, + }); + expect(result.id).toBe(2); + + const [, options] = mockFetch.mock.calls[0]; + expect(options.method).toBe('POST'); + }); + + it('updates admin LLM policy via PATCH', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + llm_apikey_connectors_enabled: false, + llm_compatible_connector_enabled: true, + llm_default_connector_id: null, + }), + }); + const result = await api.updateAdminLlmPolicy({ + llm_apikey_connectors_enabled: false, + llm_compatible_connector_enabled: null, + llm_default_connector_id: null, + clear_default: true, + }); + expect(result.llm_apikey_connectors_enabled).toBe(false); + + const [, options] = mockFetch.mock.calls[0]; + expect(options.method).toBe('PATCH'); + }); + + it('fetches admin usage with days param', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ days: 30, rows: [] }), + }); + + await api.getAdminLlmUsage(30); + const [url] = mockFetch.mock.calls[0]; + expect(url).toContain('/api/admin/llm/usage?days=30'); + }); + + it('sets a connector monthly cap via PATCH', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ id: 7, monthly_token_cap: 5000, current_month_tokens: 200 }), + }); + + const result = await api.setAdminLlmConnectorCap(7, 5000); + expect(result.monthly_token_cap).toBe(5000); + + const [url, options] = mockFetch.mock.calls[0]; + expect(url).toContain('/api/admin/llm/connectors/7/cap'); + expect(options.method).toBe('PATCH'); + 
expect(JSON.parse(options.body)).toEqual({ monthly_token_cap: 5000 }); + }); + + it('clears a connector cap by passing null', async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ id: 7, monthly_token_cap: null, current_month_tokens: 0 }), + }); + + await api.setAdminLlmConnectorCap(7, null); + const [, options] = mockFetch.mock.calls[0]; + expect(JSON.parse(options.body)).toEqual({ monthly_token_cap: null }); + }); + }); + describe('Activity Log API', () => { beforeEach(() => { api.setToken('test-token'); @@ -2293,6 +2414,107 @@ describe('ApiClient', () => { }); }); + describe('streamConnectorTest', () => { + it('parses SSE data frames and invokes onChunk per frame', async () => { + const sse = + 'data: {"text_delta":"Hi","done":false}\n\n' + + 'data: {"text_delta":" there","done":false}\n\n' + + 'data: {"text_delta":"","stop_reason":"end_turn","done":true}\n\n'; + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sse)); + controller.close(); + }, + }); + const fetchMock = vi.spyOn(global, 'fetch').mockResolvedValueOnce( + new Response(stream, { + status: 200, + headers: { 'Content-Type': 'text/event-stream' }, + }), + ); + + api.setToken('jwt-token'); + const chunks: Array<{ text_delta?: string; done?: boolean }> = []; + await api.streamConnectorTest(7, (c) => chunks.push(c)); + + expect(chunks.map((c) => c.text_delta).join('')).toBe('Hi there'); + expect(chunks.at(-1)?.done).toBe(true); + + const init = fetchMock.mock.calls[0][1] as RequestInit; + const headers = new Headers(init.headers); + expect(headers.get('Authorization')).toBe('Bearer jwt-token'); + expect(init.method).toBe('POST'); + }); + + it('parses CRLF-delimited SSE frames, including event: error (#354)', async () => { + // A spec-compliant server or proxy may frame SSE with CRLF (\r\n\r\n) + // rather than LF. 
The parser must split frames and lines on either, or it + // silently drops every frame — including the typed `event: error`. + const sse = + 'data: {"text_delta":"Hi","done":false}\r\n\r\n' + + 'event: error\r\ndata: {"code":"ProviderUnavailable"}\r\n\r\n'; + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sse)); + controller.close(); + }, + }); + vi.spyOn(global, 'fetch').mockResolvedValueOnce( + new Response(stream, { + status: 200, + headers: { 'Content-Type': 'text/event-stream' }, + }), + ); + + api.setToken('jwt-token'); + const chunks: Array<{ text_delta?: string }> = []; + await expect( + api.streamConnectorTest(7, (c) => chunks.push(c)), + ).rejects.toThrowError(/ProviderUnavailable/); + // The CRLF-framed data frame before the error was still parsed. + expect(chunks.map((c) => c.text_delta).join('')).toBe('Hi'); + }); + + it('throws ApiError on non-OK response', async () => { + vi.spyOn(global, 'fetch').mockResolvedValueOnce( + new Response('nope', { status: 500 }), + ); + api.setToken('jwt-token'); + await expect(api.streamConnectorTest(7, () => {})).rejects.toBeInstanceOf(ApiError); + }); + + it('surfaces an SSE event: error frame as a thrown ApiError (#379)', async () => { + // The backend emits `event: error` + a sanitised `{code}` data line for + // typed gateway failures; the consumer must reject, not swallow it. 
+ const sse = + 'data: {"text_delta":"partial","done":false}\n\n' + + 'event: error\ndata: {"code":"ProviderUnavailable"}\n\n'; + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sse)); + controller.close(); + }, + }); + vi.spyOn(global, 'fetch').mockResolvedValueOnce( + new Response(stream, { + status: 200, + headers: { 'Content-Type': 'text/event-stream' }, + }), + ); + + api.setToken('jwt-token'); + const chunks: Array<{ text_delta?: string }> = []; + await expect( + api.streamConnectorTest(7, (c) => chunks.push(c)), + ).rejects.toThrowError(/ProviderUnavailable/); + // The leading valid chunk was still delivered before the error surfaced. + expect(chunks.map((c) => c.text_delta).join('')).toBe('partial'); + }); + }); + describe('frictionless join api', () => { it('getJoinConfig hits the public collect endpoint', async () => { mockFetch.mockResolvedValueOnce( diff --git a/dashboard/lib/api-types.generated.ts b/dashboard/lib/api-types.generated.ts index e6af9f56..72bc6c28 100644 --- a/dashboard/lib/api-types.generated.ts +++ b/dashboard/lib/api-types.generated.ts @@ -169,6 +169,149 @@ export interface paths { patch?: never; trace?: never; }; + "/api/admin/llm/audit": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * List Audit Events + * @description Browse the LLM audit trail (admin-only). + * + * Read-only view over ``llm_audit_event`` with optional filters and + * pagination. The target connector's display name is joined in — credential + * material is never read or returned. 
+ */ + get: operations["list_audit_events_api_admin_llm_audit_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/admin/llm/audit.csv": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Export Audit Events Csv + * @description Export the (filtered) audit trail as CSV (admin-only). + * + * Honors the same filters as ``GET /audit``. Capped at + * ``_AUDIT_CSV_ROW_CAP`` rows to avoid unbounded streaming. Columns: + * timestamp, actor, event_type, target_connector, notes. Never includes + * credential material. + */ + get: operations["export_audit_events_csv_api_admin_llm_audit_csv_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/admin/llm/connectors": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** List Connectors Admin */ + get: operations["list_connectors_admin_api_admin_llm_connectors_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/admin/llm/connectors/{connector_id}/cap": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + /** + * Set Connector Cap Admin + * @description Set or clear a connector's monthly token cap (admin-only, issue #339). + * + * ``monthly_token_cap = null`` clears the cap (unlimited). The change is + * pre-flight only: an in-flight gateway call already past its cap check is + * unaffected. Pydantic enforces the non-negative bound (``ge=0``); the + * service layer re-validates defensively. 
+ */ + patch: operations["set_connector_cap_admin_api_admin_llm_connectors__connector_id__cap_patch"]; + trace?: never; + }; + "/api/admin/llm/connectors/{connector_id}/revoke": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Revoke Connector Admin */ + post: operations["revoke_connector_admin_api_admin_llm_connectors__connector_id__revoke_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/admin/llm/policy": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get Policy */ + get: operations["get_policy_api_admin_llm_policy_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + /** Patch Policy */ + patch: operations["patch_policy_api_admin_llm_policy_patch"]; + trace?: never; + }; + "/api/admin/llm/usage": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get Usage */ + get: operations["get_usage_api_admin_llm_usage_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; "/api/admin/settings": { parameters: { query?: never; @@ -1299,24 +1442,25 @@ export interface paths { patch: operations["assign_kiosk_api_kiosk__kiosk_id__assign_patch"]; trace?: never; }; - "/api/public/collect/{code}": { + "/api/llm/connectors": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; - /** Preview */ - get: operations["preview_api_public_collect__code__get"]; + /** List Connectors */ + get: operations["list_connectors_api_llm_connectors_get"]; put?: never; - post?: never; + /** Create Connector Endpoint */ + post: operations["create_connector_endpoint_api_llm_connectors_post"]; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/collect/{code}/enrich-preview": { + 
"/api/llm/connectors/{connector_id}": { parameters: { query?: never; header?: never; @@ -1325,18 +1469,16 @@ export interface paths { }; get?: never; put?: never; - /** - * Enrich Preview - * @description Lightweight Beatport BPM/key lookup for search-time vibes — no DB writes. - */ - post: operations["enrich_preview_api_public_collect__code__enrich_preview_post"]; - delete?: never; + post?: never; + /** Delete Connector Endpoint */ + delete: operations["delete_connector_endpoint_api_llm_connectors__connector_id__delete"]; options?: never; head?: never; - patch?: never; + /** Update Connector Metadata */ + patch: operations["update_connector_metadata_api_llm_connectors__connector_id__patch"]; trace?: never; }; - "/api/public/collect/{code}/guest/ensure-name": { + "/api/llm/connectors/{connector_id}/credentials": { parameters: { query?: never; header?: never; @@ -1344,148 +1486,196 @@ export interface paths { cookie?: never; }; get?: never; - put?: never; - /** - * Ensure Name - * @description Frictionless-join name management. Auto-generates a nickname when none is - * set, or applies a manual rename. Gated on event.frictionless_join so it can - * never bypass email verification on a hardened (non-frictionless) event. - * - * Not anonymous: requires the `wrzdj_human` HMAC-signed verified-human cookie - * (set via Turnstile) through `require_verified_human_soft`. Calls without a - * resolvable verified-human guest are rejected with 403 - * `human_verification_required`. 
- */ - post: operations["ensure_name_api_public_collect__code__guest_ensure_name_post"]; + /** Rotate Connector Credentials */ + put: operations["rotate_connector_credentials_api_llm_connectors__connector_id__credentials_put"]; + post?: never; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/collect/{code}/join-config": { + "/api/llm/connectors/{connector_id}/default": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; + get?: never; + put?: never; /** - * Join Config - * @description Public, unauthenticated: lets the join page decide its gate mode on load. + * Set Connector As Default + * @description Pin this connector as the DJ's explicit default (issue #336). + * + * Atomically clears any other defaults the DJ owns before flipping this row, + * so the partial unique index never sees two True rows for the same user. + * + * Setting a disabled / auth_invalid connector as default is rejected with 400 + * so DJs don't silently break their own routing — a default that the gateway + * would skip anyway is a footgun. */ - get: operations["join_config_api_public_collect__code__join_config_get"]; - put?: never; - post?: never; - delete?: never; + post: operations["set_connector_as_default_api_llm_connectors__connector_id__default_post"]; + /** + * Unset Connector As Default + * @description Clear the explicit default — gateway resolution falls back to MRU. 
+ */ + delete: operations["unset_connector_as_default_api_llm_connectors__connector_id__default_delete"]; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/collect/{code}/leaderboard": { + "/api/llm/connectors/{connector_id}/stream-test": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; - /** Leaderboard */ - get: operations["leaderboard_api_public_collect__code__leaderboard_get"]; + get?: never; put?: never; - post?: never; + /** + * Stream Test Connector + * @description Stream a short sentence through the connector as ``text/event-stream``. + * + * Validates ownership up front (404 for connectors the DJ doesn't own — never + * leaks existence). Each SSE ``data:`` frame is a JSON ``ChatResponseChunk``. + * On a typed gateway error an ``event: error`` frame is emitted carrying only a + * sanitised code (never the upstream payload), then the stream ends. Client + * disconnect cancels the upstream provider request — the gateway generator's + * ``finally`` writes the counts-only call log and closes the adapter. + * + * Unlike the public guest SSE stream (``api/sse.py``), this endpoint is + * authenticated, rate-limited (10/min), and strictly bounded (max 64 output + * tokens), so it holds the request-scoped DB session for the brief stream + * lifetime rather than opening a detached ``SessionLocal`` — the pool-pinning + * concern that drove ``api/sse.py``'s pattern applies to unauthenticated, + * indefinitely-open guest connections, not a short admin health probe. 
+ */ + post: operations["stream_test_connector_api_llm_connectors__connector_id__stream_test_post"]; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/collect/{code}/live-join-code": { + "/api/llm/connectors/{connector_id}/test": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; + get?: never; + put?: never; /** - * Get Live Join Code - * @description Return the live join_code for an event that has entered the live phase. + * Test Connector + * @description Run a health check and return a sanitised result. * - * Requires a verified human cookie (not email verification) so the join_code - * is never leaked to unverified bots scraping /collect during the - * collection-to-live transition. The join_code is otherwise revealed only - * via the QR code at the event venue. + * Behaviour identical to the background monitor (issue #340), so the + * ``last_health_check_at`` / ``last_health_check_status`` columns and audit + * rows are written the same way on every invocation regardless of trigger + * source. See ``services/llm/health_check.py`` for the shared helper. */ - get: operations["get_live_join_code_api_public_collect__code__live_join_code_get"]; - put?: never; - post?: never; + post: operations["test_connector_api_llm_connectors__connector_id__test_post"]; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/collect/{code}/profile": { + "/api/llm/feature-preferences": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; - /** Get Profile */ - get: operations["get_profile_api_public_collect__code__profile_get"]; + /** + * List Feature Preferences + * @description List the DJ's per-feature connector pins (issue #337). 
+ */ + get: operations["list_feature_preferences_api_llm_feature_preferences_get"]; put?: never; - /** Set Profile */ - post: operations["set_profile_api_public_collect__code__profile_post"]; + /** + * Set Feature Preference Endpoint + * @description Pin (or re-pin) a connector to a feature for the current DJ. + * + * Validates connector ownership server-side (404 for IDs the DJ doesn't own, + * so another DJ's connector existence is never leaked) and rejects pinning a + * non-active connector (400) — the gateway would skip it anyway, so silently + * accepting it is a footgun. + */ + post: operations["set_feature_preference_endpoint_api_llm_feature_preferences_post"]; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/collect/{code}/profile/me": { + "/api/llm/feature-preferences/{feature}": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; - /** My Picks */ - get: operations["my_picks_api_public_collect__code__profile_me_get"]; + get?: never; put?: never; post?: never; - delete?: never; + /** + * Clear Feature Preference Endpoint + * @description Clear the DJ's pin for ``feature`` (no-op if unset). Returns the new list. + */ + delete: operations["clear_feature_preference_endpoint_api_llm_feature_preferences__feature__delete"]; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/collect/{code}/requests": { + "/api/llm/openrouter/models": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; - get?: never; + /** + * List Openrouter Models + * @description Return the OpenRouter model catalogue for the model-hint dropdown. + * + * Served from a process-wide TTL cache (refreshed hourly). The OpenRouter + * ``/models`` endpoint is public, so no connector credentials are required. + * Returns an empty list if the catalogue is unavailable — the frontend then + * falls back to a free-text model input. 
+ */ + get: operations["list_openrouter_models_api_llm_openrouter_models_get"]; put?: never; - /** Submit */ - post: operations["submit_api_public_collect__code__requests_post"]; + post?: never; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/collect/{code}/requests/{request_id}/preview": { + "/api/llm/policy": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; - /** Request Preview */ - get: operations["request_preview_api_public_collect__code__requests__request_id__preview_get"]; + /** + * Get Dj Policy + * @description DJ-readable connector policy (non-sensitive subset). + * + * The settings/ai page consumes this to fail *closed* — hiding connector + * types the admin has disabled rather than showing every provider and only + * discovering the block when the create call returns 403. Admin-only fields + * (e.g. ``llm_default_connector_id``) are intentionally excluded. + */ + get: operations["get_dj_policy_api_llm_policy_get"]; put?: never; post?: never; delete?: never; @@ -1494,71 +1684,71 @@ export interface paths { patch?: never; trace?: never; }; - "/api/public/collect/{code}/vote": { + "/api/public/collect/{code}": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; - get?: never; + /** Preview */ + get: operations["preview_api_public_collect__code__get"]; put?: never; - /** Vote */ - post: operations["vote_api_public_collect__code__vote_post"]; + post?: never; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/e/{code}/bridge-status": { + "/api/public/collect/{code}/enrich-preview": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; + get?: never; + put?: never; /** - * Get Public Bridge Status - * @description Get bridge connection status for public display. - * - * Independent of track data — returns bridge connectivity even when - * no track is currently playing. 
Resolves by join_code: serves guest-facing - * kiosk display + overlay pages. + * Enrich Preview + * @description Lightweight Beatport BPM/key lookup for search-time vibes — no DB writes. */ - get: operations["get_public_bridge_status_api_public_e__code__bridge_status_get"]; - put?: never; - post?: never; + post: operations["enrich_preview_api_public_collect__code__enrich_preview_post"]; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/e/{code}/history": { + "/api/public/collect/{code}/guest/ensure-name": { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; + get?: never; + put?: never; /** - * Get Public History - * @description Get play history for public display. + * Ensure Name + * @description Frictionless-join name management. Auto-generates a nickname when none is + * set, or applies a manual rename. Gated on event.frictionless_join so it can + * never bypass email verification on a hardened (non-frictionless) event. * - * Returns the list of tracks played during the event, newest first. - * Resolves by join_code: serves guest-facing kiosk display. + * Not anonymous: requires the `wrzdj_human` HMAC-signed verified-human cookie + * (set via Turnstile) through `require_verified_human_soft`. Calls without a + * resolvable verified-human guest are rejected with 403 + * `human_verification_required`. */ - get: operations["get_public_history_api_public_e__code__history_get"]; - put?: never; - post?: never; + post: operations["ensure_name_api_public_collect__code__guest_ensure_name_post"]; delete?: never; options?: never; head?: never; patch?: never; trace?: never; }; - "/api/public/e/{code}/nowplaying": { + "/api/public/collect/{code}/join-config": { parameters: { query?: never; header?: never; @@ -1566,15 +1756,10 @@ export interface paths { cookie?: never; }; /** - * Get Public Now Playing - * @description Get current now-playing track for public display. 
- * - * Returns the track currently playing from StageLinQ, or None if nothing playing. - * - * Resolves by join_code: this endpoint serves the kiosk display + OBS overlay - * pages, which route by join_code per the post-PR-#324 public/guest URL contract. + * Join Config + * @description Public, unauthenticated: lets the join page decide its gate mode on load. */ - get: operations["get_public_now_playing_api_public_e__code__nowplaying_get"]; + get: operations["join_config_api_public_collect__code__join_config_get"]; put?: never; post?: never; delete?: never; @@ -1583,7 +1768,207 @@ export interface paths { patch?: never; trace?: never; }; - "/api/public/events/{code}": { + "/api/public/collect/{code}/leaderboard": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Leaderboard */ + get: operations["leaderboard_api_public_collect__code__leaderboard_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/collect/{code}/live-join-code": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Get Live Join Code + * @description Return the live join_code for an event that has entered the live phase. + * + * Requires a verified human cookie (not email verification) so the join_code + * is never leaked to unverified bots scraping /collect during the + * collection-to-live transition. The join_code is otherwise revealed only + * via the QR code at the event venue. 
+ */ + get: operations["get_live_join_code_api_public_collect__code__live_join_code_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/collect/{code}/profile": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get Profile */ + get: operations["get_profile_api_public_collect__code__profile_get"]; + put?: never; + /** Set Profile */ + post: operations["set_profile_api_public_collect__code__profile_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/collect/{code}/profile/me": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** My Picks */ + get: operations["my_picks_api_public_collect__code__profile_me_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/collect/{code}/requests": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Submit */ + post: operations["submit_api_public_collect__code__requests_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/collect/{code}/requests/{request_id}/preview": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Request Preview */ + get: operations["request_preview_api_public_collect__code__requests__request_id__preview_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/collect/{code}/vote": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Vote */ + post: operations["vote_api_public_collect__code__vote_post"]; + delete?: never; + options?: 
never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/e/{code}/bridge-status": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Get Public Bridge Status + * @description Get bridge connection status for public display. + * + * Independent of track data — returns bridge connectivity even when + * no track is currently playing. Resolves by join_code: serves guest-facing + * kiosk display + overlay pages. + */ + get: operations["get_public_bridge_status_api_public_e__code__bridge_status_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/e/{code}/history": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Get Public History + * @description Get play history for public display. + * + * Returns the list of tracks played during the event, newest first. + * Resolves by join_code: serves guest-facing kiosk display. + */ + get: operations["get_public_history_api_public_e__code__history_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/e/{code}/nowplaying": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Get Public Now Playing + * @description Get current now-playing track for public display. + * + * Returns the track currently playing from StageLinQ, or None if nothing playing. + * + * Resolves by join_code: this endpoint serves the kiosk display + OBS overlay + * pages, which route by join_code per the post-PR-#324 public/guest URL contract. 
+ */ + get: operations["get_public_now_playing_api_public_e__code__nowplaying_get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/public/events/{code}": { parameters: { query?: never; header?: never; @@ -1701,6 +2086,14 @@ export interface paths { * unauthenticated DoS (unlimited long-lived connections exhausting FDs) * and passive eavesdropping via 6-char event-code brute force. * + * POOL SAFETY (issue #356): the one-shot existence/auth check runs inside a + * short-lived ``with SessionLocal()`` block whose pooled connection is + * returned BEFORE the EventSourceResponse is returned. An EventSource + * connection can stay open indefinitely, so we must NOT hold a + * request-scoped ``get_db`` session across the stream lifetime — doing so + * pinned one pooled connection per open stream and exhausted the QueuePool + * (size 5 + overflow 10 = 15 connections) under modest guest load. + * * Event types: * - request_created: New request submitted * - request_status_changed: Request status update @@ -2019,6 +2412,58 @@ export interface paths { patch?: never; trace?: never; }; + "/api/setbuilder/sets": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * List Sets + * @description List the current DJ's sets, newest first. + */ + get: operations["list_sets_api_setbuilder_sets_get"]; + put?: never; + /** + * Create Set + * @description Create a new empty set owned by the current DJ. + */ + post: operations["create_set_api_setbuilder_sets_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/setbuilder/sets/{set_id}": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** + * Get Set + * @description Get one of the current DJ's sets, or 404. 
+ */ + get: operations["get_set_api_setbuilder_sets__set_id__get"]; + put?: never; + post?: never; + /** + * Delete Set + * @description Delete one of the current DJ's sets, or 404. + */ + delete: operations["delete_set_api_setbuilder_sets__set_id__delete"]; + options?: never; + head?: never; + /** + * Rename Set + * @description Rename one of the current DJ's sets, or 404. + */ + patch: operations["rename_set_api_setbuilder_sets__set_id__patch"]; + trace?: never; + }; "/api/tidal/auth/cancel": { parameters: { query?: never; @@ -2288,13 +2733,101 @@ export interface components { /** Source */ source: string; }; - /** AdminEventOut */ - AdminEventOut: { - /** Code */ - code: string; + /** + * AdminAuditOut + * @description Paginated audit-event browse response. + */ + AdminAuditOut: { + /** Limit */ + limit: number; + /** Offset */ + offset: number; + /** Rows */ + rows: components["schemas"]["AuditEventRow"][]; + /** Total */ + total: number; + }; + /** + * AdminConnectorCapPatch + * @description Admin set/clear a connector's monthly token cap (issue #339). + * + * ``monthly_token_cap`` is **required** so intent is always explicit: an + * integer sets the cap, ``null`` clears it (unlimited). Omitting the field + * (an empty ``{}`` body) is rejected with 422 rather than silently treated as + * ``null`` — that would let an accidental no-field PATCH wipe a configured + * cap. A non-null value must be a non-negative integer; ``0`` means "no + * further calls this month". The upper bound is a sanity ceiling, not a + * billing limit. + */ + AdminConnectorCapPatch: { + /** Monthly Token Cap */ + monthly_token_cap: number | null; + }; + /** + * AdminConnectorOut + * @description Admin view — adds the DJ's username for display. 
+ */ + AdminConnectorOut: { + /** Base Url Plain */ + base_url_plain: string | null; /** - * Created At - * Format: date-time + * Connector Type + * @enum {string} + */ + connector_type: "openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey"; + /** + * Created At + * Format: date-time + */ + created_at: string; + /** + * Current Month Tokens + * @default 0 + */ + current_month_tokens: number; + /** Display Name */ + display_name: string; + /** Dj Username */ + dj_username: string; + /** Id */ + id: number; + /** + * Is Default + * @default false + */ + is_default: boolean; + /** Last Error */ + last_error: string | null; + /** Last Health Check At */ + last_health_check_at: string | null; + /** Last Health Check Status */ + last_health_check_status: ("ok" | "auth_invalid" | "rate_limited" | "quota_exceeded" | "provider_unavailable" | "error") | null; + /** Last Used At */ + last_used_at: string | null; + /** Model Hint */ + model_hint: string | null; + /** Monthly Token Cap */ + monthly_token_cap: number | null; + /** + * Status + * @enum {string} + */ + status: "active" | "auth_invalid" | "disabled"; + /** + * Updated At + * Format: date-time + */ + updated_at: string; + /** User Id */ + user_id: number; + }; + /** AdminEventOut */ + AdminEventOut: { + /** Code */ + code: string; + /** + * Created At + * Format: date-time */ created_at: string; /** @@ -2320,6 +2853,40 @@ export interface components { */ request_count: number; }; + /** AdminPolicyOut */ + AdminPolicyOut: { + /** Llm Apikey Connectors Enabled */ + llm_apikey_connectors_enabled: boolean; + /** Llm Call Log Retention Days */ + llm_call_log_retention_days: number; + /** Llm Compatible Connector Enabled */ + llm_compatible_connector_enabled: boolean; + /** Llm Default Connector Id */ + llm_default_connector_id: number | null; + }; + /** AdminPolicyPatch */ + AdminPolicyPatch: { + /** + * Clear Default + * @default 
false + */ + clear_default: boolean; + /** Llm Apikey Connectors Enabled */ + llm_apikey_connectors_enabled?: boolean | null; + /** Llm Call Log Retention Days */ + llm_call_log_retention_days?: number | null; + /** Llm Compatible Connector Enabled */ + llm_compatible_connector_enabled?: boolean | null; + /** Llm Default Connector Id */ + llm_default_connector_id?: number | null; + }; + /** AdminUsageOut */ + AdminUsageOut: { + /** Days */ + days: number; + /** Rows */ + rows: components["schemas"]["UsageRow"][]; + }; /** AdminUserCreate */ AdminUserCreate: { /** Password */ @@ -2362,6 +2929,34 @@ export interface components { /** Role */ role?: string | null; }; + /** + * AuditEventRow + * @description A single audit-trail row with joined display labels. + * + * Never includes credential material — only the target connector's + * human-readable display name (joined from ``llm_connectors``). + */ + AuditEventRow: { + /** Actor User Id */ + actor_user_id: number; + /** Actor Username */ + actor_username: string; + /** + * Created At + * Format: date-time + */ + created_at: string; + /** Event Type */ + event_type: string; + /** Id */ + id: number; + /** Notes */ + notes: string | null; + /** Target Connector Display Name */ + target_connector_display_name: string | null; + /** Target Connector Id */ + target_connector_id: number | null; + }; /** * BeatportEventSettings * @description Beatport sync settings for an event. @@ -2805,6 +3400,156 @@ export interface components { /** Request Id */ request_id: number; }; + /** + * ConnectorCreate + * @description Provider-agnostic create payload. + * + * Field requirements vary by ``connector_type``: + * + * - ``openai_apikey`` / ``anthropic_apikey`` / ``openrouter_apikey`` / + * ``xai_apikey`` / ``gemini_apikey``: ``api_key`` required; ``base_url`` + * and ``bearer`` are ignored. + * - ``openai_compatible``: ``base_url`` required; ``bearer`` optional; + * ``api_key`` is ignored. 
+ * - ``bedrock``: ``aws_access_key_id``, ``aws_secret_access_key``, + * ``aws_region`` and ``aws_model_id`` required; other fields ignored. + * - ``azure_openai``: ``api_key``, ``azure_resource_name``, + * ``azure_deployment_name`` and ``azure_api_version`` all required. + * + * The combination is enforced by :meth:`_require_credentials_for_type`. + * See ``build_create_payload`` in ``services/llm/connector_storage.py`` + * for the full validation flow (including key shape checks). + */ + ConnectorCreate: { + /** Api Key */ + api_key?: string | null; + /** Aws Access Key Id */ + aws_access_key_id?: string | null; + /** Aws Model Id */ + aws_model_id?: string | null; + /** Aws Region */ + aws_region?: string | null; + /** Aws Secret Access Key */ + aws_secret_access_key?: string | null; + /** Azure Api Version */ + azure_api_version?: string | null; + /** Azure Deployment Name */ + azure_deployment_name?: string | null; + /** Azure Resource Name */ + azure_resource_name?: string | null; + /** Base Url */ + base_url?: string | null; + /** Bearer */ + bearer?: string | null; + /** + * Connector Type + * @enum {string} + */ + connector_type: "openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey"; + /** Display Name */ + display_name: string; + /** Model Hint */ + model_hint?: string | null; + }; + /** + * ConnectorCredentialsRotate + * @description Rotation payload — at least one credential field must be supplied. + * + * Field semantics mirror :class:`ConnectorCreate`. The actual field required + * depends on the connector being rotated (validated in ``rotate_credentials``). 
+ */ + ConnectorCredentialsRotate: { + /** Api Key */ + api_key?: string | null; + /** Aws Access Key Id */ + aws_access_key_id?: string | null; + /** Aws Model Id */ + aws_model_id?: string | null; + /** Aws Region */ + aws_region?: string | null; + /** Aws Secret Access Key */ + aws_secret_access_key?: string | null; + /** Azure Api Version */ + azure_api_version?: string | null; + /** Azure Deployment Name */ + azure_deployment_name?: string | null; + /** Azure Resource Name */ + azure_resource_name?: string | null; + /** Base Url */ + base_url?: string | null; + /** Bearer */ + bearer?: string | null; + }; + /** + * ConnectorOut + * @description Public-safe connector view — never includes the credential blob. + */ + ConnectorOut: { + /** Base Url Plain */ + base_url_plain: string | null; + /** + * Connector Type + * @enum {string} + */ + connector_type: "openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey"; + /** + * Created At + * Format: date-time + */ + created_at: string; + /** Display Name */ + display_name: string; + /** Id */ + id: number; + /** + * Is Default + * @default false + */ + is_default: boolean; + /** Last Error */ + last_error: string | null; + /** Last Health Check At */ + last_health_check_at: string | null; + /** Last Health Check Status */ + last_health_check_status: ("ok" | "auth_invalid" | "rate_limited" | "quota_exceeded" | "provider_unavailable" | "error") | null; + /** Last Used At */ + last_used_at: string | null; + /** Model Hint */ + model_hint: string | null; + /** Monthly Token Cap */ + monthly_token_cap: number | null; + /** + * Status + * @enum {string} + */ + status: "active" | "auth_invalid" | "disabled"; + /** + * Updated At + * Format: date-time + */ + updated_at: string; + /** User Id */ + user_id: number; + }; + /** + * ConnectorPatch + * @description Metadata-only patch (no credential rotation here). 
+ */ + ConnectorPatch: { + /** Display Name */ + display_name?: string | null; + /** Model Hint */ + model_hint?: string | null; + }; + /** ConnectorTestResult */ + ConnectorTestResult: { + /** Error Code */ + error_code: string | null; + /** Message */ + message: string | null; + /** Ok */ + ok: boolean; + }; /** * DisplaySettingsResponse * @description Response for display settings update. @@ -2847,6 +3592,28 @@ export interface components { /** Requests Open */ requests_open?: boolean | null; }; + /** + * DjPolicyOut + * @description DJ-readable connector policy — the non-sensitive subset of the admin + * policy surface. + * + * Lets the settings/ai page fail *closed*: a normal DJ can learn which + * connector types the admin has enabled (so disallowed providers are hidden + * in the picker) without exposing admin-only fields such as + * ``llm_default_connector_id``. + * + * ``allowed_connector_types`` is the pre-computed set of connector types a DJ + * may create given the two toggles, so the frontend doesn't have to hard-code + * the api-key-vs-compatible mapping. + */ + DjPolicyOut: { + /** Allowed Connector Types */ + allowed_connector_types: ("openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey")[]; + /** Llm Apikey Connectors Enabled */ + llm_apikey_connectors_enabled: boolean; + /** Llm Compatible Connector Enabled */ + llm_compatible_connector_enabled: boolean; + }; /** EnrichPreviewItem */ EnrichPreviewItem: { /** Artist */ @@ -3012,6 +3779,42 @@ export interface components { /** Name */ name?: string | null; }; + /** + * FeaturePreferenceOut + * @description A single per-feature connector pin (issue #337). + */ + FeaturePreferenceOut: { + /** Connector Id */ + connector_id: number; + /** + * Feature + * @enum {string} + */ + feature: "recommendation" | "set_builder"; + }; + /** + * FeaturePreferenceSet + * @description Set/change a per-feature pin. 
Upsert — replaces any existing pin. + */ + FeaturePreferenceSet: { + /** Connector Id */ + connector_id: number; + /** + * Feature + * @enum {string} + */ + feature: "recommendation" | "set_builder"; + }; + /** + * FeaturePreferencesListOut + * @description All of a DJ's per-feature pins + the catalogue of pinnable features. + */ + FeaturePreferencesListOut: { + /** Known Features */ + known_features: ("recommendation" | "set_builder")[]; + /** Preferences */ + preferences: components["schemas"]["FeaturePreferenceOut"][]; + }; /** GuestNowPlaying */ GuestNowPlaying: { /** Album Art Url */ @@ -3767,6 +4570,102 @@ export interface components { catalog_search: components["schemas"]["CapabilityStatus"]; playlist_sync: components["schemas"]["CapabilityStatus"]; }; + /** + * SetCreate + * @description Body for creating a new (empty) set. + */ + SetCreate: { + /** Event Id */ + event_id?: number | null; + /** Name */ + name: string; + }; + /** + * SetDetail + * @description Full set record (Phase 0: no slot/curve expansion yet). + */ + SetDetail: { + /** Bpm Ceiling */ + bpm_ceiling: number | null; + /** Bpm Floor */ + bpm_floor: number | null; + /** + * Created At + * Format: date-time + */ + created_at: string; + /** Event Id */ + event_id: number | null; + /** Exported At */ + exported_at: string | null; + /** Id */ + id: number; + /** Key Strictness */ + key_strictness: number; + /** Name */ + name: string; + /** + * Sharing Mode + * @enum {string} + */ + sharing_mode: "private" | "invite_only"; + /** + * Status + * @enum {string} + */ + status: "draft" | "locked" | "exported"; + /** Target Duration Sec */ + target_duration_sec: number | null; + /** Tidal Playlist Id */ + tidal_playlist_id: string | null; + /** + * Updated At + * Format: date-time + */ + updated_at: string; + /** Vibe Theme */ + vibe_theme: string | null; + }; + /** + * SetRename + * @description Body for renaming a set. 
+ */ + SetRename: { + /** Name */ + name: string; + }; + /** + * SetSummary + * @description Set list item (no children). + */ + SetSummary: { + /** + * Created At + * Format: date-time + */ + created_at: string; + /** Event Id */ + event_id: number | null; + /** Id */ + id: number; + /** Name */ + name: string; + /** + * Sharing Mode + * @enum {string} + */ + sharing_mode: "private" | "invite_only"; + /** + * Status + * @enum {string} + */ + status: "draft" | "locked" | "exported"; + /** + * Updated At + * Format: date-time + */ + updated_at: string; + }; /** StatusMessageResponse */ StatusMessageResponse: { /** Message */ @@ -3995,6 +4894,30 @@ export interface components { /** Tidal Sync Enabled */ tidal_sync_enabled?: boolean | null; }; + /** UsageRow */ + UsageRow: { + /** Connector Id */ + connector_id: number; + /** + * Connector Type + * @enum {string} + */ + connector_type: "openai_apikey" | "anthropic_apikey" | "openai_compatible" | "openrouter_apikey" | "xai_apikey" | "bedrock" | "azure_openai" | "gemini_apikey"; + /** Display Name */ + display_name: string; + /** Dj Username */ + dj_username: string; + /** Error Count */ + error_count: number; + /** Error Rate */ + error_rate: number; + /** Total Calls */ + total_calls: number; + /** Total Tokens In */ + total_tokens_in: number; + /** Total Tokens Out */ + total_tokens_out: number; + }; /** UserOut */ UserOut: { /** @@ -4403,9 +5326,16 @@ export interface operations { }; }; }; - admin_get_settings_api_admin_settings_get: { + list_audit_events_api_admin_llm_audit_get: { parameters: { - query?: never; + query?: { + event_type?: string | null; + actor_user_id?: number | null; + target_connector_id?: number | null; + days?: number; + limit?: number; + offset?: number; + }; header?: never; path?: never; cookie?: never; @@ -4418,31 +5348,41 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["SystemSettingsOut"]; + "application/json": 
components["schemas"]["AdminAuditOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; }; }; }; }; - admin_update_settings_api_admin_settings_patch: { + export_audit_events_csv_api_admin_llm_audit_csv_get: { parameters: { - query?: never; + query?: { + event_type?: string | null; + actor_user_id?: number | null; + target_connector_id?: number | null; + days?: number; + }; header?: never; path?: never; cookie?: never; }; - requestBody: { - content: { - "application/json": components["schemas"]["SystemSettingsUpdate"]; - }; - }; + requestBody?: never; responses: { - /** @description Successful Response */ + /** @description CSV export of the filtered audit trail. */ 200: { headers: { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["SystemSettingsOut"]; + "text/csv": string; }; }; /** @description Validation Error */ @@ -4456,7 +5396,7 @@ export interface operations { }; }; }; - admin_stats_api_admin_stats_get: { + list_connectors_admin_api_admin_llm_connectors_get: { parameters: { query?: never; header?: never; @@ -4471,23 +5411,25 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["SystemStats"]; + "application/json": components["schemas"]["AdminConnectorOut"][]; }; }; }; }; - admin_list_users_api_admin_users_get: { + set_connector_cap_admin_api_admin_llm_connectors__connector_id__cap_patch: { parameters: { - query?: { - page?: number; - limit?: number; - role?: string | null; - }; + query?: never; header?: never; - path?: never; + path: { + connector_id: number; + }; cookie?: never; }; - requestBody?: never; + requestBody: { + content: { + "application/json": components["schemas"]["AdminConnectorCapPatch"]; + }; + }; responses: { /** @description Successful Response */ 200: { @@ -4495,7 +5437,7 @@ export interface operations { 
[name: string]: unknown; }; content: { - "application/json": components["schemas"]["PaginatedResponse"]; + "application/json": components["schemas"]["AdminConnectorOut"]; }; }; /** @description Validation Error */ @@ -4509,26 +5451,24 @@ export interface operations { }; }; }; - admin_create_user_api_admin_users_post: { + revoke_connector_admin_api_admin_llm_connectors__connector_id__revoke_post: { parameters: { query?: never; header?: never; - path?: never; - cookie?: never; - }; - requestBody: { - content: { - "application/json": components["schemas"]["AdminUserCreate"]; + path: { + connector_id: number; }; + cookie?: never; }; + requestBody?: never; responses: { /** @description Successful Response */ - 201: { + 200: { headers: { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["AdminUserOut"]; + "application/json": components["schemas"]["AdminConnectorOut"]; }; }; /** @description Validation Error */ @@ -4542,40 +5482,263 @@ export interface operations { }; }; }; - admin_delete_user_api_admin_users__user_id__delete: { + get_policy_api_admin_llm_policy_get: { parameters: { query?: never; header?: never; - path: { - user_id: number; - }; + path?: never; cookie?: never; }; requestBody?: never; responses: { /** @description Successful Response */ - 204: { - headers: { - [name: string]: unknown; - }; - content?: never; - }; - /** @description Validation Error */ - 422: { + 200: { headers: { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["HTTPValidationError"]; + "application/json": components["schemas"]["AdminPolicyOut"]; }; }; }; }; - admin_update_user_api_admin_users__user_id__patch: { + patch_policy_api_admin_llm_policy_patch: { parameters: { query?: never; header?: never; - path: { + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["AdminPolicyPatch"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + 
headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["AdminPolicyOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + get_usage_api_admin_llm_usage_get: { + parameters: { + query?: { + days?: number; + }; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["AdminUsageOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + admin_get_settings_api_admin_settings_get: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["SystemSettingsOut"]; + }; + }; + }; + }; + admin_update_settings_api_admin_settings_patch: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["SystemSettingsUpdate"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["SystemSettingsOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + admin_stats_api_admin_stats_get: { + parameters: { + query?: never; + header?: never; + path?: 
never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["SystemStats"]; + }; + }; + }; + }; + admin_list_users_api_admin_users_get: { + parameters: { + query?: { + page?: number; + limit?: number; + role?: string | null; + }; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["PaginatedResponse"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + admin_create_user_api_admin_users_post: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["AdminUserCreate"]; + }; + }; + responses: { + /** @description Successful Response */ + 201: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["AdminUserOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + admin_delete_user_api_admin_users__user_id__delete: { + parameters: { + query?: never; + header?: never; + path: { + user_id: number; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 204: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + 
}; + }; + admin_update_user_api_admin_users__user_id__patch: { + parameters: { + query?: never; + header?: never; + path: { user_id: number; }; cookie?: never; @@ -6168,7 +7331,461 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["AcceptAllResponse"]; + "application/json": components["schemas"]["AcceptAllResponse"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + bulk_delete_requests_endpoint_api_events__code__requests_bulk_delete: { + parameters: { + query?: { + status?: string | null; + }; + header?: never; + path: { + code: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["BulkActionResponse"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + reject_all_requests_endpoint_api_events__code__requests_reject_all_post: { + parameters: { + query?: never; + header?: never; + path: { + code: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["BulkActionResponse"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + event_search_api_events__code__search_get: { + parameters: { + query: { + q: string; + }; + header?: never; + path: { + code: string; + }; + cookie?: never; + }; + requestBody?: never; 
+ responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["SearchResult"][]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + unarchive_event_endpoint_api_events__code__unarchive_post: { + parameters: { + query?: never; + header?: never; + path: { + code: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["EventOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + api_health_check_api_health_get: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": unknown; + }; + }; + }; + }; + list_my_kiosks_api_kiosk_mine_get: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["KioskOut"][]; + }; + }; + }; + }; + complete_kiosk_pairing_api_kiosk_pair__pair_code__complete_post: { + parameters: { + query?: never; + header?: never; + path: { + pair_code: string; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["KioskCompletePairingRequest"]; + }; + }; + responses: { + /** @description Successful Response */ 
+ 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["KioskOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + delete_kiosk_endpoint_api_kiosk__kiosk_id__delete: { + parameters: { + query?: never; + header?: never; + path: { + kiosk_id: number; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 204: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + rename_kiosk_endpoint_api_kiosk__kiosk_id__patch: { + parameters: { + query?: never; + header?: never; + path: { + kiosk_id: number; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["KioskRenameRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["KioskOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + assign_kiosk_api_kiosk__kiosk_id__assign_patch: { + parameters: { + query?: never; + header?: never; + path: { + kiosk_id: number; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["KioskAssignRequest"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["KioskOut"]; + }; + }; + /** @description Validation Error */ + 
422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + list_connectors_api_llm_connectors_get: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["ConnectorOut"][]; + }; + }; + }; + }; + create_connector_endpoint_api_llm_connectors_post: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["ConnectorCreate"]; + }; + }; + responses: { + /** @description Successful Response */ + 201: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["ConnectorOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + delete_connector_endpoint_api_llm_connectors__connector_id__delete: { + parameters: { + query?: never; + header?: never; + path: { + connector_id: number; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 204: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + update_connector_metadata_api_llm_connectors__connector_id__patch: { + parameters: { + query?: never; + header?: never; + path: { + connector_id: number; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["ConnectorPatch"]; + }; + }; + responses: { + /** 
@description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["ConnectorOut"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + rotate_connector_credentials_api_llm_connectors__connector_id__credentials_put: { + parameters: { + query?: never; + header?: never; + path: { + connector_id: number; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["ConnectorCredentialsRotate"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["ConnectorOut"]; }; }; /** @description Validation Error */ @@ -6182,14 +7799,12 @@ export interface operations { }; }; }; - bulk_delete_requests_endpoint_api_events__code__requests_bulk_delete: { + set_connector_as_default_api_llm_connectors__connector_id__default_post: { parameters: { - query?: { - status?: string | null; - }; + query?: never; header?: never; path: { - code: string; + connector_id: number; }; cookie?: never; }; @@ -6201,8 +7816,22 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["BulkActionResponse"]; + "application/json": components["schemas"]["ConnectorOut"]; + }; + }; + /** @description Connector cannot be set as default (e.g. disabled or auth_invalid). */ + 400: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + /** @description Connector not found for current user. 
*/ + 404: { + headers: { + [name: string]: unknown; }; + content?: never; }; /** @description Validation Error */ 422: { @@ -6215,12 +7844,12 @@ export interface operations { }; }; }; - reject_all_requests_endpoint_api_events__code__requests_reject_all_post: { + unset_connector_as_default_api_llm_connectors__connector_id__default_delete: { parameters: { query?: never; header?: never; path: { - code: string; + connector_id: number; }; cookie?: never; }; @@ -6232,8 +7861,15 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["BulkActionResponse"]; + "application/json": components["schemas"]["ConnectorOut"]; + }; + }; + /** @description Connector not found for current user. */ + 404: { + headers: { + [name: string]: unknown; }; + content?: never; }; /** @description Validation Error */ 422: { @@ -6246,14 +7882,12 @@ export interface operations { }; }; }; - event_search_api_events__code__search_get: { + stream_test_connector_api_llm_connectors__connector_id__stream_test_post: { parameters: { - query: { - q: string; - }; + query?: never; header?: never; path: { - code: string; + connector_id: number; }; cookie?: never; }; @@ -6265,7 +7899,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["SearchResult"][]; + "application/json": unknown; }; }; /** @description Validation Error */ @@ -6279,12 +7913,12 @@ export interface operations { }; }; }; - unarchive_event_endpoint_api_events__code__unarchive_post: { + test_connector_api_llm_connectors__connector_id__test_post: { parameters: { query?: never; header?: never; path: { - code: string; + connector_id: number; }; cookie?: never; }; @@ -6296,7 +7930,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["EventOut"]; + "application/json": components["schemas"]["ConnectorTestResult"]; }; }; /** @description Validation Error */ @@ 
-6310,7 +7944,7 @@ export interface operations { }; }; }; - api_health_check_api_health_get: { + list_feature_preferences_api_llm_feature_preferences_get: { parameters: { query?: never; header?: never; @@ -6325,19 +7959,23 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": unknown; + "application/json": components["schemas"]["FeaturePreferencesListOut"]; }; }; }; }; - list_my_kiosks_api_kiosk_mine_get: { + set_feature_preference_endpoint_api_llm_feature_preferences_post: { parameters: { query?: never; header?: never; path?: never; cookie?: never; }; - requestBody?: never; + requestBody: { + content: { + "application/json": components["schemas"]["FeaturePreferenceSet"]; + }; + }; responses: { /** @description Successful Response */ 200: { @@ -6345,34 +7983,22 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["KioskOut"][]; + "application/json": components["schemas"]["FeaturePreferencesListOut"]; }; }; - }; - }; - complete_kiosk_pairing_api_kiosk_pair__pair_code__complete_post: { - parameters: { - query?: never; - header?: never; - path: { - pair_code: string; - }; - cookie?: never; - }; - requestBody: { - content: { - "application/json": components["schemas"]["KioskCompletePairingRequest"]; - }; - }; - responses: { - /** @description Successful Response */ - 200: { + /** @description Connector is not active and cannot be pinned. */ + 400: { headers: { [name: string]: unknown; }; - content: { - "application/json": components["schemas"]["KioskOut"]; + content?: never; + }; + /** @description Connector not found for current user. 
*/ + 404: { + headers: { + [name: string]: unknown; }; + content?: never; }; /** @description Validation Error */ 422: { @@ -6385,23 +8011,25 @@ export interface operations { }; }; }; - delete_kiosk_endpoint_api_kiosk__kiosk_id__delete: { + clear_feature_preference_endpoint_api_llm_feature_preferences__feature__delete: { parameters: { query?: never; header?: never; path: { - kiosk_id: number; + feature: "recommendation" | "set_builder"; }; cookie?: never; }; requestBody?: never; responses: { /** @description Successful Response */ - 204: { + 200: { headers: { [name: string]: unknown; }; - content?: never; + content: { + "application/json": components["schemas"]["FeaturePreferencesListOut"]; + }; }; /** @description Validation Error */ 422: { @@ -6414,20 +8042,14 @@ export interface operations { }; }; }; - rename_kiosk_endpoint_api_kiosk__kiosk_id__patch: { + list_openrouter_models_api_llm_openrouter_models_get: { parameters: { query?: never; header?: never; - path: { - kiosk_id: number; - }; + path?: never; cookie?: never; }; - requestBody: { - content: { - "application/json": components["schemas"]["KioskRenameRequest"]; - }; - }; + requestBody?: never; responses: { /** @description Successful Response */ 200: { @@ -6435,34 +8057,19 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["KioskOut"]; - }; - }; - /** @description Validation Error */ - 422: { - headers: { - [name: string]: unknown; - }; - content: { - "application/json": components["schemas"]["HTTPValidationError"]; + "application/json": components["schemas"]["AIModelsResponse"]; }; }; }; }; - assign_kiosk_api_kiosk__kiosk_id__assign_patch: { + get_dj_policy_api_llm_policy_get: { parameters: { query?: never; header?: never; - path: { - kiosk_id: number; - }; + path?: never; cookie?: never; }; - requestBody: { - content: { - "application/json": components["schemas"]["KioskAssignRequest"]; - }; - }; + requestBody?: never; responses: { /** 
@description Successful Response */ 200: { @@ -6470,17 +8077,22 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["KioskOut"]; + "application/json": components["schemas"]["DjPolicyOut"]; }; }; - /** @description Validation Error */ - 422: { + /** @description Not authenticated (missing or invalid bearer token). */ + 401: { headers: { [name: string]: unknown; }; - content: { - "application/json": components["schemas"]["HTTPValidationError"]; + content?: never; + }; + /** @description Authenticated but not an active DJ (e.g. pending approval). */ + 403: { + headers: { + [name: string]: unknown; }; + content?: never; }; }; }; @@ -7617,6 +9229,154 @@ export interface operations { }; }; }; + list_sets_api_setbuilder_sets_get: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["SetSummary"][]; + }; + }; + }; + }; + create_set_api_setbuilder_sets_post: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["SetCreate"]; + }; + }; + responses: { + /** @description Successful Response */ + 201: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["SetDetail"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + get_set_api_setbuilder_sets__set_id__get: { + parameters: { + query?: never; + header?: never; + path: { + set_id: number; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: 
string]: unknown; + }; + content: { + "application/json": components["schemas"]["SetDetail"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + delete_set_api_setbuilder_sets__set_id__delete: { + parameters: { + query?: never; + header?: never; + path: { + set_id: number; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 204: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; + rename_set_api_setbuilder_sets__set_id__patch: { + parameters: { + query?: never; + header?: never; + path: { + set_id: number; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["SetRename"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["SetDetail"]; + }; + }; + /** @description Validation Error */ + 422: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["HTTPValidationError"]; + }; + }; + }; + }; cancel_auth_api_tidal_auth_cancel_post: { parameters: { query?: never; diff --git a/dashboard/lib/api-types.ts b/dashboard/lib/api-types.ts index c177b05b..c132e30a 100644 --- a/dashboard/lib/api-types.ts +++ b/dashboard/lib/api-types.ts @@ -52,6 +52,32 @@ export type AIModelInfo = Schemas['AIModelInfo']; export type AIModelsResponse = Schemas['AIModelsResponse']; export type AISettings = Schemas['AISettingsOut']; export type AISettingsUpdate = Schemas['AISettingsUpdate']; + +// LLM gateway (issue #329) +export type LlmConnector = 
Schemas['ConnectorOut']; +export type LlmAdminConnector = Schemas['AdminConnectorOut']; +export type LlmConnectorCreate = Schemas['ConnectorCreate']; +export type LlmConnectorPatch = Schemas['ConnectorPatch']; +export type LlmConnectorCredentialsRotate = Schemas['ConnectorCredentialsRotate']; +export type LlmConnectorTestResult = Schemas['ConnectorTestResult']; +export type LlmAdminPolicy = Schemas['AdminPolicyOut']; +export type LlmAdminPolicyPatch = Schemas['AdminPolicyPatch']; +// Monthly token cap (issue #339) +export type LlmAdminConnectorCapPatch = Schemas['AdminConnectorCapPatch']; +export type LlmDjPolicy = Schemas['DjPolicyOut']; +export type LlmAdminUsage = Schemas['AdminUsageOut']; +export type LlmUsageRow = Schemas['UsageRow']; +// LLM audit trail (issue #341) +export type LlmAdminAudit = Schemas['AdminAuditOut']; +export type LlmAuditRow = Schemas['AuditEventRow']; +// Per-feature connector preference (issue #337) +export type LlmFeaturePreference = Schemas['FeaturePreferenceOut']; +export type LlmFeaturePreferences = Schemas['FeaturePreferencesListOut']; +export type LlmFeaturePreferenceSet = Schemas['FeaturePreferenceSet']; +export type LlmFeatureKey = Schemas['FeaturePreferenceOut']['feature']; +// Derive from schema so backend enum changes propagate to TS automatically. 
+export type LlmConnectorType = Schemas['ConnectorOut']['connector_type']; +export type LlmConnectorStatus = Schemas['ConnectorOut']['status']; export type ActivityLogEntry = Schemas['ActivityLogEntry']; export type CapabilityStatus = Schemas['CapabilityStatus']; export type ServiceCapabilities = Schemas['ServiceCapabilities']; diff --git a/dashboard/lib/api.ts b/dashboard/lib/api.ts index 6c4f7d3e..8db9635f 100644 --- a/dashboard/lib/api.ts +++ b/dashboard/lib/api.ts @@ -5,6 +5,20 @@ import type { AISettings, AISettingsUpdate, ActivityLogEntry, + LlmAdminAudit, + LlmAdminConnector, + LlmAdminPolicy, + LlmAdminPolicyPatch, + LlmAdminUsage, + LlmConnector, + LlmConnectorCreate, + LlmConnectorCredentialsRotate, + LlmConnectorPatch, + LlmConnectorTestResult, + LlmDjPolicy, + LlmFeatureKey, + LlmFeaturePreferences, + LlmFeaturePreferenceSet, ArchivedEvent, BeatportEventSettings, BeatportSearchResult, @@ -52,6 +66,25 @@ export type { AIModelsResponse, AISettings, AISettingsUpdate, + LlmAdminAudit, + LlmAdminConnector, + LlmAdminPolicy, + LlmAdminPolicyPatch, + LlmAdminUsage, + LlmAuditRow, + LlmConnector, + LlmConnectorCreate, + LlmConnectorCredentialsRotate, + LlmConnectorPatch, + LlmConnectorStatus, + LlmConnectorTestResult, + LlmConnectorType, + LlmDjPolicy, + LlmFeatureKey, + LlmFeaturePreference, + LlmFeaturePreferences, + LlmFeaturePreferenceSet, + LlmUsageRow, ArchivedEvent, BeatportEventSettings, BeatportSearchResult, @@ -103,6 +136,17 @@ export type { VoteResponse, } from './api-types'; +// ========== Admin LLM audit trail filters (issue #341) ========== + +export interface AdminLlmAuditFilters { + event_type?: string; + actor_user_id?: number; + target_connector_id?: number; + days?: number; + limit?: number; + offset?: number; +} + // ========== Pre-Event Collection Types ========== export interface CollectEventPreview { @@ -231,6 +275,26 @@ export class HumanVerificationRequiredError extends ApiError { } } +/** + * One incremental chunk of a streamed LLM 
response (mirrors the backend + * `ChatResponseChunk`). Non-final chunks carry `text_delta` and/or + * `tool_call_deltas`; the final chunk has `done: true` plus `stop_reason` and + * (when reported) `usage`. Hand-written client type — SSE chunks are not part of + * the REST OpenAPI schema. + */ +export interface LlmStreamChunk { + text_delta?: string; + tool_call_deltas?: Array<{ + index: number; + id?: string | null; + name?: string | null; + input_json_fragment?: string; + }>; + stop_reason?: 'end_turn' | 'tool_use' | 'max_tokens' | 'error' | null; + usage?: { prompt: number; completion: number } | null; + done?: boolean; +} + /** * Wrap a guest-public fetch in 403-human-verification-required retry logic. * Caller passes a `reverify` async function that re-runs the Turnstile @@ -1173,6 +1237,244 @@ class ApiClient { }); } + // ========== LLM connectors (per-DJ) ========== + + async listLlmConnectors(): Promise { + return this.fetch('/api/llm/connectors'); + } + + // DJ-readable connector policy (non-sensitive subset). The settings/ai page + // uses this to fail closed — hiding connector types the admin disabled — + // instead of falling back to "all types allowed" on the admin-only endpoint. 
+ async getLlmPolicy(): Promise { + return this.fetch('/api/llm/policy'); + } + + async listOpenRouterModels(): Promise { + return this.fetch('/api/llm/openrouter/models'); + } + + async createLlmConnector(data: LlmConnectorCreate): Promise { + return this.fetch('/api/llm/connectors', { + method: 'POST', + body: JSON.stringify(data), + }); + } + + async updateLlmConnector(id: number, data: LlmConnectorPatch): Promise { + return this.fetch(`/api/llm/connectors/${id}`, { + method: 'PATCH', + body: JSON.stringify(data), + }); + } + + async rotateLlmConnectorCredentials( + id: number, + data: LlmConnectorCredentialsRotate, + ): Promise { + return this.fetch(`/api/llm/connectors/${id}/credentials`, { + method: 'PUT', + body: JSON.stringify(data), + }); + } + + async testLlmConnector(id: number): Promise { + return this.fetch(`/api/llm/connectors/${id}/test`, { method: 'POST' }); + } + + /** + * Stream a short health-check sentence through a connector via SSE. + * + * Uses fetch + ReadableStream rather than EventSource because EventSource + * cannot send the Authorization header this authenticated endpoint requires. + * Pass an AbortSignal to cancel — aborting closes the connection, which the + * backend treats as a client disconnect and cancels the upstream provider + * request. `onChunk` is invoked for every parsed SSE data frame. 
+ */ + async streamConnectorTest( + id: number, + onChunk: (chunk: LlmStreamChunk) => void, + signal?: AbortSignal, + ): Promise { + const headers = new Headers({ Accept: 'text/event-stream' }); + if (this.token) headers.set('Authorization', `Bearer ${this.token}`); + + const response = await fetch(`${getApiUrl()}/api/llm/connectors/${id}/stream-test`, { + method: 'POST', + headers, + signal, + }); + if (!response.ok || !response.body) { + if (response.status === 401 && this.onUnauthorized) this.onUnauthorized(); + throw new ApiError('Stream test failed', response.status); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + // SSE frames are separated by a blank line. The spec allows LF (`\n\n`) or + // CRLF (`\r\n\r\n`) terminators, so match either — a CRLF-emitting server or + // proxy must not leave frames (including `event: error`) unparsed. + const frameBoundary = /\r?\n\r?\n/; + try { + for (;;) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + let sep: number; + while ((sep = buffer.search(frameBoundary)) !== -1) { + const frame = buffer.slice(0, sep); + const boundary = buffer.slice(sep).match(frameBoundary)?.[0] ?? '\n\n'; + buffer = buffer.slice(sep + boundary.length); + // A frame may carry an `event:` name plus one or more `data:` lines. + // The backend emits `event: error` for typed gateway failures, so we + // must inspect the event type — not just blindly parse `data:`. 
+ let eventType = 'message'; + const dataLines: string[] = []; + for (const line of frame.split(/\r?\n/)) { + if (line.startsWith('event:')) { + eventType = line.slice('event:'.length).trim(); + } else if (line.startsWith('data:')) { + dataLines.push(line.slice('data:'.length).trim()); + } + } + const data = dataLines.join('\n').trim(); + if (!data || data === '[DONE]') continue; + + if (eventType === 'error') { + // Surface the sanitised backend error code as a thrown failure + // rather than passing it through as an inert chunk. + let code: string | undefined; + try { + code = (JSON.parse(data) as { code?: string }).code; + } catch { + code = undefined; + } + throw new ApiError(`Stream test failed${code ? `: ${code}` : ''}`, 500); + } + + try { + onChunk(JSON.parse(data) as LlmStreamChunk); + } catch { + // Ignore unparseable keepalive frames. + } + } + } + } finally { + reader.releaseLock(); + } + } + + async deleteLlmConnector(id: number): Promise { + await this.fetch(`/api/llm/connectors/${id}`, { method: 'DELETE' }); + } + + // Pin / unpin a connector as the DJ's explicit default (issue #336). When + // pinned, the gateway routes through this connector regardless of which one + // is most-recently-used. 
+ async setLlmConnectorDefault(id: number): Promise { + return this.fetch(`/api/llm/connectors/${id}/default`, { method: 'POST' }); + } + + async unsetLlmConnectorDefault(id: number): Promise { + return this.fetch(`/api/llm/connectors/${id}/default`, { method: 'DELETE' }); + } + + // ========== Per-feature connector preferences (issue #337) ========== + + async listLlmFeaturePreferences(): Promise { + return this.fetch('/api/llm/feature-preferences'); + } + + async setLlmFeaturePreference(data: LlmFeaturePreferenceSet): Promise { + return this.fetch('/api/llm/feature-preferences', { + method: 'POST', + body: JSON.stringify(data), + }); + } + + async clearLlmFeaturePreference(feature: LlmFeatureKey): Promise { + return this.fetch(`/api/llm/feature-preferences/${feature}`, { + method: 'DELETE', + }); + } + + // ========== Admin LLM policy + oversight ========== + + async getAdminLlmPolicy(): Promise { + return this.fetch('/api/admin/llm/policy'); + } + + async updateAdminLlmPolicy(data: LlmAdminPolicyPatch): Promise { + return this.fetch('/api/admin/llm/policy', { + method: 'PATCH', + body: JSON.stringify(data), + }); + } + + async listAllLlmConnectors(): Promise { + return this.fetch('/api/admin/llm/connectors'); + } + + async revokeAdminLlmConnector(id: number): Promise { + return this.fetch(`/api/admin/llm/connectors/${id}/revoke`, { method: 'POST' }); + } + + async getAdminLlmUsage(days = 30): Promise { + return this.fetch(`/api/admin/llm/usage?days=${days}`); + } + + /** + * Set or clear a connector's monthly token cap (admin-only, issue #339). + * Pass `null` to clear the cap (unlimited). 
+ */ + async setAdminLlmConnectorCap( + id: number, + monthlyTokenCap: number | null, + ): Promise { + return this.fetch(`/api/admin/llm/connectors/${id}/cap`, { + method: 'PATCH', + body: JSON.stringify({ monthly_token_cap: monthlyTokenCap }), + }); + } + + // ========== Admin LLM audit trail (issue #341) ========== + + private buildAuditQuery(filters: AdminLlmAuditFilters = {}): URLSearchParams { + const params = new URLSearchParams(); + if (filters.event_type) params.set('event_type', filters.event_type); + if (filters.actor_user_id != null) { + params.set('actor_user_id', String(filters.actor_user_id)); + } + if (filters.target_connector_id != null) { + params.set('target_connector_id', String(filters.target_connector_id)); + } + if (filters.days != null) params.set('days', String(filters.days)); + if (filters.limit != null) params.set('limit', String(filters.limit)); + if (filters.offset != null) params.set('offset', String(filters.offset)); + return params; + } + + async getAdminLlmAudit(filters: AdminLlmAuditFilters = {}): Promise { + const params = this.buildAuditQuery(filters); + return this.fetch(`/api/admin/llm/audit?${params.toString()}`); + } + + /** + * Download the (filtered) audit trail as a CSV Blob. Pagination params are + * ignored server-side for the export — it honors only the filter fields. 
+ */ + async downloadAdminLlmAuditCsv(filters: AdminLlmAuditFilters = {}): Promise { + const params = this.buildAuditQuery({ + event_type: filters.event_type, + actor_user_id: filters.actor_user_id, + target_connector_id: filters.target_connector_id, + days: filters.days, + }); + const response = await this.rawFetch(`/api/admin/llm/audit.csv?${params.toString()}`); + return response.blob(); + } + // ========== Kiosk Pairing ========== async getKioskPairChallenge(): Promise<{ nonce: string; expires_in: number }> { diff --git a/dashboard/next.config.js b/dashboard/next.config.js index 44585cf6..185e530c 100644 --- a/dashboard/next.config.js +++ b/dashboard/next.config.js @@ -15,6 +15,13 @@ const csp = [ const nextConfig = { output: 'standalone', allowedDevOrigins: ['192.168.*.*'], + async redirects() { + return [ + // DJ AI connector/model settings moved into the account page (#357). + // Keep old bookmarks/links working with a permanent (308) redirect. + { source: '/settings/ai', destination: '/account', permanent: true }, + ]; + }, async headers() { return [ { diff --git a/docs/LLM-PLUGIN.md b/docs/LLM-PLUGIN.md new file mode 100644 index 00000000..6d03f304 --- /dev/null +++ b/docs/LLM-PLUGIN.md @@ -0,0 +1,323 @@ +# LLM Adapter Plug-in Guide + +The WrzDJ backend dispatches every LLM call through the **LLM Gateway**, which +selects a connector for the calling user and routes the request through a +provider-specific **adapter**. The set of adapters is open: forks and +third-party deployments can add new providers without modifying any file +under `server/app/services/llm/`. + +This document is the contract that third-party plug-ins write against. + +> Companion guide: [`docs/PLUGIN-ARCHITECTURE.md`](PLUGIN-ARCHITECTURE.md) +> describes the bridge-side equipment plug-in system. The LLM plug-in surface +> follows the same shape: a small ABC, a registry, and a strict typed-error +> contract. 
+ +## Architecture Overview + +```text +Caller (recommendation engine, agentic feature) + │ + ▼ +Gateway.dispatch(db, actor, request, *, purpose) + │ 1. Resolve LlmConnector (per-DJ MRU → org default) + │ 2. registry.get_adapter_class(connector_type) + │ 3. adapter = cls(connector); await adapter.chat(request) + │ 4. Log call + handle fallback policy + ▼ +LlmAdapter (your plug-in) + │ 1. Parse connector.credentials (encrypted JSON blob) + │ 2. Translate ChatRequest → provider-native request + │ 3. Translate provider response → ChatResponse + │ 4. Map provider errors → typed LlmError subclasses + ▼ +Provider HTTP endpoint / SDK +``` + +| Layer | File | Responsibility | +|-------|------|----------------| +| Adapter | `app/services/llm/adapters/*.py` (built-in)
`LLM_PLUGIN_DIR/*.py` (third-party) | Convert between canonical and provider-native shapes; map errors | +| Registry | `app/services/llm/registry.py` | `connector_type` → adapter class lookup | +| Tool translation | `app/services/llm/tool_translation.py` | JSON-Schema `ToolSpec` ↔ provider tool/function shape | +| Gateway | `app/services/llm/gateway.py` | Resolve connector, call adapter, log, handle fallback | +| Models | `app/models/llm_connector.py` | `LlmConnector` row (encrypted credentials), call log, audit log | +| Exceptions | `app/services/llm/exceptions.py` | Typed error hierarchy adapters must raise | + +The connector row stores credentials as **encrypted JSON** via the +`EncryptedText` SQLAlchemy column type — accessing +`connector.credentials` returns the decrypted plaintext blob. Your adapter is +responsible for parsing that blob. + +## The `LlmAdapter` ABC + +Defined in [`app/services/llm/base.py`](../server/app/services/llm/base.py). + +```python +class LlmAdapter(ABC): + connector_type: str = "" # set on the subclass — registry key + + def __init__(self, connector) -> None: + self.connector = connector + + @abstractmethod + async def chat(self, request: ChatRequest) -> ChatResponse: ... + + @abstractmethod + async def health_check(self) -> None: ... +``` + +### Required Class Attribute: `connector_type` + +A short, lowercase, snake-case string. The DB column that stores it is 40 +characters; pick something unique and stable (e.g. `mistral_apikey`, +`groq_apikey`, `local_vllm`). The registry **refuses to bind the same +`connector_type` to two different classes** — that prevents silent shadowing +of built-in adapters. + +### Required Method: `chat()` + +| Property | Contract | +|----------|----------| +| Coroutine | Yes — `async def`. The gateway always awaits. | +| Input | A canonical `ChatRequest`. | +| Output | A canonical `ChatResponse`. | +| Errors | One of the typed `LlmError` subclasses (see below). Never a raw HTTP / SDK exception. 
| +| Side effects | None other than the upstream network call. Do **not** mutate the connector row. | +| Logging | Do not log full prompts, completions, or any credential material. | + +### Required Method: `health_check()` + +Validate the credential against the provider. The gateway calls this from the +admin "Test connector" path. Returns `None` on success; raises the same typed +exceptions as `chat()` on failure. + +Pattern: issue the cheapest possible call (e.g. `max_tokens=1`). The shared +helper `build_healthcheck_request()` in +`app/services/llm/adapters/_httpx_openai.py` is reusable for OpenAI-shaped +endpoints. + +## Canonical Types + +Defined in [`app/services/llm/base.py`](../server/app/services/llm/base.py). +These are **stable** Pydantic models — fields may be added in a minor release +but never renamed or removed without a major-version bump. + +### `ChatRequest` + +| Field | Type | Notes | +|-------|------|-------| +| `messages` | `list[Message]` | Required. `role ∈ {"system", "user", "assistant", "tool"}`. Tool messages carry `tool_call_id`. | +| `tools` | `list[ToolSpec] \| None` | JSON-Schema shape. Translate via `tool_translation.to_*_tools()`. | +| `force_tool` | `str \| None` | Forces a specific tool name; raise `ToolTranslationError` if not in `tools`. | +| `max_tokens` | `int \| None` | Adapters supply a default if `None`. | +| `temperature` | `float \| None` | Pass through verbatim when not `None`. | +| `model` | `str \| None` | Overrides `connector.model_hint`. | +| `timeout_seconds` | `float \| None` | Adapters MAY clamp to a max. | +| `system` | `str \| None` | Provider-native system prompt. Map to the right surface (OpenAI: first system message; Anthropic: top-level `system`). | +| `fallback_policy` | `Literal["none", "org_default", "retry_then_org_default"]` | Handled by the gateway, not the adapter. Ignore. | + +### `ChatResponse` + +| Field | Type | Notes | +|-------|------|-------| +| `text` | `str` | The textual assistant reply. 
Empty string if the model only emitted tool calls. | +| `tool_calls` | `list[ToolCall]` | Empty list when no tools were called. | +| `stop_reason` | `Literal["end_turn", "tool_use", "max_tokens", "error"]` | Required. Map from the provider's native stop reason. | +| `usage` | `TokenUsage \| None` | Counts only — never prompt content. Optional. | +| `model` | `str \| None` | Provider-reported model id (for telemetry). Recommended. | + +### `ToolSpec`, `ToolCall`, `Message` + +See the source. `ToolSpec.input_schema` is a JSON-Schema dict; +`tool_translation.py` knows how to translate it for OpenAI / Anthropic / +Bedrock and parse the response back into canonical `ToolCall` objects. +Reuse those helpers rather than reimplementing them per adapter. + +## Exception Contract + +Defined in [`app/services/llm/exceptions.py`](../server/app/services/llm/exceptions.py). +Every error from the adapter must be one of these. The gateway translates +them into telemetry, audit events, and HTTP response codes; raw provider +errors **must not** reach the caller (they often contain bearer tokens in +error messages — a credential-leak vector). + +| Exception | When to raise | Status hint | +|-----------|---------------|-------------| +| `AuthInvalid` | Credentials are malformed, missing, or rejected (`401`/`403`). Includes "failed to parse the credential JSON". | Marks connector `status="auth_invalid"`; writes audit event. | +| `RateLimited(retry_after_seconds=...)` | Provider returned `429`. Pass through `Retry-After` if present. | Gateway logs and surfaces as `429` to the caller. | +| `QuotaExceeded` | Billing failure (`402`) or provider-specific quota error. | Logged, surfaced as `402` to caller. | +| `ProviderUnavailable` | `5xx`, network failure, timeout, generic SDK error. | Logged, surfaced as `502`. Eligible for fallback. | +| `ToolTranslationError` | Unable to translate input tools or parse the response. | Logged, surfaced as `502`. **Not** a fallback trigger. 
| +| `NoLlmConfigured` | **Gateway-only.** Adapters should not raise this. | – | + +### Mapping example (OpenAI HTTP shape) + +```python +status = response.status_code +if status in (401, 403): + raise AuthInvalid(f"Auth failed (HTTP {status})") +if status == 402: + raise QuotaExceeded("Quota or billing failure") +if status == 429: + retry = response.headers.get("retry-after") + raise RateLimited("Rate limited", retry_after_seconds=int(float(retry)) if retry else None) +if 500 <= status < 600: + raise ProviderUnavailable(f"Upstream error (HTTP {status})") +# 4xx other than the above → almost certainly a translation problem. +raise ToolTranslationError(f"Upstream rejected request (HTTP {status})") +``` + +## Tool Translation + +The canonical `ToolSpec` is JSON-Schema. Adapters should delegate to +[`app/services/llm/tool_translation.py`](../server/app/services/llm/tool_translation.py) +rather than re-implementing the conversion. The module exposes: + +| Helper | Direction | +|--------|-----------| +| `to_openai_tools(tools, force)` | Canonical → OpenAI `tools` + `tool_choice` | +| `parse_openai_response(payload)` | OpenAI body → `ChatResponse` | +| `to_anthropic_tools(tools, force)` | Canonical → Anthropic `tools` + `tool_choice` | +| `parse_anthropic_response(message)` | Anthropic SDK message → `ChatResponse` | +| `to_bedrock_tools(tools, force)` | Canonical → Bedrock Converse `toolConfig` | +| `parse_bedrock_response(payload)` | Bedrock body → `ChatResponse` | + +Adding a new translation pair for a provider whose tool shape genuinely +differs is allowed — open a PR adding helpers under the same naming +convention. Until then, do not silently re-shape tools inside your adapter. + +## Registration + +Register the adapter as the **last statement** of your module: + +```python +register_adapter(MyAdapter.connector_type, MyAdapter) +``` + +That call: + +- Validates the class subclasses `LlmAdapter`. +- Rejects empty `connector_type`. 
+- Rejects double-binding (a different class trying to take an already-bound + key — surfaced as `ValueError` at startup). + +Re-registering the *same* class is a no-op (safe for test re-imports). + +## Loading Third-Party Plug-ins + +There are two supported mechanisms: + +1. **Import from your own code.** Add the file to your fork of the backend + and ensure it gets imported at startup (e.g. add it to the + `app/services/llm/registry.py::_bootstrap` block, or import it from + `app/main.py`). This is the recommended path for forks. + +2. **`LLM_PLUGIN_DIR` env var.** Set the environment variable to a directory + path. At startup the loader + ([`app/services/llm/plugin_loader.py`](../server/app/services/llm/plugin_loader.py)) + imports every `*.py` file in that directory (non-recursive; files starting + with `_` are skipped). Each plug-in is responsible for calling + `register_adapter()` on import. A broken plug-in is logged with a full + stack trace and skipped — it does **not** prevent the rest of the directory + or the backend itself from starting. + +### Security posture for `LLM_PLUGIN_DIR` + +Loading a plug-in grants it the **full privileges of the backend process**. +There is no sandbox; this is the same trust boundary as `pip install`. +Operators must: + +- Treat the plug-in directory as a privileged path. Only the backend's + service account should have write access to it. +- Audit every plug-in's source the same way they would audit a third-party + Python dependency. +- Never set `LLM_PLUGIN_DIR` to a world-writable or multi-tenant path. + +In production we recommend leaving `LLM_PLUGIN_DIR` unset and packaging +trusted plug-ins as ordinary Python modules. The env-var loader exists to +make local experimentation and forks ergonomic. + +## Stable vs Internal API + +The plug-in surface is **the surface listed in this document**. 
Everything +else under `app/services/llm/` is internal — including helper modules, +private functions, and adapter base-class internals not enumerated above. + +| Surface | Stability | +|---------|-----------| +| `LlmAdapter` ABC method signatures (`chat`, `health_check`, `connector_type`) | **Stable.** Breaking change → major version bump. | +| `ChatRequest`, `ChatResponse`, `Message`, `ToolSpec`, `ToolCall`, `TokenUsage` field names + types | **Stable.** Field additions in minor versions; never renames/removals without a major bump. | +| Exception types and their constructor signatures | **Stable.** | +| `register_adapter`, `get_adapter_class`, `list_connector_types`, `is_registered` | **Stable.** | +| `tool_translation.to_*_tools` / `parse_*_response` | **Stable** for the providers documented above. | +| `_httpx_openai`, `url_validator`, `connector_storage` | **Internal.** Reuse at your own risk; may change without notice. | +| `gateway.dispatch` internals (fallback, logging, audit) | **Internal.** Callers must use the public `Gateway.dispatch` entrypoint. | +| `LlmConnector` ORM model | **Internal.** Adapters touch only `connector.credentials`, `connector.model_hint`, and `connector.base_url_plain`. | + +Schema changes to the `LlmConnector` storage shape (encrypted JSON blob keys) +are versioned by `connector_type`. Each provider chooses its own blob keys +in its own migration; the only invariant is that **the blob is a JSON object**. + +## Test Matrix + +Every registered adapter — built-in or third-party — must pass the +parametrised contract tests in +[`server/tests/test_llm_adapter_contract.py`](../server/tests/test_llm_adapter_contract.py). +The contract covers: + +1. The class subclasses `LlmAdapter`. +2. `connector_type` is non-empty and matches the registration key. +3. `chat` and `health_check` are async callables. +4. The constructor accepts a connector row without raising. +5. 
`chat()` raises `AuthInvalid` (or another `LlmError`) for malformed + credential blobs — never a raw `JSONDecodeError`, `KeyError`, or HTTP + exception. +6. The registry returns classes (not instances) and raises `KeyError` on + unknown lookups. + +Adapter-specific HTTP and parsing behaviour belongs in a separate test file +(see the built-in adapters' tests in `test_llm_adapters.py` for the pattern). + +Run the contract test against your adapter: + +```bash +cd server +.venv/bin/pytest tests/test_llm_adapter_contract.py +``` + +If a contract test fails on your adapter, **fix the adapter** — do not +modify the contract. The contract is what lets the gateway dispatch +generically. + +## Reference Skeleton + +The minimum working adapter lives at +[`docs/examples/echo_adapter.py`](examples/echo_adapter.py). It is exercised +by `test_skeleton_echo_adapter_*` in the contract test file, so any change +that breaks the documented surface fails CI immediately. + +## Adding a Plug-in in 5 Minutes + +```bash +# 1. Copy the skeleton. +cp docs/examples/echo_adapter.py /opt/wrzdj/llm_plugins/mistral_apikey.py + +# 2. Edit it: +# - Change `connector_type` to a unique value (e.g. "mistral_apikey"). +# - Replace the echo body with your provider call. +# - Map provider errors to the typed exceptions. + +# 3. Point the backend at the plug-in directory. +export LLM_PLUGIN_DIR=/opt/wrzdj/llm_plugins +uvicorn app.main:app + +# 4. Verify the registry sees it. +python -c "from app.services.llm.registry import list_connector_types; print(list_connector_types())" + +# 5. Run the contract tests. +cd server && .venv/bin/pytest tests/test_llm_adapter_contract.py +``` + +Once your adapter is registered, DJs can create a connector row via +`POST /api/llm/connectors` with `connector_type="mistral_apikey"` and the +gateway will route their requests through your adapter automatically. 
diff --git a/docs/examples/echo_adapter.py b/docs/examples/echo_adapter.py new file mode 100644 index 00000000..40930948 --- /dev/null +++ b/docs/examples/echo_adapter.py @@ -0,0 +1,174 @@ +"""Echo adapter — minimal reference implementation of ``LlmAdapter``. + +This skeleton is the canonical "blank slate" for third-party LLM provider +plug-ins. It implements the full :class:`~app.services.llm.base.LlmAdapter` +contract without making any network calls — every request is echoed back as +the assistant message body. + +Usage in tests:: + + # Self-test against the contract — no production import. + from docs.examples import echo_adapter # noqa: F401 (side-effect: register) + from app.services.llm.registry import get_adapter_class + + cls = get_adapter_class("echo") + response = await cls(connector).chat(request) + +Usage in production (third-party plug-ins):: + + # 1. Copy this file under any module path you control. + # 2. Customize ``connector_type`` and the body of ``chat()``. + # 3. Either: + # a) drop the .py file into the directory pointed to by ``LLM_PLUGIN_DIR``, + # or + # b) import the module from your own bootstrap code at startup. + # 4. The :func:`register_adapter` call at the bottom binds the class to the + # registry the moment the module is imported. + +See ``docs/LLM-PLUGIN.md`` for the full extension contract. + +Security note: this skeleton intentionally does not validate or sanitise the +input it echoes. Real adapters must: +- Treat ``connector.credentials`` as untrusted (the encrypted blob can be + malformed; raise :class:`AuthInvalid` rather than letting :class:`json.JSONDecodeError` + bubble up). +- Translate upstream HTTP/SDK errors into the typed exception hierarchy + (``AuthInvalid`` / ``RateLimited`` / ``QuotaExceeded`` / ``ProviderUnavailable`` + / ``ToolTranslationError``). Raw provider errors must not reach the caller. +- Never log secrets, full prompts, or completion bodies (the gateway only + logs counts). 
+""" + +from __future__ import annotations + +import json +import logging +from typing import Any + +from app.services.llm.base import ( + ChatRequest, + ChatResponse, + ContentBlock, + LlmAdapter, + Message, + TokenUsage, +) +from app.services.llm.exceptions import AuthInvalid +from app.services.llm.registry import register_adapter + +logger = logging.getLogger(__name__) + + +class EchoAdapter(LlmAdapter): + """An adapter that echoes the last user message back as the assistant reply. + + Useful for: + - Wiring tests for the gateway / connector storage layer end-to-end + without depending on a live provider. + - Showing third-party plug-in authors the minimum required surface. + """ + + # The registry key for this adapter. Plug-in authors must change this to a + # unique string before publishing — the registry refuses to register two + # different classes under the same ``connector_type``. + connector_type = "echo" + + # ------------------------------------------------------------------ + # Credential handling + # ------------------------------------------------------------------ + def _read_credentials(self) -> dict[str, Any]: + """Parse the encrypted credential blob, raising AuthInvalid on failure. + + The :class:`~app.models.llm_connector.LlmConnector` row stores + credentials as an encrypted JSON string. Accessing ``self.connector.credentials`` + triggers decryption transparently via the ``EncryptedText`` column + type. After that, parsing is the adapter's responsibility — and every + failure mode here must surface as :class:`AuthInvalid` so the gateway + can mark the connector and emit a clean audit event. 
+ """ + raw = self.connector.credentials or "" + try: + blob = json.loads(raw) + except (json.JSONDecodeError, TypeError) as exc: + raise AuthInvalid("Connector credentials are malformed") from exc + if not isinstance(blob, dict): + raise AuthInvalid("Connector credentials shape is invalid") + return blob + + # ------------------------------------------------------------------ + # LlmAdapter — required methods + # ------------------------------------------------------------------ + async def chat(self, request: ChatRequest) -> ChatResponse: + """Echo the most recent user message back as the assistant reply. + + Real adapters should: + - Translate ``request.messages`` to the provider's native message shape. + - Call ``to__tools(request.tools, request.force_tool)`` from + ``app.services.llm.tool_translation`` to translate tools. + - Call ``parse__response(...)`` from that same module to + translate the response back to ``ChatResponse``. + - Map provider HTTP / SDK errors to the typed exception hierarchy. + """ + # We deliberately read credentials before doing any echoing — that way + # this skeleton exercises the same boundary (malformed creds raise + # AuthInvalid) that real adapters depend on. + self._read_credentials() + + last_user = next( + (m for m in reversed(request.messages) if m.role == "user"), + None, + ) + if last_user is None: + text = "" + else: + text = _flatten_message_text(last_user) + + return ChatResponse( + text=text, + tool_calls=[], + stop_reason="end_turn", + usage=TokenUsage(prompt=len(text.split()), completion=len(text.split())), + # Surface the resolved model name (request override → connector hint + # → adapter default) so call logs and recommendation telemetry stay + # accurate. Real adapters should set this to the *provider-reported* + # model id from the response payload, not the requested model. 
+ model=request.model or self.connector.model_hint or "echo-1", + ) + + async def health_check(self) -> None: + """Validate the credential without exercising the (nonexistent) provider. + + Real adapters should issue a cheap, low-token call (e.g. ``max_tokens=1``) + and raise the same typed exceptions as :meth:`chat`. + """ + # No provider to ping — the credential parse step is enough proof that + # the connector is wired correctly. + self._read_credentials() + + +def _flatten_message_text(msg: Message) -> str: + """Collapse a possibly-multi-block message to plain text. + + Real provider adapters typically keep the block structure; this skeleton + flattens because a string return matches the simplest possible echo. + """ + content = msg.content + if isinstance(content, str): + return content + parts: list[str] = [] + for block in content: + if isinstance(block, ContentBlock): + parts.append(block.text) + elif isinstance(block, dict): + parts.append(block.get("text") or "") + return "".join(parts) + + +# The registry call here is what makes the skeleton "live" — importing this +# module registers the adapter under the ``connector_type`` declared above. +# +# Third-party plug-ins follow the same pattern. The registry refuses to bind +# the same ``connector_type`` to two different classes, so plug-in authors +# must pick a unique value (the ``LlmConnector.connector_type`` column is 40 +# chars; keep it short, lowercase, snake-case, e.g. ``mistral_apikey``). +register_adapter(EchoAdapter.connector_type, EchoAdapter) diff --git a/docs/superpowers/plans/2026-05-25-llm-audit-trail-admin-ui.md b/docs/superpowers/plans/2026-05-25-llm-audit-trail-admin-ui.md new file mode 100644 index 00000000..51474357 --- /dev/null +++ b/docs/superpowers/plans/2026-05-25-llm-audit-trail-admin-ui.md @@ -0,0 +1,62 @@ +# LLM Audit-Trail Admin UI (#341) Implementation Plan + +> **For agentic workers:** Steps use checkbox (`- [ ]`) syntax for tracking. TDD throughout. 
+ +**Goal:** Add an admin-only browse/filter/export UI for the existing `llm_audit_event` table on the `/admin/ai` page. + +**Architecture:** New read-only backend endpoints on `admin_llm.py` (`GET /api/admin/llm/audit` paginated JSON + `GET /api/admin/llm/audit.csv` streaming CSV), both joining actor username + target connector display name (never credentials). New Pydantic schemas. New API-client methods + a new "Audit trail" card section on the existing `/admin/ai` page (the page uses cards as sections — no tab component exists). + +**Tech Stack:** FastAPI, SQLAlchemy 2.0, slowapi, Pydantic v2, Next.js/React 19 + vanilla CSS, vitest. + +**Scope fences:** Edit only `server/app/api/admin_llm.py`, `server/app/schemas/llm.py`, `server/tests/*`, `dashboard/app/admin/ai/page.tsx` (+ `__tests__`), `dashboard/lib/api.ts` (add-only), `dashboard/lib/api-types.ts` (add-only). NO migration. READ-ONLY on `llm_audit_event`. + +--- + +## Task 1: Backend schemas + paginated audit endpoint + +**Files:** +- Modify: `server/app/schemas/llm.py` (add `AuditEventRow`, `AdminAuditOut`) +- Modify: `server/app/api/admin_llm.py` (add `GET /audit`) +- Test: `server/tests/test_llm_admin_audit.py` + +- [ ] Step 1: Write failing tests covering: basic list (admin), 403 for non-admin, filter by event_type, filter by actor_user_id, filter by target_connector_id, days window, pagination (limit/offset + total), joined actor_username + target_connector_display_name, no credentials leaked. +- [ ] Step 2: Run → FAIL (404 / no endpoint). +- [ ] Step 3: Add schemas + endpoint. Query `LlmAuditEvent` left-joined to User (actor) and LlmConnector (target). Filters all optional. `days` default 30, range 1..3650. limit 1..200 default 50, offset >=0. Return rows newest-first + `total`. +- [ ] Step 4: Run → PASS. +- [ ] Step 5: Commit. 
+ +## Task 2: CSV export endpoint + +**Files:** +- Modify: `server/app/api/admin_llm.py` (add `GET /audit.csv`) +- Test: `server/tests/test_llm_admin_audit.py` + +- [ ] Step 1: Write failing tests: CSV content-type + header row + a data row; honors event_type filter; 403 non-admin; cap rows. +- [ ] Step 2: Run → FAIL. +- [ ] Step 3: Implement StreamingResponse with `csv` module; same filter helper as Task 1; cap at 10000 rows. Columns: timestamp, actor, event_type, target_connector, notes (notes column reserved/empty — schema has no notes field; emit blank to honor issue's column list). +- [ ] Step 4: Run → PASS. +- [ ] Step 5: Commit. + +## Task 3: Frontend API client + types + +**Files:** +- Modify: `dashboard/lib/api-types.ts` (add `LlmAdminAudit`, `LlmAuditRow`) +- Modify: `dashboard/lib/api.ts` (add `getAdminLlmAudit`, `getAdminLlmAuditCsvUrl`/download helper) +- Regenerate: `dashboard/lib/api-types.generated.ts` via `npm run types:export && npm run types:generate` + +- [ ] Step 1: Regenerate OpenAPI types so new schemas appear. +- [ ] Step 2: Add manual aliases + client methods. +- [ ] Step 3: tsc passes. +- [ ] Step 4: Commit. + +## Task 4: Audit trail card on /admin/ai page + tests + +**Files:** +- Modify: `dashboard/app/admin/ai/page.tsx` +- Test: `dashboard/app/admin/ai/__tests__/page.test.tsx` + +- [ ] Step 1: Write failing test: renders "Audit trail" heading + a seeded row; filter inputs present; export button present. +- [ ] Step 2: Run → FAIL. +- [ ] Step 3: Implement card: filters (event type select, actor, target connector, days), table (timestamp, actor, event type, connector, notes), pagination (prev/next), CSV export button. +- [ ] Step 4: Run → PASS. Full frontend CI. +- [ ] Step 5: Commit. 
diff --git a/docs/superpowers/plans/2026-05-26-move-dj-ai-settings-to-account.md b/docs/superpowers/plans/2026-05-26-move-dj-ai-settings-to-account.md new file mode 100644 index 00000000..72ae4cd0 --- /dev/null +++ b/docs/superpowers/plans/2026-05-26-move-dj-ai-settings-to-account.md @@ -0,0 +1,96 @@ +# Move DJ AI connector/model settings into the account page Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Relocate the DJ-facing AI connector UI (connect/test/rotate/delete, model hint, Hermes onboarding) from `/settings/ai` into the existing `/account` page, redirect the old route, and update tests — keeping the admin `/admin/ai` UI untouched. + +**Architecture:** Extract the existing `/settings/ai` page body into a reusable client component `components/AiProvidersSection.tsx`. Render it as a third inline card section inside `/account`. Delete the old `/settings/ai` route and add a server-side redirect in `next.config.js` so bookmarks 308 to `/account`. Preserve fail-closed policy behavior verbatim (it moves with the component). + +**Tech Stack:** Next.js 16 (App Router), React 19, TypeScript (strict), vanilla CSS + inline styles, Vitest + Testing Library. + +--- + +### Task 1: Extract AI providers UI into a reusable component + +**Files:** +- Create: `dashboard/components/AiProvidersSection.tsx` +- Reference (source of logic): `dashboard/app/(dj)/settings/ai/page.tsx` + +The component contains ALL connector logic from the current page: policy fetch (`fetchPolicySoft` → `getLlmPolicy`), `allowedTypes` fail-closed memo, connectors list, create form (all provider types incl. bedrock/azure/openai_compatible/openrouter dropdown), test, delete. It must NOT include the page-level `
<main>` wrapper, the "← Dashboard" link header, the `useAuth`/`useRouter` auth-redirect (those stay at the page level — `/account` already does the auth gate). It exports a default React component `AiProvidersSection` rendering a `<section>` that begins with an `<h2>AI / Model providers</h2>` and the existing intro paragraph, then "Connected providers" and the add-provider form. + +- [ ] **Step 1: Create the component** by moving the body. Keep every form field, label text (e.g. `Provider`, `Display name`, `API key`, `Resource name`, `Bedrock model ID`, `Model (optional)`), the OpenRouter model fetch effect, and the fail-closed `allowedTypes` logic identical so existing test assertions still hold. The top of the rendered output is an intro `<h2>` + `<p>`; the rest is the two `<section>
`s. Wrap all of it in a single fragment/section with `style={{ marginTop: '2rem' }}` matching the account-page card rhythm (it will live inside its own card in Task 2, so use a plain wrapper, not a `.card`). + +- [ ] **Step 2: Type-check** — `cd dashboard && npx tsc --noEmit`. Expected: PASS. + +- [ ] **Step 3: Commit** + +```bash +git add dashboard/components/AiProvidersSection.tsx +git commit -m "refactor(ai): extract AI providers UI into reusable component" +``` + +--- + +### Task 2: Render the AI section inside /account and delete old route + +**Files:** +- Modify: `dashboard/app/(dj)/account/page.tsx` +- Modify: `dashboard/next.config.js` (add `redirects()`) +- Delete: `dashboard/app/(dj)/settings/ai/page.tsx` +- Delete: `dashboard/app/(dj)/settings/ai/__tests__/page.test.tsx` (logic re-tested via component in Task 3) +- Delete dir if empty: `dashboard/app/(dj)/settings/` + +- [ ] **Step 1: Import and render** `AiProvidersSection` in `/account`. Add a third card `
<section>` (same wrapper style as Change Email card: `{ background: 'var(--card)', borderRadius: '0.75rem', padding: '1.5rem', marginTop: '1.5rem' }`) below Change Email, containing `<AiProvidersSection />`. Widen the page `<main>
` maxWidth from `480px` to `720px` so the provider form (which used `720px`) is not cramped. + +- [ ] **Step 2: Add redirect** in `next.config.js`: + +```js +async redirects() { + return [ + { source: '/settings/ai', destination: '/account', permanent: true }, + ]; +}, +``` + +- [ ] **Step 3: Delete** the old route file, its test, and the now-empty `settings/` dir. + +- [ ] **Step 4: Grep** `grep -rn "/settings/ai" dashboard/ --include="*.ts" --include="*.tsx" | grep -v node_modules` → expect no remaining nav/link hits (only possibly api-types doc comments, which are fine). + +- [ ] **Step 5: Type-check + lint** — `cd dashboard && npx tsc --noEmit && npm run lint`. Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add dashboard/app/\(dj\)/account/page.tsx dashboard/next.config.js +git add -u dashboard/app/\(dj\)/settings +git commit -m "feat(ai): move DJ AI settings into account page; redirect old route (#357)" +``` + +--- + +### Task 3: Move/adapt the AI tests to the component + account page + +**Files:** +- Create: `dashboard/components/__tests__/AiProvidersSection.test.tsx` (port the old settings/ai tests, importing the component instead of the page; drop the `next/navigation`/`useAuth` mocks that the page-level no longer needs but keep `next/link` mock if used) +- Modify: `dashboard/app/(dj)/account/__tests__/page.test.tsx` (add the AI api methods to the `@/lib/api` mock so the section can mount inside the account page without throwing, and assert the AI heading renders) + +- [ ] **Step 1: Port connector tests** to `AiProvidersSection.test.tsx` — same assertions (lists connectors, fail-closed hides providers, policy filtering, azure/bedrock/openrouter fields, test, delete). Render `<AiProvidersSection />` directly. Keep `vi.mock('next/link', ...)` if the component still uses `Link` (it should NOT — Link header stays on the page; remove the import). 
Mock `@/lib/api` methods used: `listLlmConnectors`, `getLlmPolicy`, `createLlmConnector`, `testLlmConnector`, `deleteLlmConnector`, `listOpenRouterModels`, `getAdminLlmPolicy` (for the "reads DJ-scoped not admin" test). + +- [ ] **Step 2: Update account page test** — extend the existing `vi.mock('@/lib/api', ...)` to add `listLlmConnectors: () => Promise.resolve([])` and `getLlmPolicy: () => Promise.reject(new Error('x'))` (fail-closed, no extra UI). Add a test: AI heading `AI / Model providers` is in the document. + +- [ ] **Step 3: Run frontend tests** — `cd dashboard && npm test -- --run`. Expected: PASS, coverage thresholds met. + +- [ ] **Step 4: Commit** + +```bash +git add dashboard/components/__tests__/AiProvidersSection.test.tsx dashboard/app/\(dj\)/account/__tests__/page.test.tsx +git commit -m "test(ai): relocate AI provider tests to component + account page (#357)" +``` + +--- + +## Self-Review + +- Spec coverage: relocate UI (Task 1+2) ✓; update nav/links (Task 2 grep — only the page itself referenced it) ✓; redirect old route (Task 2) ✓; admin /admin/ai untouched (not touched by any task) ✓; tests moved (Task 3) ✓; fail-closed preserved (logic moved verbatim, retested) ✓. +- Placeholder scan: none. +- Type consistency: component name `AiProvidersSection` used consistently in Tasks 1–3. diff --git a/docs/superpowers/plans/2026-05-26-remove-deprecated-anthropic-env-reads.md b/docs/superpowers/plans/2026-05-26-remove-deprecated-anthropic-env-reads.md new file mode 100644 index 00000000..a09d7160 --- /dev/null +++ b/docs/superpowers/plans/2026-05-26-remove-deprecated-anthropic-env-reads.md @@ -0,0 +1,106 @@ +# Remove deprecated ANTHROPIC_API_KEY env-var reads Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Remove the now-dead legacy `ANTHROPIC_API_KEY` env-var fallback path in the recommendation engine, since the LLM Gateway connector system has been the source of truth for credentials since the MVP. + +**Architecture:** Every production caller of `call_llm` / `generate_llm_suggestions` passes `db` + `actor`, so the gateway path always runs and the `_legacy_call` direct-Anthropic fallback (and its `anthropic_api_key` / `anthropic_max_tokens` / `anthropic_timeout_seconds` config reads) is dead code. We delete that fallback, the unused config fields, and refresh the legacy unit test to drive the gateway path instead. We deliberately KEEP `config.anthropic_api_key` and `config.anthropic_model` because the admin AI-settings/model-listing endpoints and the recommendation response `llm_model` default still read them — removing those is a cross-cutting frontend+API-contract change out of scope for this backend cleanup. + +**Tech Stack:** Python 3.11+, FastAPI, pydantic-settings, pytest. + +--- + +## Design decisions (scope reconciliation) + +The issue's literal grep target is the **uppercase env-var name** `ANTHROPIC_API_KEY`. In non-test code that string appears only in: +- `server/alembic/versions/046_admin_ai_oauth.py` — historical one-shot data migration. **MUST stay** (allowable exception). +- `server/app/services/recommendation/llm_hooks.py:78` — a docstring mention of the dead fallback. **Removed** here. + +The actual env-var *reads* go through the pydantic-settings attribute `config.anthropic_api_key` (lowercase). 
Mapping every read: + +| Location | What it does | Decision | +|---|---|---| +| `llm_client._legacy_call` | direct-Anthropic fallback when `db is None` | **REMOVE** — dead; all callers pass `db` | +| `llm_client._resolve_max_tokens` | reads `anthropic_max_tokens` for gateway `ChatRequest.max_tokens` | **KEEP the cap, drop the config dependency** — inline the `1024` default | +| `llm_hooks.is_llm_available` final fallback | `bool(get_settings().anthropic_api_key)` | **REMOVE** — gateway connector check is authoritative | +| `admin._list_anthropic_models` / `/ai/settings` | live admin observability of the legacy key | **KEEP** — powers admin UI + API contract + frontend tests; out of scope | +| `events.py:986` | `result.llm_model or get_settings().anthropic_model` display default | **KEEP** — `anthropic_model` is a model-name default, not a credential fallback | + +Config fields: +- `anthropic_max_tokens`, `anthropic_timeout_seconds` → **REMOVE** (only the deleted `_legacy_call` and the old `_resolve_max_tokens` body used them; `_resolve_max_tokens` itself stays and returns an inlined default). +- `anthropic_api_key`, `anthropic_model` → **KEEP** (still read by admin + events display). + +--- + +## File Structure + +- `server/app/services/recommendation/llm_client.py` — delete `_legacy_call`, the `AsyncAnthropic` import, the `db is None` branch; inline max-tokens default. +- `server/app/services/recommendation/llm_hooks.py` — drop the `db is None` env-var fallback and the docstring `ANTHROPIC_API_KEY` mention; tighten `is_llm_available` to require `db`. +- `server/app/core/config.py` — remove `anthropic_max_tokens`, `anthropic_timeout_seconds`. +- `server/tests/test_llm_client.py` — replace the `AsyncAnthropic`-patching legacy tests with gateway-path tests. +- `server/tests/test_llm_hooks.py` — drop the env-var-availability assertions. +- `.env.example` — drop the deprecated `ANTHROPIC_*` lines (keep nothing that's dead). +- `CLAUDE.md` — update the Environment section + LLM Gateway note. 
+ +--- + +### Task 1: Remove the dead `_legacy_call` fallback in `llm_client.py` + +**Files:** +- Modify: `server/app/services/recommendation/llm_client.py` +- Test: `server/tests/test_llm_client.py` + +- [ ] **Step 1: Rewrite `TestCallLLM` to drive the gateway path** + +Replace the two `AsyncAnthropic`-patching tests with tests that pass a fake `db` and patch `Gateway.dispatch`, asserting the parse + trim behavior. + +- [ ] **Step 2: Run to verify they fail** (`call_llm` still has the `db is None` branch / `Gateway` not yet the sole path) + +Run: `.venv/bin/pytest tests/test_llm_client.py -q` + +- [ ] **Step 3: Edit `llm_client.py`** + - Remove `from anthropic import AsyncAnthropic`. + - Remove the `if db is None: result = await _legacy_call(...)` branch — make the gateway path unconditional; raise/parse via gateway always. + - Delete `_legacy_call`. + - Replace `_resolve_max_tokens()` body to return a module constant default (`DEFAULT_MAX_TOKENS = 1024`) instead of `get_settings().anthropic_max_tokens`. + - Remove the now-unused `get_settings` import if nothing else uses it. + +- [ ] **Step 4: Run tests** — `.venv/bin/pytest tests/test_llm_client.py -q` → PASS + +- [ ] **Step 5: Commit** + +### Task 2: Tighten `is_llm_available` in `llm_hooks.py` + +**Files:** +- Modify: `server/app/services/recommendation/llm_hooks.py` +- Test: `server/tests/test_llm_hooks.py` + +- [ ] **Step 1: Update `test_llm_hooks.py`** — remove the two assertions that `is_llm_available()` (no db) keys off `anthropic_api_key`; keep/adjust the db-based connector tests. `is_llm_available()` with no db now returns `False`. +- [ ] **Step 2: Run to verify fail.** +- [ ] **Step 3: Edit `llm_hooks.py`** — drop the final `bool(get_settings().anthropic_api_key)` fallback (both the `db is not None` tail and the no-db return → `False`); remove the `ANTHROPIC_API_KEY` docstring bullet and the `db is None` env-var sentence in `generate_llm_suggestions`; remove the now-unused `get_settings` import. 
+- [ ] **Step 4: Run tests** → PASS. +- [ ] **Step 5: Commit.** + +### Task 3: Remove dead config fields + +**Files:** +- Modify: `server/app/core/config.py` + +- [ ] **Step 1: Remove `anthropic_max_tokens` and `anthropic_timeout_seconds`** from the `Settings` class. Keep `anthropic_api_key` and `anthropic_model` (still used by admin + events). +- [ ] **Step 2: Grep** `grep -rn "anthropic_max_tokens\|anthropic_timeout" server/app` → zero hits. +- [ ] **Step 3: Commit.** + +### Task 4: Docs + env example + +**Files:** +- Modify: `.env.example`, `CLAUDE.md` + +- [ ] **Step 1: `.env.example`** — remove the deprecated `ANTHROPIC_API_KEY` / `ANTHROPIC_MODEL` / `ANTHROPIC_MAX_TOKENS` / `ANTHROPIC_TIMEOUT_SECONDS` lines and rewrite the surrounding comment to state credentials are connector-only. +- [ ] **Step 2: `CLAUDE.md`** — update the Anthropic env-var line in the Environment section and the LLM Gateway note (legacy fallback removed). +- [ ] **Step 3: Commit.** + +### Task 5: Full backend CI + acceptance grep + +- [ ] `cd server && .venv/bin/ruff check . && .venv/bin/ruff format --check . && .venv/bin/bandit -r app -c pyproject.toml -q && .venv/bin/pytest --tb=short -q` +- [ ] `grep -rn "ANTHROPIC_API_KEY" server/ | grep -v /tests/` → only the alembic migration hits remain. +- [ ] `.venv/bin/alembic upgrade head && .venv/bin/alembic check` (config field removal must not drift). diff --git a/docs/superpowers/plans/2026-05-26-sse-stream-pooled-db-connection-leak.md b/docs/superpowers/plans/2026-05-26-sse-stream-pooled-db-connection-leak.md new file mode 100644 index 00000000..aa35419b --- /dev/null +++ b/docs/superpowers/plans/2026-05-26-sse-stream-pooled-db-connection-leak.md @@ -0,0 +1,333 @@ +# Fix SSE Stream Pooled DB Connection Leak Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. 
Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Stop the public SSE `event_stream` endpoint from pinning a pooled DB connection for the entire (potentially unbounded) lifetime of an EventSource connection, which exhausts the pool (size 5 + overflow 10 = 15) under modest guest load. + +**Architecture:** Remove the `db: Session = Depends(get_db)` request-scoped dependency from `event_stream`. Run the one-shot existence/auth check inside a short-lived `with SessionLocal() as s:` block that is fully closed (connection returned to the pool) BEFORE the `EventSourceResponse` is returned. The async generator currently performs no per-tick DB access, so it opens no session; if future per-tick DB access is needed it must open its own short-lived `SessionLocal()` session. Existence/auth error responses (404 unknown, 410 archived/expired) are preserved exactly. + +**Tech Stack:** FastAPI, SQLAlchemy 2.0 (QueuePool), sse-starlette, pytest. + +--- + +### Task 1: Regression test proving idle SSE streams hold ~0 pooled DB connections + +**Files:** +- Test: `server/tests/test_sse_pool.py` (create) + +The existing `client`/`db` fixtures override `get_db` with a single shared `StaticPool` SQLite session, so they cannot measure the production `QueuePool`. This test exercises the real `event_stream` endpoint function directly against a real `SessionLocal`-backed engine, asserting the function returns (existence check done) with the pool fully checked back in, and that the returned generator can be opened/closed without checking out a connection. + +- [ ] **Step 1: Write the failing test** + +```python +"""Regression test for issue #356 — SSE event_stream must NOT pin a pooled +DB connection for the lifetime of the stream. + +Before the fix, event_stream declared `db: Session = Depends(get_db)`, so +FastAPI held the session (and its checked-out QueuePool connection) open +until the request finished — which for an EventSource never happens while +the browser holds it open. 
~15 concurrent guest viewers exhausted the pool. + +These tests bypass the conftest StaticPool override and drive a real +QueuePool engine so engine.pool.checkedout() is meaningful. +""" + +import asyncio +from datetime import timedelta + +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from starlette.requests import Request as StarletteRequest + +from app.core.time import utcnow +from app.models.base import Base +from app.models.user import User +from app.models.event import Event +from app.services.auth import get_password_hash + + +@pytest.fixture() +def pooled_engine(monkeypatch): + """A real file-backed SQLite engine using QueuePool (default), so + engine.pool.checkedout() reflects actual checked-out connections. + + Patches app.db.session.SessionLocal AND the name already imported into + app.api.sse so the endpoint resolves our pooled session factory. + """ + import app.db.session as db_session + import app.api.sse as sse_module + + engine = create_engine( + "sqlite:///file:sse_pool_test?mode=memory&cache=shared&uri=true", + pool_size=5, + max_overflow=10, + ) + Base.metadata.create_all(bind=engine) + TestSession = sessionmaker(autocommit=False, autoflush=False, bind=engine) + + monkeypatch.setattr(db_session, "SessionLocal", TestSession) + monkeypatch.setattr(sse_module, "SessionLocal", TestSession, raising=False) + + # Seed an active event using a short-lived session. 
+ with TestSession() as s: + user = User( + username="pooluser", + password_hash=get_password_hash("poolpassword123"), + role="dj", + ) + s.add(user) + s.commit() + s.refresh(user) + evt = Event( + code="POOL01", + join_code="POOLJN", + name="Pool Event", + created_by_user_id=user.id, + expires_at=utcnow() + timedelta(hours=6), + ) + s.add(evt) + s.commit() + + try: + yield engine, TestSession + finally: + Base.metadata.drop_all(bind=engine) + engine.dispose() + + +def _make_request(code: str) -> StarletteRequest: + """Minimal ASGI scope for a GET that reports as connected.""" + scope = { + "type": "http", + "method": "GET", + "path": f"/api/public/events/{code}/stream", + "headers": [], + "query_string": b"", + } + + async def receive(): # pragma: no cover - never drained in these tests + return {"type": "http.disconnect"} + + return StarletteRequest(scope, receive) + + +def test_event_stream_returns_with_pool_checked_in(pooled_engine): + """After event_stream() returns, the existence-check connection must be + back in the pool (checkedout() == 0).""" + from app.api.sse import event_stream + + engine, _ = pooled_engine + assert engine.pool.checkedout() == 0 + + req = _make_request("POOLJN") + response = asyncio.run(event_stream(code="POOLJN", request=req)) + + # EventSourceResponse created, generator not yet iterated. + assert engine.pool.checkedout() == 0 + + +def test_n_concurrent_idle_streams_hold_zero_pool_connections(pooled_engine): + """N concurrent open (idle) SSE streams must hold ~0 pooled connections. + + Open N generators (past pool_size + max_overflow = 15), prime each one + tick so the generator body is actively suspended on queue.get(), then + assert the pool has 0 checked-out connections. Before the fix this would + be N (one pinned per stream) and would TimeoutError past 15. 
+ """ + from app.api.sse import event_stream + + engine, _ = pooled_engine + n = 25 # well past pool capacity (15) + + async def drive(): + generators = [] + for _ in range(n): + req = _make_request("POOLJN") + resp = await event_stream(code="POOLJN", request=req) + gen = resp.body_iterator + generators.append(gen) + + # Prime each generator one step so it subscribes and suspends on + # queue.get(); give the event loop a tick to settle. + primer_tasks = [asyncio.ensure_future(g.__anext__()) for g in generators] + await asyncio.sleep(0.05) + + checked_out = engine.pool.checkedout() + + # Cancel the primers and close generators to release subscriptions. + for t in primer_tasks: + t.cancel() + for g in generators: + await g.aclose() + + return checked_out + + checked_out = asyncio.run(drive()) + assert checked_out == 0, ( + f"Expected 0 pooled connections held by {n} idle SSE streams, " + f"got {checked_out} — the stream is pinning DB connections." + ) + + +def test_event_stream_preserves_404_for_unknown_event(pooled_engine): + """Existence check must still reject unknown codes with 404.""" + from fastapi import HTTPException + + from app.api.sse import event_stream + + req = _make_request("NOEXIS") + with pytest.raises(HTTPException) as exc: + asyncio.run(event_stream(code="NOEXIS", request=req)) + assert exc.value.status_code == 404 +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run (from `server/`): `.venv/bin/pytest tests/test_sse_pool.py -v` +Expected: `test_event_stream_returns_with_pool_checked_in` raises `TypeError` because `event_stream` still requires the `db` parameter (FastAPI `Depends` default is not auto-injected when calling the function directly), and/or the pool assertions fail. RED. + +- [ ] **Step 3: Implement the fix in `server/app/api/sse.py`** + +Remove the `db: Session = Depends(get_db)` parameter. Import `SessionLocal`. Run the existence check in a short-lived session closed before returning. 
+ +```python +"""SSE streaming endpoint for real-time event updates (no authentication required).""" + +import asyncio +import json +import logging +from typing import Any + +from fastapi import APIRouter, HTTPException, Request +from sse_starlette.sse import EventSourceResponse + +from app.core.rate_limit import limiter +from app.db.session import SessionLocal +from app.services.event import EventLookupResult, get_event_by_join_code_with_status +from app.services.event_bus import get_event_bus + +logger = logging.getLogger(__name__) +router = APIRouter() + +DISCONNECT_CHECK_INTERVAL = 15 # seconds + + +async def _event_generator( + request: Request, + event_code: str, +) -> Any: + """Yield SSE events for a given event code until the client disconnects. + + Keepalive pings are handled by sse-starlette's built-in ping task (every 15s). + This generator only yields actual events. The timeout on queue.get() lets us + periodically check for client disconnect without blocking forever. + + NOTE (issue #356): this generator deliberately holds NO DB session. If a + future change needs per-tick DB access it MUST open its own short-lived + `with SessionLocal() as s:` session per tick and close it before awaiting — + never hold a pooled connection across the stream lifetime. + """ + bus = get_event_bus() + queue = bus.subscribe(event_code) + try: + while True: + if await request.is_disconnected(): + break + try: + message = await asyncio.wait_for(queue.get(), timeout=DISCONNECT_CHECK_INTERVAL) + yield { + "event": message["event"], + "data": json.dumps(message["data"]), + } + except TimeoutError: + # No event received — loop to check is_disconnected() + continue + finally: + bus.unsubscribe(event_code, queue) + + +@router.get("/events/{code}/stream") +@limiter.limit("10/minute") +async def event_stream( + code: str, + request: Request, +) -> EventSourceResponse: + """Public SSE endpoint for real-time event updates. + + SECURITY (CRIT-5): rate-limited and existence-checked. 
Before this fix, + the endpoint had no rate limit and no existence check, allowing + unauthenticated DoS (unlimited long-lived connections exhausting FDs) + and passive eavesdropping via 6-char event-code brute force. + + POOL SAFETY (issue #356): the existence/auth check runs in a short-lived + session that is closed (its pooled connection returned) BEFORE the + EventSourceResponse is returned. An EventSource connection can stay open + indefinitely, so we must NOT hold a request-scoped get_db session across + the stream lifetime — doing so pinned one pooled connection per open + stream and exhausted the QueuePool (size 5 + overflow 10) under guest load. + + Event types: + - request_created: New request submitted + - request_status_changed: Request status update + - now_playing_changed: Now-playing track update + - requests_bulk_update: Batch accept/reject + - bridge_status_changed: Bridge connect/disconnect + """ + with SessionLocal() as db: + event, result = get_event_by_join_code_with_status(db, code) + if result == EventLookupResult.NOT_FOUND: + raise HTTPException(status_code=404, detail="Event not found") + if result == EventLookupResult.ARCHIVED: + raise HTTPException(status_code=410, detail="Event has been archived") + if result == EventLookupResult.EXPIRED: + raise HTTPException(status_code=410, detail="Event has expired") + event_code = event.code + + return EventSourceResponse( + _event_generator(request, event_code), + media_type="text/event-stream", + headers={"X-Accel-Buffering": "no"}, + ) +``` + +- [ ] **Step 4: Run the new tests to verify they pass** + +Run (from `server/`): `.venv/bin/pytest tests/test_sse_pool.py -v` +Expected: all 3 tests PASS. + +- [ ] **Step 5: Run existing SSE security tests to confirm no regression** + +Run (from `server/`): `.venv/bin/pytest tests/test_sse_security.py -v` +Expected: all PASS (404/410 existence checks + rate limit preserved). 
+ +- [ ] **Step 6: Full backend CI gate** + +Run (from `server/`): +```bash +.venv/bin/ruff check . +.venv/bin/ruff format --check . +.venv/bin/bandit -r app -c pyproject.toml -q +.venv/bin/pytest --tb=short -q +``` +Expected: all green, coverage >= 80%. + +- [ ] **Step 7: Commit** + +```bash +git add server/app/api/sse.py server/tests/test_sse_pool.py docs/superpowers/plans/2026-05-26-sse-stream-pooled-db-connection-leak.md +git commit -m "fix(sse): don't pin a pooled DB connection for the SSE stream lifetime (#356)" +``` + +--- + +## Self-Review + +**Spec coverage:** +- "Open SSE streams no longer hold a pooled DB connection while idle" → fix removes `Depends(get_db)`, uses `with SessionLocal()` closed before returning; `test_n_concurrent_idle_streams_hold_zero_pool_connections` proves it. +- "A test confirms N concurrent open streams consume ~0 idle pool connections" → `test_n_concurrent_idle_streams_hold_zero_pool_connections` (N=25 > pool capacity 15). +- "Existence/auth checks preserved" → `test_event_stream_preserves_404_for_unknown_event` + existing `test_sse_security.py`. + +**Placeholder scan:** none. + +**Type consistency:** `event_stream(code, request)`, `_event_generator(request, event_code)`, `SessionLocal()` — consistent across plan and fix. diff --git a/docs/superpowers/plans/2026-05-28-llm-cost-quota-caps.md b/docs/superpowers/plans/2026-05-28-llm-cost-quota-caps.md new file mode 100644 index 00000000..318aaa01 --- /dev/null +++ b/docs/superpowers/plans/2026-05-28-llm-cost-quota-caps.md @@ -0,0 +1,1208 @@ +# LLM Cost / Quota Caps per DJ Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Let admins set a monthly token cap per DJ LLM connector; the gateway refuses calls once the current calendar month's usage has met or exceeded the cap, returning a clear DJ-facing message. + +**Architecture:** Add a nullable `monthly_token_cap` integer column to `LlmConnector` (None = unlimited). A direct aggregation query sums `tokens_in + tokens_out` from `llm_call_log` for the current calendar month per connector. The gateway runs a pre-flight check in `dispatch()`: if current month usage already meets/exceeds the cap, raise a new `QuotaCapReached` exception. Admins set caps via a new PATCH endpoint in `admin_llm.py`; the admin UI adds a cap input + usage-vs-cap progress bar per connector row. + +**Tech Stack:** FastAPI, SQLAlchemy 2.0, Alembic, Pydantic v2, Next.js 16 / React 19 (vanilla CSS), pytest, vitest. + +**Why direct aggregation (not a rollup table):** At current volume (`llm_call_log` has a 30-365 day retention window, and it records only DJ-initiated recommendation calls — low hundreds/month per DJ at most), a single indexed `SUM(...) WHERE created_at >= month_start GROUP BY connector_id` is correct and cheap. `llm_call_log.created_at` is already indexed. A materialized view or hourly cron rollup adds operational complexity (refresh scheduling, staleness windows, an extra table + migration) with no measurable benefit until call volume is orders of magnitude higher. Documented here and in the PR; revisit if usage telemetry shows the aggregation query becoming hot. + +--- + +## File Structure + +**Backend:** +- `server/app/models/llm_connector.py` — add `monthly_token_cap: Mapped[int | None]` column on `LlmConnector`. +- `server/alembic/versions/050_llm_connector_monthly_token_cap.py` — new migration (down_revision `049`). +- `server/app/services/llm/exceptions.py` — add `QuotaCapReached(LlmError)`. +- `server/app/services/llm/connector_storage.py` — add `current_month_token_usage(db, connector_id)` aggregation helper + `set_monthly_cap(connector, cap)` setter with validation. 
+- `server/app/services/llm/gateway.py` — add a pre-flight cap check in `dispatch()` before the primary attempt (and before any fallback attempt against a connector with a cap). +- `server/app/schemas/llm.py` — add `monthly_token_cap` to `ConnectorOut`; add `AdminConnectorCapPatch` request schema; add `current_month_tokens` to `AdminConnectorOut`. +- `server/app/api/admin_llm.py` — add `PATCH /connectors/{id}/cap` endpoint; populate `current_month_tokens` in the connectors listing. +- `server/app/api/events.py` — ensure `QuotaCapReached` from the LLM recommendation endpoint surfaces the DJ-facing 429 message instead of the generic 502. + +**Frontend:** +- `dashboard/lib/api-types.generated.ts` — regenerated from backend OpenAPI (do not hand-edit). +- `dashboard/lib/api.ts` — add `setAdminLlmConnectorCap(id, cap)` method. +- `dashboard/app/admin/ai/page.tsx` — add cap input + usage-vs-cap progress bar to each per-DJ connector row. + +**Tests:** +- `server/tests/test_llm_quota_cap.py` — new: aggregation helper, gateway pre-flight enforcement, cap setter validation. +- `server/tests/test_llm_api.py` — extend: admin cap PATCH endpoint (auth, validation, set/clear). +- `dashboard/app/admin/ai/__tests__/` or inline — cap UI rendering + progress bar (if an existing test harness for the page exists; otherwise add focused component-free logic test). 
+ +--- + +## Task 1: Add `QuotaCapReached` exception + +**Files:** +- Modify: `server/app/services/llm/exceptions.py` +- Test: `server/tests/test_llm_quota_cap.py` + +- [ ] **Step 1: Write the failing test** + +Create `server/tests/test_llm_quota_cap.py`: + +```python +"""Tests for per-DJ monthly token caps (issue #339).""" + +from __future__ import annotations + +from app.services.llm.exceptions import LlmError, QuotaCapReached + + +def test_quota_cap_reached_is_llm_error(): + exc = QuotaCapReached("cap reached") + assert isinstance(exc, LlmError) + assert str(exc) == "cap reached" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v` +Expected: FAIL — `ImportError: cannot import name 'QuotaCapReached'` + +- [ ] **Step 3: Add the exception** + +In `server/app/services/llm/exceptions.py`, after the `QuotaExceeded` class: + +```python +class QuotaCapReached(LlmError): + """The DJ's admin-set monthly token cap for this connector is reached. + + Distinct from :class:`QuotaExceeded` (a provider-side billing/quota error): + this is a WrzDJ-internal pre-flight refusal raised *before* any provider + call, so no tokens are spent. The DJ-facing message is fixed and contains + no internal details — see the gateway pre-flight check. 
+ """ +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add server/app/services/llm/exceptions.py server/tests/test_llm_quota_cap.py +git commit -m "feat(llm): add QuotaCapReached exception for monthly token caps" +``` + +--- + +## Task 2: Add `monthly_token_cap` column + migration + +**Files:** +- Modify: `server/app/models/llm_connector.py` +- Create: `server/alembic/versions/050_llm_connector_monthly_token_cap.py` +- Test: `server/tests/test_llm_quota_cap.py` + +- [ ] **Step 1: Write the failing test** + +Append to `server/tests/test_llm_quota_cap.py`: + +```python +import json + +from app.models.llm_connector import LlmConnector +from app.models.user import User +from app.services.auth import get_password_hash + + +def _make_dj(db, username="capdj"): + user = User(username=username, password_hash=get_password_hash("password123"), role="dj") + db.add(user) + db.commit() + db.refresh(user) + return user + + +def _make_connector(db, user, *, monthly_token_cap=None): + row = LlmConnector( + user_id=user.id, + connector_type="openai_apikey", + display_name="Cap connector", + status="active", + credentials=json.dumps({"api_key": "sk-fake-key"}), + model_hint="gpt-5-mini", + monthly_token_cap=monthly_token_cap, + ) + db.add(row) + db.commit() + db.refresh(row) + return row + + +def test_connector_defaults_to_no_cap(db): + user = _make_dj(db) + connector = _make_connector(db, user) + assert connector.monthly_token_cap is None + + +def test_connector_stores_cap(db): + user = _make_dj(db, username="capdj2") + connector = _make_connector(db, user, monthly_token_cap=100_000) + db.refresh(connector) + assert connector.monthly_token_cap == 100_000 +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v` +Expected: FAIL — `TypeError: 'monthly_token_cap' is an invalid keyword argument for LlmConnector` 
+ +- [ ] **Step 3: Add the model column** + +In `server/app/models/llm_connector.py`, inside `LlmConnector`, after the `last_health_check_status` column (before `__table_args__`): + +```python + # Admin-set monthly token cap (issue #339). NULL = unlimited. When set, the + # gateway refuses dispatch once the current calendar month's summed + # tokens_in + tokens_out for this connector meets or exceeds the cap. The + # cap is admin-only (set via /api/admin/llm/connectors/{id}/cap) and is + # checked PRE-FLIGHT only — editing it never disrupts an in-flight call. + monthly_token_cap: Mapped[int | None] = mapped_column(Integer, nullable=True) +``` + +- [ ] **Step 4: Create the migration** + +Create `server/alembic/versions/050_llm_connector_monthly_token_cap.py`: + +```python +"""Add monthly_token_cap to llm_connectors (issue #339). + +Revision ID: 050 +Revises: 049 +Create Date: 2026-05-28 + +Adds an admin-set per-DJ monthly token cap to ``llm_connectors``: + +- ``monthly_token_cap`` (Integer, nullable) — NULL means unlimited. When set, + the LLM gateway refuses dispatch once the current calendar month's summed + ``tokens_in + tokens_out`` for the connector meets or exceeds this value. + +Nullable with no server default so existing connectors stay unlimited. 
+""" + +import sqlalchemy as sa + +from alembic import op + +revision: str = "050" +down_revision: str | None = "049" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "llm_connectors", + sa.Column("monthly_token_cap", sa.Integer(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("llm_connectors", "monthly_token_cap") +``` + +- [ ] **Step 5: Run test to verify it passes** + +Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v` +Expected: PASS (SQLite test DB recreates schema from models) + +- [ ] **Step 6: Verify alembic on isolated Postgres DB** + +Run (isolated DB avoids the shared-DB drift from sibling worktrees): +```bash +DATABASE_URL="postgresql+psycopg://wrzdj:wrzdj@localhost:5432/wrzdj_issue339" .venv/bin/alembic upgrade head +DATABASE_URL="postgresql+psycopg://wrzdj:wrzdj@localhost:5432/wrzdj_issue339" .venv/bin/alembic check +``` +Expected: `No new upgrade operations detected.` + +If the isolated DB was already at head from a prior run, recreate it first: +```bash +docker exec wrzdj-db-1 psql -U wrzdj -d postgres -c "DROP DATABASE IF EXISTS wrzdj_issue339;" -c "CREATE DATABASE wrzdj_issue339;" +``` + +- [ ] **Step 7: Commit** + +```bash +git add server/app/models/llm_connector.py server/alembic/versions/050_llm_connector_monthly_token_cap.py server/tests/test_llm_quota_cap.py +git commit -m "feat(llm): add monthly_token_cap column + migration 050" +``` + +--- + +## Task 3: Add current-month usage aggregation + cap setter helpers + +**Files:** +- Modify: `server/app/services/llm/connector_storage.py` +- Test: `server/tests/test_llm_quota_cap.py` + +- [ ] **Step 1: Write the failing test** + +Append to `server/tests/test_llm_quota_cap.py`: + +```python +from datetime import timedelta + +import pytest + +from app.core.time import utcnow +from app.models.llm_connector import LlmCallLog +from app.services.llm.connector_storage import ( + current_month_token_usage, + set_monthly_cap, +) + + +def 
_log(db, connector_id, *, tokens_in, tokens_out, when=None): + row = LlmCallLog( + connector_id=connector_id, + purpose="test", + status="ok", + latency_ms=10, + tokens_in=tokens_in, + tokens_out=tokens_out, + ) + db.add(row) + db.flush() + if when is not None: + row.created_at = when + db.commit() + return row + + +def test_current_month_usage_sums_in_and_out(db): + user = _make_dj(db, username="usagedj") + connector = _make_connector(db, user) + _log(db, connector.id, tokens_in=100, tokens_out=50) + _log(db, connector.id, tokens_in=10, tokens_out=5) + assert current_month_token_usage(db, connector.id) == 165 + + +def test_current_month_usage_excludes_prior_months(db): + user = _make_dj(db, username="usagedj2") + connector = _make_connector(db, user) + # 40 days ago — previous month, must be excluded. + _log(db, connector.id, tokens_in=1000, tokens_out=1000, when=utcnow() - timedelta(days=40)) + _log(db, connector.id, tokens_in=7, tokens_out=3) + assert current_month_token_usage(db, connector.id) == 10 + + +def test_current_month_usage_treats_null_tokens_as_zero(db): + user = _make_dj(db, username="usagedj3") + connector = _make_connector(db, user) + _log(db, connector.id, tokens_in=None, tokens_out=None) + _log(db, connector.id, tokens_in=5, tokens_out=None) + assert current_month_token_usage(db, connector.id) == 5 + + +def test_set_monthly_cap_accepts_positive_int(db): + user = _make_dj(db, username="capset") + connector = _make_connector(db, user) + set_monthly_cap(connector, 50_000) + assert connector.monthly_token_cap == 50_000 + + +def test_set_monthly_cap_accepts_none_to_clear(db): + user = _make_dj(db, username="capclear") + connector = _make_connector(db, user, monthly_token_cap=10) + set_monthly_cap(connector, None) + assert connector.monthly_token_cap is None + + +def test_set_monthly_cap_rejects_negative(db): + user = _make_dj(db, username="capneg") + connector = _make_connector(db, user) + with pytest.raises(ValueError): + set_monthly_cap(connector, 
-1) +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v` +Expected: FAIL — `ImportError: cannot import name 'current_month_token_usage'` + +- [ ] **Step 3: Implement the helpers** + +In `server/app/services/llm/connector_storage.py`, add a module-level helper for the month boundary and the two functions. Add near the other aggregation helpers (after `get_usage_stats`): + +```python +def _calendar_month_start() -> "datetime": + """First instant (UTC, naive) of the current calendar month.""" + from app.core.time import utcnow + + now = utcnow() + return now.replace(day=1, hour=0, minute=0, second=0, microsecond=0) + + +def current_month_token_usage(db: Session, connector_id: int) -> int: + """Sum tokens_in + tokens_out for ``connector_id`` in the current month. + + Direct aggregation against the indexed ``llm_call_log.created_at`` column. + NULL token counts are coalesced to 0. Returns 0 when there are no rows. + Used by the gateway pre-flight cap check + the admin usage-vs-cap display. + """ + month_start = _calendar_month_start() + total = db.execute( + select( + func.coalesce(func.sum(LlmCallLog.tokens_in), 0) + + func.coalesce(func.sum(LlmCallLog.tokens_out), 0) + ).where( + LlmCallLog.connector_id == connector_id, + LlmCallLog.created_at >= month_start, + ) + ).scalar_one() + return int(total or 0) + + +def set_monthly_cap(connector: LlmConnector, cap: int | None) -> LlmConnector: + """Set (or clear) the connector's monthly token cap. Caller commits. + + ``cap=None`` clears the cap (unlimited). A non-None cap must be a + non-negative integer; negative values are rejected with ``ValueError`` + (→ HTTP 400 at the API boundary). 
+ """ + if cap is not None and cap < 0: + raise ValueError("monthly_token_cap must be a non-negative integer or null") + connector.monthly_token_cap = cap + return connector +``` + +Add `datetime` to the typing import context — the `_calendar_month_start` return annotation uses a string forward-ref `"datetime"`, but for clarity add `from datetime import datetime` at the top of the module if not already imported. Check the existing imports first; if `datetime` is not imported, add it. Then change the annotation to `-> datetime:` (drop the quotes). + +Add both new names to the `__all__` list: + +```python + "current_month_token_usage", + "set_monthly_cap", +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add server/app/services/llm/connector_storage.py server/tests/test_llm_quota_cap.py +git commit -m "feat(llm): add current-month usage aggregation + cap setter" +``` + +--- + +## Task 4: Gateway pre-flight cap enforcement + +**Files:** +- Modify: `server/app/services/llm/gateway.py` +- Test: `server/tests/test_llm_quota_cap.py` + +- [ ] **Step 1: Write the failing test** + +Append to `server/tests/test_llm_quota_cap.py`: + +```python +from unittest.mock import AsyncMock, patch + +from app.services.llm.adapters.openai_apikey import OpenAIApiKeyAdapter +from app.services.llm.base import ChatRequest, ChatResponse, Message, TokenUsage +from app.services.llm.exceptions import QuotaCapReached +from app.services.llm.gateway import Gateway + + +def _req() -> ChatRequest: + return ChatRequest(messages=[Message(role="user", content="hi")]) + + +@pytest.mark.asyncio +async def test_dispatch_allows_when_under_cap(db): + user = _make_dj(db, username="undercap") + connector = _make_connector(db, user, monthly_token_cap=1_000) + _log(db, connector.id, tokens_in=100, tokens_out=100) # 200 used, under 1000 + + fake = ChatResponse(text="ok", tool_calls=[], 
stop_reason="end_turn", + usage=TokenUsage(prompt=5, completion=2)) + with patch.object(OpenAIApiKeyAdapter, "chat", new=AsyncMock(return_value=fake)): + resp = await Gateway.dispatch(db, user, _req(), purpose="test") + assert resp.text == "ok" + + +@pytest.mark.asyncio +async def test_dispatch_refuses_when_cap_reached(db): + user = _make_dj(db, username="atcap") + connector = _make_connector(db, user, monthly_token_cap=200) + _log(db, connector.id, tokens_in=150, tokens_out=50) # 200 used, == cap + + # The adapter must NOT be called — refusal is pre-flight. + chat_mock = AsyncMock() + with patch.object(OpenAIApiKeyAdapter, "chat", new=chat_mock): + with pytest.raises(QuotaCapReached): + await Gateway.dispatch(db, user, _req(), purpose="test") + chat_mock.assert_not_called() + + +@pytest.mark.asyncio +async def test_dispatch_unlimited_when_cap_none(db): + user = _make_dj(db, username="nolimit") + connector = _make_connector(db, user, monthly_token_cap=None) + _log(db, connector.id, tokens_in=10_000, tokens_out=10_000) + + fake = ChatResponse(text="ok", tool_calls=[], stop_reason="end_turn", + usage=TokenUsage(prompt=1, completion=1)) + with patch.object(OpenAIApiKeyAdapter, "chat", new=AsyncMock(return_value=fake)): + resp = await Gateway.dispatch(db, user, _req(), purpose="test") + assert resp.text == "ok" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -k cap -v` +Expected: FAIL — `test_dispatch_refuses_when_cap_reached` fails because the adapter is called and no `QuotaCapReached` is raised. + +- [ ] **Step 3: Implement the pre-flight check** + +In `server/app/services/llm/gateway.py`: + +Add the import for the helper + exception. 
Update the `from app.services.llm.connector_storage import ...` line: + +```python +from app.services.llm.connector_storage import ( + audit_event, + current_month_token_usage, + log_call, +) +``` + +Add `QuotaCapReached` to the exceptions import block: + +```python +from app.services.llm.exceptions import ( + AuthInvalid, + LlmError, + NoLlmConfigured, + ProviderUnavailable, + QuotaCapReached, + QuotaExceeded, + RateLimited, + ToolTranslationError, +) +``` + +Add a module-level helper after `_fallback_trigger`: + +```python +def _enforce_monthly_cap(db: Session, connector: LlmConnector) -> None: + """Pre-flight: refuse dispatch when the connector's monthly cap is reached. + + No-op when the connector has no cap (``monthly_token_cap is None``). + Compares the current calendar month's summed token usage against the cap; + refuses when usage already meets or exceeds it. Raised BEFORE any provider + call, so no tokens are spent and editing the cap never disrupts an + already-dispatched (in-flight) call. + + The error message is fixed and leaks no internals (usage totals, cap value, + connector id) — see issue #339 security note. + """ + cap = connector.monthly_token_cap + if cap is None: + return + used = current_month_token_usage(db, connector.id) + if used >= cap: + raise QuotaCapReached( + "Your monthly token cap is reached. Contact your admin to raise it." + ) +``` + +In `Gateway.dispatch`, add the pre-flight check immediately after `primary = _resolve_connector(...)` / `actor_id = ...` and before "Attempt 1": + +```python + primary = _resolve_connector(db, actor) + actor_id = actor.id if actor else _system_actor_id(db, primary) + + # Pre-flight: refuse if the resolved connector's monthly cap is reached + # (issue #339). Raised before any provider call — no tokens spent. + _enforce_monthly_cap(db, primary) +``` + +Also enforce the cap on the fallback connector before the fallback attempt. 
In the fallback branch, after `fallback = _resolve_org_default(db)` and the `if fallback is None or fallback.id == primary.id: raise` guard, before the `audit_event(...)` write, add: + +```python + # The fallback connector may itself be capped — refuse rather than + # silently spending another DJ's budget. + _enforce_monthly_cap(db, fallback) +``` + +`QuotaCapReached` is a subclass of `LlmError` but is NOT in `_FALLBACK_TRIGGERS`, so `_fallback_trigger()` returns `None` for it and the primary-connector cap refusal short-circuits to `raise` (no fallback) — which is correct: a cap is not a transient/credential error. + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `.venv/bin/pytest tests/test_llm_quota_cap.py -v` +Expected: PASS (all cap tests) + +Run the full gateway suite to confirm no regression: +Run: `.venv/bin/pytest tests/test_llm_gateway.py -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add server/app/services/llm/gateway.py server/tests/test_llm_quota_cap.py +git commit -m "feat(llm): enforce monthly token cap pre-flight in gateway dispatch" +``` + +--- + +## Task 5: Expose cap in schemas + admin connectors listing + +**Files:** +- Modify: `server/app/schemas/llm.py` +- Modify: `server/app/api/admin_llm.py` +- Test: `server/tests/test_llm_api.py` + +- [ ] **Step 1: Write the failing test** + +Add to `server/tests/test_llm_api.py` (find the admin connectors-listing test area; add a new test). First inspect the file for an existing admin connector + admin_headers fixture pattern, then add: + +```python +def test_admin_connectors_listing_includes_cap_and_usage(client, db, admin_headers, dj_user): + # Create a connector for a DJ with a cap, and log some usage this month. 
+ import json as _json + + from app.models.llm_connector import LlmCallLog, LlmConnector + + connector = LlmConnector( + user_id=dj_user.id, + connector_type="openai_apikey", + display_name="Capped", + status="active", + credentials=_json.dumps({"api_key": "sk-fake-key"}), + monthly_token_cap=1000, + ) + db.add(connector) + db.commit() + db.refresh(connector) + db.add(LlmCallLog(connector_id=connector.id, purpose="test", status="ok", + latency_ms=5, tokens_in=120, tokens_out=80)) + db.commit() + + resp = client.get("/api/admin/llm/connectors", headers=admin_headers) + assert resp.status_code == 200 + row = next(r for r in resp.json() if r["id"] == connector.id) + assert row["monthly_token_cap"] == 1000 + assert row["current_month_tokens"] == 200 +``` + +If `test_llm_api.py` has no `dj_user` fixture, create the DJ inline (mirror the local connector-creation helpers already used in that file). + +- [ ] **Step 2: Run test to verify it fails** + +Run: `.venv/bin/pytest tests/test_llm_api.py -k cap_and_usage -v` +Expected: FAIL — `KeyError: 'monthly_token_cap'` or `'current_month_tokens'` + +- [ ] **Step 3: Update schemas** + +In `server/app/schemas/llm.py`: + +Add to `ConnectorOut` (after `last_health_check_status`): + +```python + # Admin-set monthly token cap (issue #339). None = unlimited. + monthly_token_cap: int | None = None +``` + +Add to `AdminConnectorOut` (after `dj_username`): + +```python + # Current calendar-month token usage (tokens_in + tokens_out), so the admin + # UI can render a usage-vs-cap progress bar without a second round-trip. + current_month_tokens: int = 0 +``` + +Add a new request schema near `AdminPolicyPatch`: + +```python +class AdminConnectorCapPatch(BaseModel): + """Admin set/clear a connector's monthly token cap (issue #339). + + ``monthly_token_cap = null`` clears the cap (unlimited). A non-null value + must be a non-negative integer; ``0`` means "no further calls this month". 
+ """ + + monthly_token_cap: int | None = Field(default=None, ge=0, le=1_000_000_000) +``` + +- [ ] **Step 4: Populate `current_month_tokens` in the listing** + +In `server/app/api/admin_llm.py`: + +Import the helper: + +```python +from app.services.llm.connector_storage import ( + AUDIT_POLICY_CHANGED, + AUDIT_REVOKED_BY_ADMIN, + audit_event, + current_month_token_usage, + get_connector, + get_usage_stats, + get_user_label, + list_all_connectors, + revoke_connector, +) +``` + +Update `_connector_to_admin_out` to accept and inject `current_month_tokens`: + +```python +def _connector_to_admin_out( + row: LlmConnector, dj_username: str, current_month_tokens: int = 0 +) -> AdminConnectorOut: + return AdminConnectorOut.model_validate( + { + **{c.name: getattr(row, c.name) for c in LlmConnector.__table__.columns}, + "dj_username": dj_username, + "current_month_tokens": current_month_tokens, + } + ) +``` + +In `list_connectors_admin`, compute usage per row: + +```python + return [ + _connector_to_admin_out( + r, + usernames.get(r.user_id) or f"user#{r.user_id}", + current_month_token_usage(db, r.id), + ) + for r in rows + ] +``` + +Update the two other `_connector_to_admin_out(...)` call sites in `revoke_connector_admin` (and the new cap endpoint in Task 6) to pass `current_month_token_usage(db, row.id)`. 
+ +- [ ] **Step 5: Run test to verify it passes** + +Run: `.venv/bin/pytest tests/test_llm_api.py -k cap_and_usage -v` +Expected: PASS + +- [ ] **Step 6: Commit** + +```bash +git add server/app/schemas/llm.py server/app/api/admin_llm.py server/tests/test_llm_api.py +git commit -m "feat(llm): expose monthly cap + current-month usage in admin listing" +``` + +--- + +## Task 6: Admin PATCH endpoint to set/clear a connector cap + +**Files:** +- Modify: `server/app/api/admin_llm.py` +- Test: `server/tests/test_llm_api.py` + +- [ ] **Step 1: Write the failing test** + +Add to `server/tests/test_llm_api.py`: + +```python +def test_admin_set_connector_cap(client, db, admin_headers, dj_user): + import json as _json + + from app.models.llm_connector import LlmConnector + + connector = LlmConnector( + user_id=dj_user.id, connector_type="openai_apikey", display_name="C", + status="active", credentials=_json.dumps({"api_key": "sk-fake-key"}), + ) + db.add(connector) + db.commit() + db.refresh(connector) + + resp = client.patch( + f"/api/admin/llm/connectors/{connector.id}/cap", + headers=admin_headers, + json={"monthly_token_cap": 50000}, + ) + assert resp.status_code == 200 + assert resp.json()["monthly_token_cap"] == 50000 + + # Clear it. 
+ resp = client.patch( + f"/api/admin/llm/connectors/{connector.id}/cap", + headers=admin_headers, + json={"monthly_token_cap": None}, + ) + assert resp.status_code == 200 + assert resp.json()["monthly_token_cap"] is None + + +def test_admin_set_cap_rejects_negative(client, db, admin_headers, dj_user): + import json as _json + + from app.models.llm_connector import LlmConnector + + connector = LlmConnector( + user_id=dj_user.id, connector_type="openai_apikey", display_name="C2", + status="active", credentials=_json.dumps({"api_key": "sk-fake-key"}), + ) + db.add(connector) + db.commit() + db.refresh(connector) + + resp = client.patch( + f"/api/admin/llm/connectors/{connector.id}/cap", + headers=admin_headers, + json={"monthly_token_cap": -5}, + ) + assert resp.status_code == 422 # Pydantic ge=0 rejection + + +def test_admin_set_cap_404_for_missing_connector(client, admin_headers): + resp = client.patch( + "/api/admin/llm/connectors/999999/cap", + headers=admin_headers, + json={"monthly_token_cap": 100}, + ) + assert resp.status_code == 404 + + +def test_set_cap_requires_admin(client, db, auth_headers, test_user): + # A non-admin (plain DJ) must be rejected. 
+ import json as _json + + from app.models.llm_connector import LlmConnector + + connector = LlmConnector( + user_id=test_user.id, connector_type="openai_apikey", display_name="C3", + status="active", credentials=_json.dumps({"api_key": "sk-fake-key"}), + ) + db.add(connector) + db.commit() + db.refresh(connector) + + resp = client.patch( + f"/api/admin/llm/connectors/{connector.id}/cap", + headers=auth_headers, + json={"monthly_token_cap": 100}, + ) + assert resp.status_code == 403 +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `.venv/bin/pytest tests/test_llm_api.py -k cap -v` +Expected: FAIL — 404/405 (endpoint not yet defined) + +- [ ] **Step 3: Add the endpoint** + +In `server/app/api/admin_llm.py`: + +Add `AdminConnectorCapPatch` to the schema imports and `set_monthly_cap` + audit constant to the storage imports. Record cap changes in the audit log by reusing `AUDIT_POLICY_CHANGED`: it is the closest existing lifecycle event and avoids a model change. A dedicated `AUDIT_CAP_CHANGED` constant is a possible alternative, but this plan uses `AUDIT_POLICY_CHANGED` to avoid touching the model's audit constants and migrations. 
+ +Imports: + +```python +from app.schemas.llm import ( + AdminAuditOut, + AdminConnectorCapPatch, + AdminConnectorOut, + AdminPolicyOut, + AdminPolicyPatch, + AdminUsageOut, + AuditEventRow, + UsageRow, +) +from app.services.llm.connector_storage import ( + AUDIT_POLICY_CHANGED, + AUDIT_REVOKED_BY_ADMIN, + audit_event, + current_month_token_usage, + get_connector, + get_usage_stats, + get_user_label, + list_all_connectors, + revoke_connector, + set_monthly_cap, +) +``` + +Add the endpoint (place it after `revoke_connector_admin`): + +```python +@router.patch("/connectors/{connector_id}/cap", response_model=AdminConnectorOut) +@limiter.limit("30/minute") +def set_connector_cap_admin( + request: FastAPIRequest, + connector_id: int, + payload: AdminConnectorCapPatch, + admin: User = Depends(get_current_admin), + db: Session = Depends(get_db), +) -> AdminConnectorOut: + """Set or clear a connector's monthly token cap (admin-only, issue #339). + + ``monthly_token_cap = null`` clears the cap (unlimited). The change is + pre-flight only: an in-flight gateway call already past its cap check is + unaffected. Pydantic enforces the non-negative bound (``ge=0``). 
+ """ + row = get_connector(db, connector_id) + if row is None: + raise HTTPException(status_code=404, detail="Connector not found") + + try: + set_monthly_cap(row, payload.monthly_token_cap) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + audit_event( + db, + actor_user_id=admin.id, + target_connector_id=row.id, + event_type=AUDIT_POLICY_CHANGED, + ) + db.commit() + db.refresh(row) + return _connector_to_admin_out( + row, get_user_label(db, row.user_id), current_month_token_usage(db, row.id) + ) +``` + +Also update `revoke_connector_admin`'s final return to pass usage: + +```python + return _connector_to_admin_out( + row, get_user_label(db, row.user_id), current_month_token_usage(db, row.id) + ) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `.venv/bin/pytest tests/test_llm_api.py -k cap -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add server/app/api/admin_llm.py server/tests/test_llm_api.py +git commit -m "feat(llm): admin endpoint to set/clear per-connector monthly cap" +``` + +--- + +## Task 7: Surface `QuotaCapReached` as a clear DJ-facing error + +**Files:** +- Modify: `server/app/api/events.py:923-988` (the `/recommendations/llm` endpoint) +- Test: `server/tests/test_llm_recommendation_via_gateway.py` (or `test_llm_quota_cap.py`) + +- [ ] **Step 1: Write the failing test** + +Inspect `server/tests/test_llm_recommendation_via_gateway.py` for the existing event + DJ + connector fixture pattern and how `/recommendations/llm` is exercised. Add a test that pre-fills usage at/over a cap and asserts a 429 with the DJ-facing message: + +```python +def test_llm_recommendation_returns_429_when_cap_reached(client, db, ...): + # ... set up event owned by a DJ with a capped, active connector and + # a connected music service (tidal/beatport token), then log usage >= cap. + # POST /api/events/{code}/recommendations/llm with a prompt. 
+ assert resp.status_code == 429 + assert "monthly token cap is reached" in resp.json()["detail"].lower() +``` + +Model this test on the existing setup in `test_llm_recommendation_via_gateway.py`. If that file's fixtures are too heavy to reuse cleanly, instead unit-test the mapping by patching `generate_recommendations_from_llm` to raise `QuotaCapReached` and asserting the endpoint returns 429 with the message. + +- [ ] **Step 2: Run test to verify it fails** + +Run: `.venv/bin/pytest tests/test_llm_recommendation_via_gateway.py -k cap -v` +Expected: FAIL — endpoint returns 502 (generic) instead of 429 with the cap message. + +- [ ] **Step 3: Handle `QuotaCapReached` before the generic catch** + +In `server/app/api/events.py`, in `get_llm_recommendations`, change the try/except around `generate_recommendations_from_llm` to catch the cap error first: + +```python + from app.services.llm.exceptions import QuotaCapReached + + try: + result = await generate_recommendations_from_llm(db, user, event, prompt_request.prompt) + except QuotaCapReached as exc: + # DJ-facing message only — no internal usage/cap details leaked. + raise HTTPException(status_code=429, detail=str(exc)) from exc + except Exception: + import logging + + logging.getLogger(__name__).exception("LLM recommendation failed") + raise HTTPException( + status_code=502, + detail="LLM service error. Try again or use algorithmic recommendations.", + ) +``` + +Place the `from app.services.llm.exceptions import QuotaCapReached` import with the other local imports at the top of the function (next to the existing `from app.services.recommendation...` imports). 
+ +- [ ] **Step 4: Run test to verify it passes** + +Run: `.venv/bin/pytest tests/test_llm_recommendation_via_gateway.py -k cap -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add server/app/api/events.py server/tests/test_llm_recommendation_via_gateway.py +git commit -m "feat(llm): surface QuotaCapReached as 429 with DJ-facing message" +``` + +--- + +## Task 8: Regenerate frontend types + add api.ts method + +**Files:** +- Modify: `dashboard/lib/api-types.generated.ts` (regenerated) +- Modify: `dashboard/lib/api-types.ts` (add `LlmAdminConnectorCapPatch` alias) +- Modify: `dashboard/lib/api.ts` +- Test: `dashboard/lib/__tests__/api.test.ts` + +- [ ] **Step 1: Regenerate types from backend OpenAPI** + +Run from `dashboard/`: +```bash +npm run types:export +npm run types:generate +``` +Verify `AdminConnectorCapPatch` and `current_month_tokens` / `monthly_token_cap` appear in `dashboard/lib/api-types.generated.ts`. + +- [ ] **Step 2: Add type alias** + +In `dashboard/lib/api-types.ts`, near the other LLM aliases: + +```typescript +export type LlmAdminConnectorCapPatch = Schemas['AdminConnectorCapPatch']; +``` + +- [ ] **Step 3: Write the failing test** + +In `dashboard/lib/__tests__/api.test.ts`, add a test mirroring the existing admin-LLM method tests (find one like `revokeAdminLlmConnector`): + +```typescript +it('setAdminLlmConnectorCap PATCHes the cap endpoint', async () => { + const connector = { id: 7, monthly_token_cap: 5000 }; + mockFetchOnce(connector); + const result = await api.setAdminLlmConnectorCap(7, 5000); + expect(lastFetchUrl()).toContain('/api/admin/llm/connectors/7/cap'); + expect(lastFetchInit().method).toBe('PATCH'); + expect(JSON.parse(lastFetchInit().body as string)).toEqual({ monthly_token_cap: 5000 }); + expect(result).toEqual(connector); +}); +``` + +Adjust `mockFetchOnce`/`lastFetchUrl`/`lastFetchInit` to match the helpers already used in that test file. 
+ +- [ ] **Step 4: Run test to verify it fails** + +Run from `dashboard/`: `npm test -- --run api.test` +Expected: FAIL — `api.setAdminLlmConnectorCap is not a function` + +- [ ] **Step 5: Add the method** + +In `dashboard/lib/api.ts`, in the "Admin LLM policy + oversight" section (after `getAdminLlmUsage`): + +```typescript + async setAdminLlmConnectorCap( + id: number, + monthlyTokenCap: number | null, + ): Promise<LlmAdminConnector> { + return this.fetch(`/api/admin/llm/connectors/${id}/cap`, { + method: 'PATCH', + body: JSON.stringify({ monthly_token_cap: monthlyTokenCap }), + }); + } +``` + +Add `LlmAdminConnectorCapPatch` to the imports if you reference it; the method signature above uses primitives, so an import is optional. + +- [ ] **Step 6: Run test to verify it passes** + +Run from `dashboard/`: `npm test -- --run api.test` +Expected: PASS + +- [ ] **Step 7: Commit** + +```bash +git add dashboard/lib/api-types.generated.ts dashboard/lib/api-types.ts dashboard/lib/api.ts dashboard/lib/__tests__/api.test.ts +git commit -m "feat(ai-ui): add setAdminLlmConnectorCap api client method + types" +``` + +--- + +## Task 9: Admin UI — cap input + usage-vs-cap progress bar + +**Files:** +- Modify: `dashboard/app/admin/ai/page.tsx` +- Test: extend the page's test if one exists, otherwise a focused logic test for the percent helper. + +- [ ] **Step 1: Add a cap-percent helper + extract a small pure function (testable)** + +In `dashboard/app/admin/ai/page.tsx`, add near the top-level helpers (e.g. after `formatTimestamp`): + +```typescript +// Percent of the monthly cap consumed. Returns null when there is no cap +// (unlimited) so the UI can render "Unlimited" instead of a bar. Clamps to +// 0–100 so an over-cap connector (possible: cap lowered mid-month) shows full. 
+function capPercent(used: number, cap: number | null | undefined): number | null { + if (cap == null) return null; + if (cap === 0) return 100; + return Math.min(100, Math.max(0, Math.round((used / cap) * 100))); +} +``` + +- [ ] **Step 2: Add a "Monthly cap" column to the connectors table** + +Add a `<th>Monthly cap</th>` to the table head (after "Result", before "Actions"). + +In each connector `<tr>`, add a cell that shows the current usage, an editable cap input, and a progress bar: + +```tsx + +
+ handleCapBlur(c, e.target.value)} + aria-label={`Monthly token cap for ${c.dj_username} ${c.display_name}`} + /> +
+
+ {c.monthly_token_cap == null + ? `${c.current_month_tokens.toLocaleString()} this month · unlimited` + : `${c.current_month_tokens.toLocaleString()} / ${c.monthly_token_cap.toLocaleString()}`} +
+ {c.monthly_token_cap != null && ( +
+
= 100 + ? 'var(--color-danger)' + : (capPercent(c.current_month_tokens, c.monthly_token_cap) ?? 0) >= 80 + ? 'var(--color-warning, #c08418)' + : 'var(--color-success)', + }} + /> +
+ )} + +``` + +- [ ] **Step 3: Add the `handleCapBlur` handler** + +Add inside the component (near `handleRevoke`): + +```typescript + const handleCapBlur = async (connector: LlmAdminConnector, raw: string) => { + const trimmed = raw.trim(); + // Empty input clears the cap (unlimited). + let next: number | null; + if (trimmed === '') { + next = null; + } else { + const parsed = parseInt(trimmed, 10); + if (Number.isNaN(parsed) || parsed < 0) { + setError('Monthly cap must be a non-negative whole number.'); + return; + } + next = parsed; + } + // No-op when unchanged. + if (next === (connector.monthly_token_cap ?? null)) return; + try { + const updated = await api.setAdminLlmConnectorCap(connector.id, next); + setConnectors((prev) => prev.map((c) => (c.id === connector.id ? updated : c))); + } catch (err) { + setError(err instanceof Error ? err.message : 'Failed to update cap'); + } + }; +``` + +- [ ] **Step 4: Type check + lint + tests** + +Run from `dashboard/`: +```bash +npx tsc --noEmit +npm run lint +npm test -- --run +git checkout next-env.d.ts 2>/dev/null || true +``` +Expected: all green. Fix any type errors (e.g. `current_month_tokens` should be a `number` on `LlmAdminConnector` from the regenerated types). + +- [ ] **Step 5: Commit** + +```bash +git add dashboard/app/admin/ai/page.tsx +git commit -m "feat(ai-ui): admin cap input + usage-vs-cap progress bar per connector" +``` + +--- + +## Task 10: Full local CI + finalize + +- [ ] **Step 1: Backend CI** + +From `server/`: +```bash +.venv/bin/ruff check . +.venv/bin/ruff format --check . +.venv/bin/bandit -r app -c pyproject.toml -q +.venv/bin/pytest --tb=short -q +``` +Fix anything red. Run `.venv/bin/ruff format .` then `.venv/bin/ruff check --fix .` if needed. 
+ +- [ ] **Step 2: Alembic on isolated DB** + +```bash +docker exec wrzdj-db-1 psql -U wrzdj -d postgres -c "DROP DATABASE IF EXISTS wrzdj_issue339;" -c "CREATE DATABASE wrzdj_issue339;" +DATABASE_URL="postgresql+psycopg://wrzdj:wrzdj@localhost:5432/wrzdj_issue339" .venv/bin/alembic upgrade head +DATABASE_URL="postgresql+psycopg://wrzdj:wrzdj@localhost:5432/wrzdj_issue339" .venv/bin/alembic check +``` +Expected: `No new upgrade operations detected.` + +- [ ] **Step 3: Frontend CI** + +From `dashboard/`: +```bash +npm run lint +npx tsc --noEmit +npm test -- --run +git checkout next-env.d.ts 2>/dev/null || true +``` + +- [ ] **Step 4: Push + PR** + +Use `superpowers:finishing-a-development-branch` option 2. Create the PR with `gh pr create --base epic/ai-engine`. PR body MUST include `Closes #339`, a `## Design decisions` section (direct-aggregation rationale, pre-flight-only enforcement, reuse of `AUDIT_POLICY_CHANGED`, 429 mapping), and a note that it targets `epic/ai-engine`. + +--- + +## Self-Review Notes + +- **Spec coverage:** column (T2), aggregation (T3), pre-flight `QuotaCapReached` (T4), admin set/edit endpoint (T6), DJ-facing message (T4 msg + T7 mapping), admin UI cap input + progress bar (T9). Acceptance: cap enforced (T4), clear DJ error (T4/T7), admin edits without disrupting in-flight calls (pre-flight-only, documented T4/T6). ✓ +- **Type consistency:** `current_month_token_usage(db, connector_id)`, `set_monthly_cap(connector, cap)`, `monthly_token_cap`, `current_month_tokens`, `setAdminLlmConnectorCap(id, cap)`, `capPercent(used, cap)`, `handleCapBlur(connector, raw)` used consistently across tasks. ✓ +- **Security:** Pydantic `ge=0` + service `ValueError` guard; admin-only via `get_current_admin`; fixed DJ-facing message leaks no internals; parameterized SQLAlchemy queries only. 
✓ diff --git a/docs/superpowers/plans/2026-05-28-llm-gateway-streaming.md b/docs/superpowers/plans/2026-05-28-llm-gateway-streaming.md new file mode 100644 index 00000000..0b3b06b8 --- /dev/null +++ b/docs/superpowers/plans/2026-05-28-llm-gateway-streaming.md @@ -0,0 +1,1861 @@ +# LLM Gateway Streaming Support Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add provider-agnostic streaming (`Gateway.stream`) to the LLM Gateway with native SSE for OpenAI, Anthropic, and OpenAI-compatible adapters, an authenticated SSE backend endpoint, and a minimal frontend consumer — closing GitHub issue #335. + +**Architecture:** A new `ChatResponseChunk` canonical model carries incremental text, partial tool-call fragments, and (on the final chunk) `stop_reason` + `usage`. The `LlmAdapter` ABC gains an `async def stream(self, request) -> AsyncIterator[ChatResponseChunk]` with a default that raises `StreamingUnsupported`. OpenAI-wire adapters (platform + compatible) parse SSE `data:` lines with incremental tool-call JSON assembly; the Anthropic adapter consumes the SDK's typed event stream (content_block_delta / input_json_delta). `Gateway.stream` mirrors `dispatch`'s connector resolution and writes a single counts-only `llm_call_log` row (plus auth/audit rows) when the stream completes or errors. A new authenticated `POST /api/llm/connectors/{id}/stream-test` endpoint emits `text/event-stream`; client disconnect cancels the upstream request via async generator cleanup. The frontend gets an `apiClient.streamConnectorTest()` consumer using `fetch` + `ReadableStream` (EventSource cannot send the Bearer header). + +**Tech Stack:** Python / FastAPI / `sse_starlette` (already a dep) / `httpx` async streaming / `anthropic` SDK `messages.stream()` / pytest-asyncio. 
Frontend: Next.js / TypeScript / `fetch` streaming. + +--- + +## File Structure + +- **Create** `server/app/services/llm/streaming.py` — shared SSE-line helpers (`iter_sse_data_lines`, OpenAI partial tool-call accumulator). One responsibility: streaming primitives shared by adapters. The `ChatResponseChunk` / `ToolCallDelta` models live in `base.py` and the `StreamingUnsupported` exception in `exceptions.py`. +- **Modify** `server/app/services/llm/base.py` — define `ToolCallDelta` + `ChatResponseChunk`; add `stream()` to the `LlmAdapter` ABC with a non-abstract default that raises `StreamingUnsupported`. +- **Modify** `server/app/services/llm/exceptions.py` — add `StreamingUnsupported(LlmError)`. +- **Modify** `server/app/services/llm/adapters/_httpx_openai.py` — add `stream_openai_chat(...)` async generator. +- **Modify** `server/app/services/llm/adapters/openai_apikey.py` — implement `stream()`. +- **Modify** `server/app/services/llm/adapters/openai_compatible.py` — implement `stream()`. +- **Modify** `server/app/services/llm/adapters/anthropic_apikey.py` — implement `stream()`. +- **Modify** `server/app/services/llm/gateway.py` — add `Gateway.stream(...)` + `_attempt_stream(...)` helper (additive, separate functions — no edits to existing `dispatch`/`_attempt` bodies, to minimize merge conflicts with siblings #337/#339). +- **Modify** `server/app/api/llm.py` — add `POST /connectors/{id}/stream-test` SSE endpoint. +- **Modify** `dashboard/lib/api.ts` — add `streamConnectorTest(id, onChunk)` consumer + a `StreamChunk` type. +- **Modify** `dashboard/app/admin/ai/page.tsx` — wire a minimal "stream test" affordance OR document scope as plumbing-only (decision recorded in Task 11). +- **Create** `server/tests/test_llm_streaming.py` — chunk model + adapter stream parsing (OpenAI text, OpenAI tool-call fragments, Anthropic deltas, compatible, unsupported default). +- **Create** `server/tests/test_llm_gateway_stream.py` — gateway resolution + logging + cancellation propagation. 
+- **Create** `server/tests/test_llm_stream_endpoint.py` — SSE endpoint auth + content-type + body shape. + +--- + +## Task 1: `ChatResponseChunk` model + `StreamingUnsupported` exception + +**Files:** +- Create: `server/app/services/llm/streaming.py` +- Modify: `server/app/services/llm/exceptions.py` +- Modify: `server/app/services/llm/base.py` +- Test: `server/tests/test_llm_streaming.py` + +- [ ] **Step 1: Write the failing test** + +```python +# server/tests/test_llm_streaming.py +"""Tests for streaming primitives: ChatResponseChunk + SSE helpers.""" + +from __future__ import annotations + +from app.services.llm.base import ChatResponseChunk, LlmAdapter +from app.services.llm.exceptions import StreamingUnsupported + + +def test_chunk_defaults_are_empty(): + chunk = ChatResponseChunk() + assert chunk.text_delta == "" + assert chunk.tool_call_deltas == [] + assert chunk.stop_reason is None + assert chunk.usage is None + assert chunk.done is False + + +def test_chunk_final_carries_stop_reason_and_usage(): + from app.services.llm.base import TokenUsage + + chunk = ChatResponseChunk( + stop_reason="end_turn", + usage=TokenUsage(prompt=3, completion=5), + done=True, + ) + assert chunk.done is True + assert chunk.stop_reason == "end_turn" + assert chunk.usage.completion == 5 + + +def test_tool_call_delta_fragment_shape(): + from app.services.llm.base import ToolCallDelta + + delta = ToolCallDelta(index=0, id="call_1", name="search", input_json_fragment='{"q":') + assert delta.index == 0 + assert delta.id == "call_1" + assert delta.name == "search" + assert delta.input_json_fragment == '{"q":' +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -q` +Expected: FAIL with `ImportError: cannot import name 'ChatResponseChunk'` + +- [ ] **Step 3: Add `StreamingUnsupported` to exceptions** + +In `server/app/services/llm/exceptions.py`, append: + +```python + + +class StreamingUnsupported(LlmError): + """The 
resolved adapter does not implement provider-native streaming.""" +``` + +- [ ] **Step 4: Add the chunk + delta models to `base.py` (`streaming.py` comes later)** + +The chunk + delta models live in `base.py` (this step adds them there) so they sit alongside `ChatResponse`. `streaming.py` holds the SSE-line helpers only and is created in Task 3. For this task, only add the models to `base.py`. + +In `server/app/services/llm/base.py`, after `ChatResponse`, add: + +```python +class ToolCallDelta(BaseModel): + """A fragment of a streamed tool call. + + Providers emit tool-call arguments incrementally. ``index`` groups fragments + belonging to the same call (OpenAI sends an array index; Anthropic uses the + content-block index). ``id`` / ``name`` arrive once at the start of a call; + ``input_json_fragment`` carries the raw, possibly-partial argument JSON text. + Consumers concatenate fragments per ``index`` and JSON-parse the result when + the stream completes. + """ + + index: int + id: str | None = None + name: str | None = None + input_json_fragment: str = "" + + +class ChatResponseChunk(BaseModel): + """One incremental chunk of a streamed chat response. + + Non-final chunks carry ``text_delta`` and/or ``tool_call_deltas``. The final + chunk sets ``done=True`` and carries the canonical ``stop_reason`` plus + ``usage`` (when the provider reports it). ``stop_reason``/``usage`` may be + ``None`` on every non-final chunk. 
+ """ + + text_delta: str = "" + tool_call_deltas: list[ToolCallDelta] = Field(default_factory=list) + stop_reason: Literal["end_turn", "tool_use", "max_tokens", "error"] | None = None + usage: TokenUsage | None = None + done: bool = False +``` + +- [ ] **Step 5: Add `stream()` default to the `LlmAdapter` ABC** + +In `server/app/services/llm/base.py`, add this import at the top (merge with existing): + +```python +from collections.abc import AsyncIterator +``` + +Then inside `class LlmAdapter`, after `health_check`, add: + +```python + async def stream(self, request: ChatRequest) -> AsyncIterator[ChatResponseChunk]: + """Stream a chat response as incremental chunks. + + Default raises :class:`StreamingUnsupported`. Adapters that support + provider-native streaming override this. The trailing ``yield`` is + unreachable; it is present only so this method is an async generator + (matching the adapter overrides and the declared return type). + """ + from app.services.llm.exceptions import StreamingUnsupported + + raise StreamingUnsupported( + f"connector_type={self.connector_type!r} does not support streaming" + ) + yield # pragma: no cover (makes this an async generator) +``` + +- [ ] **Step 6: Run test to verify it passes** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -q` +Expected: PASS (3 tests) + +- [ ] **Step 7: Commit** + +```bash +git add server/app/services/llm/base.py server/app/services/llm/exceptions.py server/tests/test_llm_streaming.py +git commit -m "feat(llm): add ChatResponseChunk + streaming ABC default" +``` + +--- + +## Task 2: Default `stream()` raises `StreamingUnsupported` + +**Files:** +- Test: `server/tests/test_llm_streaming.py` + +- [ ] **Step 1: Write the regression test (expected to pass — the default landed in Task 1)** + +Append to `server/tests/test_llm_streaming.py`: + +```python +import pytest + +from app.services.llm.base import ChatRequest, Message + + +class _BareAdapter(LlmAdapter): + connector_type = "bare" + + async def chat(self, request): # pragma: no cover + raise NotImplementedError + + async def health_check(self): # 
pragma: no cover + raise NotImplementedError + + +@pytest.mark.asyncio +async def test_default_stream_raises_streaming_unsupported(): + adapter = _BareAdapter(connector=None) + req = ChatRequest(messages=[Message(role="user", content="hi")]) + with pytest.raises(StreamingUnsupported): + async for _ in adapter.stream(req): + pass +``` + +- [ ] **Step 2: Run test to verify it passes (default already implemented in Task 1)** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py::test_default_stream_raises_streaming_unsupported -q` +Expected: PASS + +- [ ] **Step 3: Commit** + +```bash +git add server/tests/test_llm_streaming.py +git commit -m "test(llm): default adapter stream raises StreamingUnsupported" +``` + +--- + +## Task 3: OpenAI partial tool-call accumulator helper + +**Files:** +- Create: `server/app/services/llm/streaming.py` +- Test: `server/tests/test_llm_streaming.py` + +- [ ] **Step 1: Write the failing test** + +Append to `server/tests/test_llm_streaming.py`: + +```python +def test_parse_openai_stream_line_text(): + from app.services.llm.streaming import parse_openai_stream_event + + chunk = parse_openai_stream_event( + {"choices": [{"delta": {"content": "Hello"}, "finish_reason": None}]} + ) + assert chunk is not None + assert chunk.text_delta == "Hello" + assert chunk.tool_call_deltas == [] + assert chunk.done is False + + +def test_parse_openai_stream_line_tool_call_fragment(): + from app.services.llm.streaming import parse_openai_stream_event + + chunk = parse_openai_stream_event( + { + "choices": [ + { + "delta": { + "tool_calls": [ + { + "index": 0, + "id": "call_1", + "function": {"name": "search", "arguments": '{"q":'}, + } + ] + }, + "finish_reason": None, + } + ] + } + ) + assert chunk is not None + assert chunk.text_delta == "" + assert len(chunk.tool_call_deltas) == 1 + d = chunk.tool_call_deltas[0] + assert d.index == 0 and d.id == "call_1" and d.name == "search" + assert d.input_json_fragment == '{"q":' + + +def 
test_parse_openai_stream_line_finish(): + from app.services.llm.streaming import parse_openai_stream_event + + chunk = parse_openai_stream_event( + { + "choices": [{"delta": {}, "finish_reason": "tool_calls"}], + "usage": {"prompt_tokens": 7, "completion_tokens": 11}, + } + ) + assert chunk is not None + assert chunk.done is True + assert chunk.stop_reason == "tool_use" + assert chunk.usage is not None and chunk.usage.prompt == 7 +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k parse_openai_stream -q` +Expected: FAIL with `ModuleNotFoundError`/`ImportError` for `streaming.parse_openai_stream_event` + +- [ ] **Step 3: Create `streaming.py` helpers** + +```python +# server/app/services/llm/streaming.py +"""Shared streaming primitives for LLM adapters. + +Holds SSE-line parsing helpers reused by the OpenAI-wire adapters. The chunk / +delta models themselves live in ``base.py`` alongside ``ChatResponse``. +""" + +from __future__ import annotations + +from app.services.llm.base import ChatResponseChunk, ToolCallDelta, TokenUsage +from app.services.llm.tool_translation import _normalise_finish_reason # noqa: PLC2701 + +# OpenAI streaming finish_reason → canonical, reusing the non-stream mapping. +_FINISH_REASON_OPENAI = { + "stop": "end_turn", + "tool_calls": "tool_use", + "function_call": "tool_use", + "length": "max_tokens", +} + + +def parse_openai_stream_event(payload: dict) -> ChatResponseChunk | None: + """Translate one parsed OpenAI streaming JSON object into a chunk. + + Returns ``None`` for payloads carrying no usable signal (e.g. the initial + role-only delta). The final event sets ``done=True`` with the mapped + ``stop_reason`` and (when present) token usage. 
+ """ + choices = payload.get("choices") or [] + choice = choices[0] if choices else {} + delta = choice.get("delta") or {} + + text_delta = delta.get("content") or "" + + tool_call_deltas: list[ToolCallDelta] = [] + for tc in delta.get("tool_calls") or []: + if not isinstance(tc, dict): + continue + fn = tc.get("function") or {} + tool_call_deltas.append( + ToolCallDelta( + index=int(tc.get("index", 0)), + id=tc.get("id"), + name=(fn.get("name") if isinstance(fn, dict) else None), + input_json_fragment=( + (fn.get("arguments") or "") if isinstance(fn, dict) else "" + ), + ) + ) + + finish_reason = choice.get("finish_reason") + usage_payload = payload.get("usage") or {} + + done = finish_reason is not None + stop_reason = None + usage = None + if done: + stop_reason = _normalise_finish_reason(finish_reason, _FINISH_REASON_OPENAI) + if usage_payload: + usage = TokenUsage( + prompt=int(usage_payload.get("prompt_tokens", 0)), + completion=int(usage_payload.get("completion_tokens", 0)), + ) + + if not text_delta and not tool_call_deltas and not done: + return None + + return ChatResponseChunk( + text_delta=text_delta, + tool_call_deltas=tool_call_deltas, + stop_reason=stop_reason, + usage=usage, + done=done, + ) +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k parse_openai_stream -q` +Expected: PASS (3 tests) + +- [ ] **Step 5: Commit** + +```bash +git add server/app/services/llm/streaming.py server/tests/test_llm_streaming.py +git commit -m "feat(llm): OpenAI streaming event → ChatResponseChunk parser" +``` + +--- + +## Task 4: `stream_openai_chat` async generator (httpx) + +**Files:** +- Modify: `server/app/services/llm/adapters/_httpx_openai.py` +- Test: `server/tests/test_llm_streaming.py` + +- [ ] **Step 1: Write the failing test (mock httpx streaming response)** + +Append to `server/tests/test_llm_streaming.py`: + +```python +class _FakeStreamResponse: + """Minimal stand-in for httpx 
streaming response.""" + + def __init__(self, lines: list[str], status_code: int = 200): + self._lines = lines + self.status_code = status_code + self.headers = {} + + async def aiter_lines(self): + for line in self._lines: + yield line + + async def aread(self): + return b"" + + +class _FakeStreamClient: + def __init__(self, response): + self._response = response + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + return False + + def stream(self, method, url, **kwargs): + client = self + + class _Ctx: + async def __aenter__(self_inner): + return client._response + + async def __aexit__(self_inner, *exc): + return False + + return _Ctx() + + +@pytest.mark.asyncio +async def test_stream_openai_chat_yields_text_then_final(monkeypatch): + from app.services.llm.adapters import _httpx_openai + + sse_lines = [ + 'data: {"choices":[{"delta":{"role":"assistant"},"finish_reason":null}]}', + 'data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":null}]}', + 'data: {"choices":[{"delta":{"content":" there"},"finish_reason":null}]}', + 'data: {"choices":[{"delta":{},"finish_reason":"stop"}],' + '"usage":{"prompt_tokens":4,"completion_tokens":2}}', + "data: [DONE]", + ] + fake_resp = _FakeStreamResponse(sse_lines) + monkeypatch.setattr( + _httpx_openai.httpx, "AsyncClient", lambda *a, **k: _FakeStreamClient(fake_resp) + ) + + req = ChatRequest(messages=[Message(role="user", content="hi")], model="gpt-x") + chunks = [] + async for c in _httpx_openai.stream_openai_chat( + base_url="https://api.openai.com/v1", + api_key="sk-test", + request=req, + fallback_model="gpt-x", + ): + chunks.append(c) + + text = "".join(c.text_delta for c in chunks) + assert text == "Hi there" + assert chunks[-1].done is True + assert chunks[-1].stop_reason == "end_turn" + assert chunks[-1].usage.prompt == 4 +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest 
tests/test_llm_streaming.py::test_stream_openai_chat_yields_text_then_final -q` +Expected: FAIL — `stream_openai_chat` not defined + +- [ ] **Step 3: Implement `stream_openai_chat` in `_httpx_openai.py`** + +Add imports near the top of `server/app/services/llm/adapters/_httpx_openai.py`: + +```python +from collections.abc import AsyncIterator + +from app.services.llm.base import ChatResponseChunk +from app.services.llm.exceptions import AuthInvalid, QuotaExceeded, RateLimited +from app.services.llm.streaming import parse_openai_stream_event +``` + +(Merge with the existing import block; `ProviderUnavailable` / `ToolTranslationError` are already imported.) + +Add this function after `call_openai_chat`: + +```python +def _map_stream_status(status_code: int) -> None: + """Raise the canonical typed error for a non-2xx streaming status.""" + if status_code in (401, 403): + raise AuthInvalid(f"Auth failed (HTTP {status_code})") + if status_code == 402: + raise QuotaExceeded("Quota or billing failure (HTTP 402)") + if status_code == 429: + raise RateLimited("Rate limited (HTTP 429)") + if 500 <= status_code < 600: + raise ProviderUnavailable(f"Upstream error (HTTP {status_code})") + raise ToolTranslationError(f"Upstream rejected request (HTTP {status_code})") + + +async def stream_openai_chat( + *, + base_url: str, + api_key: str | None, + request: ChatRequest, + fallback_model: str | None, + extra_headers: dict | None = None, + max_tokens_field: str = "max_tokens", +) -> AsyncIterator[ChatResponseChunk]: + """Issue a streaming Chat Completions request, yielding canonical chunks. + + Cancellation: if the consumer stops iterating (e.g. SSE client disconnect), + the ``async with client.stream(...)`` context exits and httpx closes the + upstream connection, cancelling the provider request. Errors are mapped to + canonical typed exceptions before the first chunk; mid-stream network drops + surface as ``ProviderUnavailable``. 
+ """ + model = request.model or fallback_model + if not model: + raise ToolTranslationError( + "model is required (set ChatRequest.model or LlmConnector.model_hint)" + ) + + endpoint = _build_chat_endpoint(base_url) + headers: dict[str, str] = { + "Content-Type": "application/json", + "Accept": "text/event-stream", + } + if api_key: + headers["Authorization"] = f"Bearer {api_key}" # nosec B106 + if extra_headers: + headers.update(extra_headers) + + timeout = request.timeout_seconds or DEFAULT_TIMEOUT_SECONDS + timeout = min(max(timeout, 1.0), MAX_TIMEOUT_SECONDS) + + payload = _build_payload(request, model, max_tokens_field=max_tokens_field) + payload["stream"] = True + # Ask OpenAI to include usage in the terminal stream event. + payload["stream_options"] = {"include_usage": True} + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + async with client.stream( + "POST", endpoint, json=payload, headers=headers + ) as resp: + if resp.status_code >= 300: + # Drain the (small) error body so the connection releases. + await resp.aread() + _map_stream_status(resp.status_code) + async for line in resp.aiter_lines(): + if not line or not line.startswith("data:"): + continue + data = line[len("data:") :].strip() + if data == "[DONE]": + break + try: + obj = json.loads(data) + except json.JSONDecodeError: + # Tolerate keepalive/comment lines. 
+ continue + chunk = parse_openai_stream_event(obj) + if chunk is not None: + yield chunk + except httpx.TimeoutException as exc: + raise ProviderUnavailable("Upstream timeout") from exc + except httpx.HTTPError as exc: + raise ProviderUnavailable("Upstream network error") from exc +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py::test_stream_openai_chat_yields_text_then_final -q` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add server/app/services/llm/adapters/_httpx_openai.py server/tests/test_llm_streaming.py +git commit -m "feat(llm): httpx OpenAI-wire streaming generator" +``` + +--- + +## Task 5: OpenAI Platform + OpenAI-compatible adapter `stream()` + +**Files:** +- Modify: `server/app/services/llm/adapters/openai_apikey.py` +- Modify: `server/app/services/llm/adapters/openai_compatible.py` +- Test: `server/tests/test_llm_streaming.py` + +- [ ] **Step 1: Write the failing test** + +Append to `server/tests/test_llm_streaming.py`: + +```python +@pytest.mark.asyncio +async def test_openai_apikey_adapter_stream(monkeypatch): + import json as _json + + from app.models.llm_connector import LlmConnector + from app.services.llm.adapters import openai_apikey + from app.services.llm.base import ChatResponseChunk + + captured = {} + + async def fake_stream(**kwargs): + captured.update(kwargs) + yield ChatResponseChunk(text_delta="ok", done=False) + yield ChatResponseChunk(stop_reason="end_turn", done=True) + + monkeypatch.setattr(openai_apikey, "stream_openai_chat", fake_stream) + + connector = LlmConnector( + user_id=1, + connector_type="openai_apikey", + display_name="x", + status="active", + credentials=_json.dumps({"api_key": "sk-test"}), + model_hint="gpt-x", + ) + adapter = openai_apikey.OpenAIApiKeyAdapter(connector) + req = ChatRequest(messages=[Message(role="user", content="hi")]) + chunks = [c async for c in adapter.stream(req)] + assert [c.text_delta for c in chunks] == 
["ok", ""] + assert chunks[-1].done is True + assert captured["max_tokens_field"] == "max_completion_tokens" + assert captured["api_key"] == "sk-test" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py::test_openai_apikey_adapter_stream -q` +Expected: FAIL — adapter `stream` falls through to default `StreamingUnsupported` + +- [ ] **Step 3: Implement `stream()` in `openai_apikey.py`** + +Add to imports: + +```python +from collections.abc import AsyncIterator + +from app.services.llm.adapters._httpx_openai import ( + build_healthcheck_request, + call_openai_chat, + stream_openai_chat, +) +from app.services.llm.base import ChatResponseChunk +``` + +(Merge with the existing `_httpx_openai` import — add `stream_openai_chat`.) + +Add the method to `OpenAIApiKeyAdapter` (after `health_check`): + +```python + async def stream(self, request: ChatRequest) -> AsyncIterator[ChatResponseChunk]: + api_key = self._extract_api_key() + async for chunk in stream_openai_chat( + base_url=OPENAI_BASE_URL, + api_key=api_key, + request=request, + fallback_model=self.connector.model_hint or DEFAULT_MODEL, + max_tokens_field=_MAX_TOKENS_FIELD, + ): + yield chunk +``` + +- [ ] **Step 4: Implement `stream()` in `openai_compatible.py`** + +Add to imports: + +```python +from collections.abc import AsyncIterator + +from app.services.llm.adapters._httpx_openai import ( + build_healthcheck_request, + call_openai_chat, + stream_openai_chat, +) +from app.services.llm.base import ChatResponseChunk +``` + +Add the method to `OpenAICompatibleAdapter` (after `health_check`): + +```python + async def stream(self, request: ChatRequest) -> AsyncIterator[ChatResponseChunk]: + base_url, bearer = self._extract_credentials() + async for chunk in stream_openai_chat( + base_url=base_url, + api_key=bearer, + request=request, + fallback_model=self.connector.model_hint or DEFAULT_MODEL, + ): + yield chunk +``` + +- [ ] **Step 5: Add a 
compatible-adapter stream test** + +Append to `server/tests/test_llm_streaming.py`: + +```python +@pytest.mark.asyncio +async def test_openai_compatible_adapter_stream(monkeypatch): + import json as _json + + from app.models.llm_connector import LlmConnector + from app.services.llm.adapters import openai_compatible + from app.services.llm.base import ChatResponseChunk + + async def fake_stream(**kwargs): + assert kwargs["base_url"] == "http://127.0.0.1:1234/v1" + yield ChatResponseChunk(text_delta="hey", done=False) + yield ChatResponseChunk(stop_reason="end_turn", done=True) + + monkeypatch.setattr(openai_compatible, "stream_openai_chat", fake_stream) + + connector = LlmConnector( + user_id=1, + connector_type="openai_compatible", + display_name="local", + status="active", + credentials=_json.dumps({"base_url": "http://127.0.0.1:1234/v1"}), + model_hint="local-model", + ) + adapter = openai_compatible.OpenAICompatibleAdapter(connector) + req = ChatRequest(messages=[Message(role="user", content="hi")]) + chunks = [c async for c in adapter.stream(req)] + assert "".join(c.text_delta for c in chunks) == "hey" + assert chunks[-1].done is True +``` + +- [ ] **Step 6: Run tests to verify they pass** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k "openai_apikey_adapter_stream or openai_compatible_adapter_stream" -q` +Expected: PASS (2 tests) + +- [ ] **Step 7: Commit** + +```bash +git add server/app/services/llm/adapters/openai_apikey.py server/app/services/llm/adapters/openai_compatible.py server/tests/test_llm_streaming.py +git commit -m "feat(llm): streaming for OpenAI platform + compatible adapters" +``` + +--- + +## Task 6: Anthropic adapter `stream()` + +**Files:** +- Modify: `server/app/services/llm/adapters/anthropic_apikey.py` +- Test: `server/tests/test_llm_streaming.py` + +- [ ] **Step 1: Write the failing test (fake SDK event stream)** + +Append to `server/tests/test_llm_streaming.py`: + +```python +class _FakeEvent: + """Stand-in for an 
anthropic SDK stream event (attribute access).""" + + def __init__(self, **kw): + for k, v in kw.items(): + setattr(self, k, v) + + +class _FakeAnthropicStream: + def __init__(self, events): + self._events = events + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + return False + + async def __aiter__(self): + for e in self._events: + yield e + + +def _anthropic_text_events(): + return [ + _FakeEvent(type="message_start"), + _FakeEvent( + type="content_block_start", + index=0, + content_block=_FakeEvent(type="text", text=""), + ), + _FakeEvent( + type="content_block_delta", + index=0, + delta=_FakeEvent(type="text_delta", text="Hel"), + ), + _FakeEvent( + type="content_block_delta", + index=0, + delta=_FakeEvent(type="text_delta", text="lo"), + ), + _FakeEvent(type="content_block_stop", index=0), + _FakeEvent( + type="message_delta", + delta=_FakeEvent(stop_reason="end_turn"), + usage=_FakeEvent(output_tokens=5), + ), + _FakeEvent(type="message_stop"), + ] + + +@pytest.mark.asyncio +async def test_anthropic_adapter_stream_text(monkeypatch): + import json as _json + + from app.models.llm_connector import LlmConnector + from app.services.llm.adapters import anthropic_apikey + + class _FakeMessages: + def stream(self, **kwargs): + return _FakeAnthropicStream(_anthropic_text_events()) + + class _FakeClient: + def __init__(self, *a, **k): + self.messages = _FakeMessages() + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + return False + + monkeypatch.setattr(anthropic_apikey, "AsyncAnthropic", _FakeClient) + + connector = LlmConnector( + user_id=1, + connector_type="anthropic_apikey", + display_name="claude", + status="active", + credentials=_json.dumps({"api_key": "sk-ant-test"}), + model_hint="claude-x", + ) + adapter = anthropic_apikey.AnthropicApiKeyAdapter(connector) + req = ChatRequest(messages=[Message(role="user", content="hi")]) + chunks = [c async for c in adapter.stream(req)] + assert 
"".join(c.text_delta for c in chunks) == "Hello" + assert chunks[-1].done is True + assert chunks[-1].stop_reason == "end_turn" + assert chunks[-1].usage.completion == 5 + + +def _anthropic_tool_events(): + return [ + _FakeEvent(type="message_start"), + _FakeEvent( + type="content_block_start", + index=0, + content_block=_FakeEvent(type="tool_use", id="toolu_1", name="search"), + ), + _FakeEvent( + type="content_block_delta", + index=0, + delta=_FakeEvent(type="input_json_delta", partial_json='{"q":'), + ), + _FakeEvent( + type="content_block_delta", + index=0, + delta=_FakeEvent(type="input_json_delta", partial_json='"house"}'), + ), + _FakeEvent(type="content_block_stop", index=0), + _FakeEvent( + type="message_delta", + delta=_FakeEvent(stop_reason="tool_use"), + usage=_FakeEvent(output_tokens=9), + ), + _FakeEvent(type="message_stop"), + ] + + +@pytest.mark.asyncio +async def test_anthropic_adapter_stream_tool_use(monkeypatch): + import json as _json + + from app.models.llm_connector import LlmConnector + from app.services.llm.adapters import anthropic_apikey + + class _FakeMessages: + def stream(self, **kwargs): + return _FakeAnthropicStream(_anthropic_tool_events()) + + class _FakeClient: + def __init__(self, *a, **k): + self.messages = _FakeMessages() + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + return False + + monkeypatch.setattr(anthropic_apikey, "AsyncAnthropic", _FakeClient) + + connector = LlmConnector( + user_id=1, + connector_type="anthropic_apikey", + display_name="claude", + status="active", + credentials=_json.dumps({"api_key": "sk-ant-test"}), + model_hint="claude-x", + ) + adapter = anthropic_apikey.AnthropicApiKeyAdapter(connector) + req = ChatRequest(messages=[Message(role="user", content="hi")]) + chunks = [c async for c in adapter.stream(req)] + + # Reassemble tool-call fragments by index. 
+ frags = [d for c in chunks for d in c.tool_call_deltas] + assert frags[0].id == "toolu_1" and frags[0].name == "search" + joined = "".join(d.input_json_fragment for d in frags) + assert _json.loads(joined) == {"q": "house"} + assert chunks[-1].done is True + assert chunks[-1].stop_reason == "tool_use" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k anthropic_adapter_stream -q` +Expected: FAIL — `StreamingUnsupported` + +- [ ] **Step 3: Implement `stream()` in `anthropic_apikey.py`** + +Add to imports (merge with existing): + +```python +from collections.abc import AsyncIterator + +from app.services.llm.base import ( + ChatRequest, + ChatResponse, + ChatResponseChunk, + LlmAdapter, + Message, + TokenUsage, + ToolCallDelta, +) +from app.services.llm.exceptions import ( + AuthInvalid, + ProviderUnavailable, + QuotaExceeded, + RateLimited, + ToolTranslationError, +) +``` + +Add a module-level finish-reason map near `DEFAULT_MODEL`: + +```python +_STREAM_FINISH_REASON = { + "end_turn": "end_turn", + "stop_sequence": "end_turn", + "tool_use": "tool_use", + "max_tokens": "max_tokens", +} +``` + +Add the method (after `health_check`): + +```python + async def stream(self, request: ChatRequest) -> AsyncIterator[ChatResponseChunk]: + model = request.model or self.connector.model_hint or DEFAULT_MODEL + max_tokens = request.max_tokens or DEFAULT_MAX_TOKENS + timeout = min( + max(request.timeout_seconds or DEFAULT_TIMEOUT_SECONDS, 1.0), + MAX_TIMEOUT_SECONDS, + ) + + anthropic_messages = to_anthropic_messages(request.messages) + tools, choice = to_anthropic_tools(request.tools, request.force_tool) + + kwargs: dict[str, Any] = { + "model": model, + "max_tokens": max_tokens, + "messages": anthropic_messages, + } + if request.system: + kwargs["system"] = request.system + if request.temperature is not None: + kwargs["temperature"] = request.temperature + if tools: + kwargs["tools"] = tools + if choice is 
not None: + kwargs["tool_choice"] = choice + + # Per-content-block index → tool id/name (sent once at block start). + block_index_to_tool: dict[int, str] = {} + stop_reason: str | None = None + output_tokens: int | None = None + + try: + async with self._client(timeout=timeout) as client: + async with client.messages.stream(**kwargs) as stream: + async for event in stream: + etype = getattr(event, "type", None) + if etype == "content_block_start": + block = getattr(event, "content_block", None) + if getattr(block, "type", None) == "tool_use": + idx = int(getattr(event, "index", 0)) + tool_id = getattr(block, "id", None) + name = getattr(block, "name", None) + block_index_to_tool[idx] = name or "" + yield ChatResponseChunk( + tool_call_deltas=[ + ToolCallDelta(index=idx, id=tool_id, name=name) + ] + ) + elif etype == "content_block_delta": + delta = getattr(event, "delta", None) + dtype = getattr(delta, "type", None) + if dtype == "text_delta": + yield ChatResponseChunk( + text_delta=getattr(delta, "text", "") or "" + ) + elif dtype == "input_json_delta": + idx = int(getattr(event, "index", 0)) + yield ChatResponseChunk( + tool_call_deltas=[ + ToolCallDelta( + index=idx, + input_json_fragment=getattr( + delta, "partial_json", "" + ) + or "", + ) + ] + ) + elif etype == "message_delta": + delta = getattr(event, "delta", None) + sr = getattr(delta, "stop_reason", None) + if sr is not None: + stop_reason = sr + usage = getattr(event, "usage", None) + if usage is not None: + ot = getattr(usage, "output_tokens", None) + if ot is not None: + output_tokens = int(ot) + except APITimeoutError as exc: + raise ProviderUnavailable("Upstream timeout") from exc + except APIConnectionError as exc: + raise ProviderUnavailable("Upstream network error") from exc + except APIStatusError as exc: + self._raise_for_status(exc) + except APIError as exc: + raise ProviderUnavailable( + f"Anthropic API error: {type(exc).__name__}" + ) from exc + + canonical_stop = 
_STREAM_FINISH_REASON.get(stop_reason or "", "end_turn") + if block_index_to_tool and canonical_stop != "tool_use": + canonical_stop = "tool_use" + final_usage = ( + TokenUsage(prompt=0, completion=output_tokens) + if output_tokens is not None + else None + ) + yield ChatResponseChunk( + stop_reason=canonical_stop, + usage=final_usage, + done=True, + ) +``` + +Note: Anthropic streams `output_tokens` in `message_delta` but `input_tokens` only in `message_start.usage`. For the counts-only call log this completion count is sufficient; prompt is recorded as 0 when unavailable. (Documented as a design decision.) + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd server && .venv/bin/pytest tests/test_llm_streaming.py -k anthropic_adapter_stream -q` +Expected: PASS (2 tests) + +- [ ] **Step 5: Commit** + +```bash +git add server/app/services/llm/adapters/anthropic_apikey.py server/tests/test_llm_streaming.py +git commit -m "feat(llm): Anthropic provider-native streaming (text + tool_use deltas)" +``` + +--- + +## Task 7: `Gateway.stream` + `_attempt_stream` with counts-only logging + +**Files:** +- Modify: `server/app/services/llm/gateway.py` +- Test: `server/tests/test_llm_gateway_stream.py` + +- [ ] **Step 1: Write the failing test** + +```python +# server/tests/test_llm_gateway_stream.py +"""Tests for Gateway.stream — resolution mirrors dispatch, counts-only logging.""" + +from __future__ import annotations + +import json + +import pytest + +from app.models.llm_connector import LlmCallLog, LlmConnector +from app.models.user import User +from app.services.auth import get_password_hash +from app.services.llm.adapters.openai_apikey import OpenAIApiKeyAdapter +from app.services.llm.base import ChatRequest, ChatResponseChunk, Message, TokenUsage +from app.services.llm.exceptions import NoLlmConfigured, ProviderUnavailable +from app.services.llm.gateway import Gateway + + +@pytest.fixture +def dj_user(db) -> User: + user = User( + username="streamdj", + 
password_hash=get_password_hash("password123"), + role="dj", + ) + db.add(user) + db.commit() + db.refresh(user) + return user + + +def _make_connector(db, user, **kw) -> LlmConnector: + row = LlmConnector( + user_id=user.id, + connector_type=kw.get("connector_type", "openai_apikey"), + display_name=kw.get("display_name", "Test"), + status=kw.get("status", "active"), + credentials=json.dumps({"api_key": "sk-fake"}), + model_hint="gpt-5-mini", + ) + db.add(row) + db.commit() + db.refresh(row) + return row + + +def _fake_stream(chunks): + async def _gen(self, request): + for c in chunks: + yield c + + return _gen + + +@pytest.mark.asyncio +async def test_stream_no_actor_no_default_raises(db): + req = ChatRequest(messages=[Message(role="user", content="hi")]) + with pytest.raises(NoLlmConfigured): + async for _ in Gateway.stream(db, None, req, purpose="test"): + pass + + +@pytest.mark.asyncio +async def test_stream_dispatches_and_logs_counts_only(db, dj_user, monkeypatch): + connector = _make_connector(db, dj_user) + chunks = [ + ChatResponseChunk(text_delta="Hel"), + ChatResponseChunk(text_delta="lo"), + ChatResponseChunk( + stop_reason="end_turn", + usage=TokenUsage(prompt=4, completion=2), + done=True, + ), + ] + monkeypatch.setattr(OpenAIApiKeyAdapter, "stream", _fake_stream(chunks)) + + req = ChatRequest(messages=[Message(role="user", content="hi")]) + out = [c async for c in Gateway.stream(db, dj_user, req, purpose="recommendation")] + assert "".join(c.text_delta for c in out) == "Hello" + + log = db.query(LlmCallLog).filter(LlmCallLog.connector_id == connector.id).one() + assert log.status == "ok" + assert log.purpose == "recommendation" + assert log.tokens_in == 4 + assert log.tokens_out == 2 + db.refresh(connector) + assert connector.last_used_at is not None + + +@pytest.mark.asyncio +async def test_stream_error_logs_provider_unavailable(db, dj_user, monkeypatch): + connector = _make_connector(db, dj_user) + + async def _boom(self, request): + raise 
ProviderUnavailable("down") + yield # pragma: no cover + + monkeypatch.setattr(OpenAIApiKeyAdapter, "stream", _boom) + + req = ChatRequest(messages=[Message(role="user", content="hi")]) + with pytest.raises(ProviderUnavailable): + async for _ in Gateway.stream(db, dj_user, req, purpose="test"): + pass + + log = db.query(LlmCallLog).filter(LlmCallLog.connector_id == connector.id).one() + assert log.status == "provider_unavailable" + + +@pytest.mark.asyncio +async def test_stream_consumer_cancel_logs_and_propagates(db, dj_user, monkeypatch): + """Consumer stops early (client disconnect) → GeneratorExit, log written once.""" + connector = _make_connector(db, dj_user) + + async def _infinite(self, request): + i = 0 + while True: + yield ChatResponseChunk(text_delta=str(i)) + i += 1 + + monkeypatch.setattr(OpenAIApiKeyAdapter, "stream", _infinite) + + req = ChatRequest(messages=[Message(role="user", content="hi")]) + agen = Gateway.stream(db, dj_user, req, purpose="test") + first = await agen.__anext__() + assert first.text_delta == "0" + await agen.aclose() # simulate client disconnect + + log = db.query(LlmCallLog).filter(LlmCallLog.connector_id == connector.id).one() + assert log.status in ("ok", "cancelled") +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_gateway_stream.py -q` +Expected: FAIL — `Gateway.stream` not defined + +- [ ] **Step 3: Implement `Gateway.stream` + `_attempt_stream`** + +Add imports at the top of `server/app/services/llm/gateway.py` (merge): + +```python +from collections.abc import AsyncIterator + +from app.services.llm.base import ChatRequest, ChatResponse, ChatResponseChunk +``` + +Add a `stream` staticmethod inside `class Gateway` (after `dispatch`): + +```python + @staticmethod + async def stream( + db: Session, + actor: User | None, + request: ChatRequest, + *, + purpose: str, + ) -> AsyncIterator[ChatResponseChunk]: + """Stream a chat response, mirroring ``dispatch`` resolution 
+ logging. + + Resolution is identical to ``dispatch`` (per-DJ default → MRU → org + default). Logging differs only in timing: a single counts-only + ``llm_call_log`` row is written when the stream finishes (success), + errors, or is cancelled by the consumer (client disconnect → the async + generator is closed and ``GeneratorExit`` fires the ``finally``). + + Auto-fallback (``fallback_policy``) is intentionally NOT applied to + streaming: chunks have already been delivered to the consumer by the + time a mid-stream error surfaces, so transparently restarting on another + connector would corrupt the output. Streaming always fails fast. + """ + primary = _resolve_connector(db, actor) + actor_id = actor.id if actor else _system_actor_id(db, primary) + async for chunk in _attempt_stream( + db, primary, request, purpose=purpose, actor_id=actor_id + ): + yield chunk +``` + +Add the module-level `_attempt_stream` async generator (after `_attempt`): + +```python +async def _attempt_stream( + db: Session, + connector: LlmConnector, + request: ChatRequest, + *, + purpose: str, + actor_id: int, +) -> AsyncIterator[ChatResponseChunk]: + """Run a single adapter stream, logging exactly one outcome row. + + The call log is written in a ``finally`` so it fires on success, on a typed + error, AND on consumer cancellation (``GeneratorExit`` raised into the + generator when the SSE client disconnects). The status reflects which path + fired; counts come only from a terminal chunk's ``usage`` (never content). 
+ """ + adapter_cls = get_adapter_class(connector.connector_type) + adapter = adapter_cls(connector) + + started = monotonic() + status = "ok" + error_code: str | None = None + tokens_in: int | None = None + tokens_out: int | None = None + auth_failed = False + + try: + async for chunk in adapter.stream(request): + if chunk.usage is not None: + tokens_in = chunk.usage.prompt + tokens_out = chunk.usage.completion + yield chunk + except GeneratorExit: + # Consumer disconnected — record as cancelled and re-raise so the + # adapter's own finally/cleanup closes the upstream connection. + status = "cancelled" + error_code = "client_disconnect" + raise + except AuthInvalid: + status = "auth_invalid" + error_code = "401" + auth_failed = True + raise + except RateLimited as exc: + status = "rate_limited" + error_code = str(exc.retry_after_seconds or "") + raise + except QuotaExceeded: + status = "quota_exceeded" + error_code = "402" + raise + except ProviderUnavailable as exc: + status = "provider_unavailable" + error_code = type(exc).__name__ + raise + except ToolTranslationError: + status = "tool_translation_error" + error_code = "translation" + raise + except LlmError: + status = "error" + error_code = "llm_error" + raise + except BaseException: + # Anything not classified above (task cancellation, an adapter bug) + # must not reach ``finally`` with the default "ok" status, which would + # log a success row and refresh ``last_used_at``. + status = "error" + error_code = "unexpected" + raise + finally: + latency_ms = int((monotonic() - started) * 1000) + if status == "ok": + connector.last_used_at = utcnow() + connector.last_error = None + if auth_failed: + connector.status = STATUS_AUTH_INVALID + connector.last_error = "auth_invalid" + log_call( + db, + connector_id=connector.id, + purpose=purpose, + status=status, + latency_ms=latency_ms, + tokens_in=tokens_in if status == "ok" else None, + tokens_out=tokens_out if status == "ok" else None, + error_code=error_code, + ) + if auth_failed: + audit_event( + db, + actor_user_id=actor_id, + target_connector_id=connector.id, + event_type=AUDIT_AUTH_INVALID_OBSERVED, + ) + db.commit() +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd server && .venv/bin/pytest 
tests/test_llm_gateway_stream.py -q` +Expected: PASS (4 tests) + +- [ ] **Step 5: Commit** + +```bash +git add server/app/services/llm/gateway.py server/tests/test_llm_gateway_stream.py +git commit -m "feat(llm): Gateway.stream with counts-only logging + cancellation" +``` + +--- + +## Task 8: SSE backend endpoint `POST /api/llm/connectors/{id}/stream-test` + +**Files:** +- Modify: `server/app/api/llm.py` +- Test: `server/tests/test_llm_stream_endpoint.py` + +- [ ] **Step 1: Write the failing test** + +```python +# server/tests/test_llm_stream_endpoint.py +"""SSE stream-test endpoint: auth, content-type, body shape, ownership.""" + +from __future__ import annotations + +import json + +import pytest + +from app.models.llm_connector import LlmConnector +from app.services.llm.adapters.openai_apikey import OpenAIApiKeyAdapter +from app.services.llm.base import ChatResponseChunk, TokenUsage + + +def _make_connector(db, user) -> LlmConnector: + row = LlmConnector( + user_id=user.id, + connector_type="openai_apikey", + display_name="Test", + status="active", + credentials=json.dumps({"api_key": "sk-fake"}), + model_hint="gpt-5-mini", + ) + db.add(row) + db.commit() + db.refresh(row) + return row + + +def test_stream_test_requires_auth(client, db, test_user): + connector = _make_connector(db, test_user) + resp = client.post(f"/api/llm/connectors/{connector.id}/stream-test") + assert resp.status_code == 401 + + +def test_stream_test_404_for_unowned(client, db, test_user, auth_headers): + # Connector owned by a different user. 
+ from app.models.user import User + from app.services.auth import get_password_hash + + other = User(username="other", password_hash=get_password_hash("x123456789"), role="dj") + db.add(other) + db.commit() + db.refresh(other) + connector = _make_connector(db, other) + resp = client.post( + f"/api/llm/connectors/{connector.id}/stream-test", headers=auth_headers + ) + assert resp.status_code == 404 + + +def test_stream_test_streams_chunks(client, db, test_user, auth_headers, monkeypatch): + connector = _make_connector(db, test_user) + + async def _fake_stream(self, request): + yield ChatResponseChunk(text_delta="Hi") + yield ChatResponseChunk(text_delta=" there") + yield ChatResponseChunk( + stop_reason="end_turn", usage=TokenUsage(prompt=2, completion=2), done=True + ) + + monkeypatch.setattr(OpenAIApiKeyAdapter, "stream", _fake_stream) + + resp = client.post( + f"/api/llm/connectors/{connector.id}/stream-test", headers=auth_headers + ) + assert resp.status_code == 200 + assert resp.headers["content-type"].startswith("text/event-stream") + body = resp.text + # Each SSE event line starts with "data:". Reconstruct the JSON payloads. 
+ payloads = [ + json.loads(line[len("data:") :].strip()) + for line in body.splitlines() + if line.startswith("data:") + ] + text = "".join(p.get("text_delta", "") for p in payloads) + assert "Hi there" in text + assert any(p.get("done") for p in payloads) +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_stream_endpoint.py -q` +Expected: FAIL — 404/405 (endpoint missing) + +- [ ] **Step 3: Implement the SSE endpoint in `llm.py`** + +Add imports (merge with existing): + +```python +import json as _json + +from sse_starlette.sse import EventSourceResponse + +from app.models.user import User +from app.services.llm.base import ChatRequest, Message +from app.services.llm.exceptions import LlmError, NoLlmConfigured +from app.services.llm.gateway import Gateway +``` + +Add the endpoint (place after `test_connector`): + +```python +# A short, fixed prompt for the streaming health probe. Streams a single +# sentence so the DJ sees tokens arrive in real time, exercising the full +# resolve → adapter.stream → SSE path end-to-end. +_STREAM_TEST_PROMPT = "Reply with one short friendly sentence confirming you are online." + + +@router.post("/connectors/{connector_id}/stream-test") +@limiter.limit("10/minute") +async def stream_test_connector( + request: FastAPIRequest, + connector_id: int, + user: User = Depends(get_current_active_user), + db: Session = Depends(get_db), +) -> EventSourceResponse: + """Stream a short sentence through the connector as ``text/event-stream``. + + Validates ownership up front (404 for connectors the DJ doesn't own — never + leaks existence). Each SSE ``data:`` frame is a JSON ``ChatResponseChunk``. + On a typed gateway error, a terminal ``event: error`` frame is emitted with a + sanitised code (never the upstream payload), then the stream ends. 
Client + disconnect cancels the upstream provider request (the gateway generator's + ``finally`` writes the counts-only call log + closes the adapter). + """ + row = _get_owned_connector_or_404(db, connector_id, user.id) + # NOTE(review): ``Gateway.stream`` resolves the connector from the actor + # (per-DJ default → MRU → org default) and is never passed ``row``; only + # ``row.model_hint`` is forwarded. When the DJ owns several connectors this + # probe may therefore stream through a different connector than + # ``connector_id`` (and with a model hint from the wrong provider). Confirm + # whether it should call ``_attempt_stream`` with ``row`` directly instead. + + chat_request = ChatRequest( + messages=[Message(role="user", content=_STREAM_TEST_PROMPT)], + max_tokens=64, + temperature=0.0, + model=row.model_hint or None, + ) + + async def _publisher(): + try: + async for chunk in Gateway.stream( + db, user, chat_request, purpose="stream_test" + ): + yield {"data": _json.dumps(chunk.model_dump())} + except NoLlmConfigured: + yield {"event": "error", "data": _json.dumps({"code": "no_connector"})} + except LlmError as exc: + # Map to a sanitised, stable code — never echo the provider message. + code = type(exc).__name__ + logger.info("stream-test failed for connector %s: %s", connector_id, code) + yield {"event": "error", "data": _json.dumps({"code": code})} + + return EventSourceResponse( + _publisher(), + media_type="text/event-stream", + headers={"X-Accel-Buffering": "no"}, + ) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd server && .venv/bin/pytest tests/test_llm_stream_endpoint.py -q` +Expected: PASS (3 tests) + +- [ ] **Step 5: Commit** + +```bash +git add server/app/api/llm.py server/tests/test_llm_stream_endpoint.py +git commit -m "feat(api): authenticated SSE stream-test endpoint for connectors" +``` + +--- + +## Task 9: Backend CI green (ruff / format / bandit / full pytest) + +**Files:** none new — fix-ups only. + +- [ ] **Step 1: Auto-format + lint-fix** + +Run: `cd server && .venv/bin/ruff format . && .venv/bin/ruff check --fix .` + +- [ ] **Step 2: Lint check** + +Run: `cd server && .venv/bin/ruff check . && .venv/bin/ruff format --check .` +Expected: no errors. 
If `_normalise_finish_reason` import triggers a private-import lint (PLC2701), keep the `# noqa` already added in Task 3, or inline a local copy of the 4-line mapping function into `streaming.py` to avoid importing a private name. + +- [ ] **Step 3: Bandit** + +Run: `cd server && .venv/bin/bandit -r app -c pyproject.toml -q` +Expected: no new findings (the `# nosec B106` on the Authorization header is preserved). + +- [ ] **Step 4: Full backend test suite + coverage gate** + +Run: `cd server && .venv/bin/pytest --tb=short -q` +Expected: PASS, coverage ≥ gate. If new streaming files drag coverage, the dedicated stream tests above should cover them; add targeted tests for any uncovered branch the report flags. + +- [ ] **Step 5: Commit any fix-ups** + +```bash +git add -A +git commit -m "chore(llm): backend lint/format/coverage fix-ups for streaming" +``` + +--- + +## Task 10: Frontend SSE consumer `streamConnectorTest` + +**Files:** +- Modify: `dashboard/lib/api.ts` +- Test: `dashboard/lib/__tests__/api.test.ts` (append) + +- [ ] **Step 1: Write the failing test** + +Append to `dashboard/lib/__tests__/api.test.ts` (match the file's existing import + setup style): + +```typescript +describe('streamConnectorTest', () => { + it('parses SSE data frames and invokes onChunk per frame', async () => { + const sse = + 'data: {"text_delta":"Hi","done":false}\n\n' + + 'data: {"text_delta":" there","done":false}\n\n' + + 'data: {"text_delta":"","stop_reason":"end_turn","done":true}\n\n'; + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(sse)); + controller.close(); + }, + }); + const fetchMock = vi.fn().mockResolvedValue( + new Response(stream, { + status: 200, + headers: { 'Content-Type': 'text/event-stream' }, + }), + ); + vi.stubGlobal('fetch', fetchMock); + + apiClient.setToken('jwt-token'); + const chunks: Array<{ text_delta?: string; done?: boolean }> = []; + await 
apiClient.streamConnectorTest(7, (c) => chunks.push(c)); + + expect(chunks.map((c) => c.text_delta).join('')).toBe('Hi there'); + expect(chunks.at(-1)?.done).toBe(true); + // Auth header present. + const init = fetchMock.mock.calls[0][1] as RequestInit; + const headers = new Headers(init.headers); + expect(headers.get('Authorization')).toBe('Bearer jwt-token'); + vi.unstubAllGlobals(); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd dashboard && npm test -- --run lib/__tests__/api.test.ts` +Expected: FAIL — `apiClient.streamConnectorTest is not a function` + +- [ ] **Step 3: Add the type + method to `api.ts`** + +Add near the other LLM types (search for `LlmConnectorTestResult`): + +```typescript +export interface LlmStreamChunk { + text_delta?: string; + tool_call_deltas?: Array<{ + index: number; + id?: string | null; + name?: string | null; + input_json_fragment?: string; + }>; + stop_reason?: 'end_turn' | 'tool_use' | 'max_tokens' | 'error' | null; + usage?: { prompt: number; completion: number } | null; + done?: boolean; +} +``` + +Add the method to the `ApiClient` class (near `testLlmConnector`): + +```typescript + /** + * Stream a short health-check sentence through a connector via SSE. + * + * Uses fetch + ReadableStream rather than EventSource because EventSource + * cannot send the Authorization header this authenticated endpoint requires. + * Pass an AbortSignal to cancel — aborting closes the connection, which the + * backend treats as a client disconnect and cancels the upstream provider + * request. ``onChunk`` is invoked for every parsed SSE data frame. 
+ */ + async streamConnectorTest( + id: number, + onChunk: (chunk: LlmStreamChunk) => void, + signal?: AbortSignal, + ): Promise<void> { + const headers = new Headers({ Accept: 'text/event-stream' }); + if (this.token) headers.set('Authorization', `Bearer ${this.token}`); + + const response = await fetch( + `${getApiUrl()}/api/llm/connectors/${id}/stream-test`, + { method: 'POST', headers, signal }, + ); + if (!response.ok || !response.body) { + if (response.status === 401 && this.onUnauthorized) this.onUnauthorized(); + throw new ApiError('Stream test failed', response.status); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + // sse-starlette emits CRLF line endings by default; normalise the whole + // buffer to LF so a CRLF split across two reads is still collapsed. + buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, '\n'); + // SSE frames are separated by a blank line. + let sep: number; + while ((sep = buffer.indexOf('\n\n')) !== -1) { + const frame = buffer.slice(0, sep); + buffer = buffer.slice(sep + 2); + for (const line of frame.split('\n')) { + if (!line.startsWith('data:')) continue; + const data = line.slice('data:'.length).trim(); + if (!data || data === '[DONE]') continue; + try { + onChunk(JSON.parse(data) as LlmStreamChunk); + } catch { + // Ignore unparseable keepalive frames. 
+ } + } + } + } + } finally { + reader.releaseLock(); + } + } +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `cd dashboard && npm test -- --run lib/__tests__/api.test.ts` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add dashboard/lib/api.ts dashboard/lib/__tests__/api.test.ts +git commit -m "feat(ai-ui): SSE stream consumer for connector stream-test" +``` + +--- + +## Task 11: Minimal UI consumer wiring (admin/ai stream test) + scope note + +**Files:** +- Modify: `dashboard/app/admin/ai/page.tsx` + +**Decision:** The recommendation flow is a backend background pipeline that returns a final JSON payload to the UI (not a live token feed), so retrofitting it to SSE would be a large, risky change outside this issue's intent. Per the issue's "use reasonable judgment on scope and document it", the frontend consumer is the reusable `apiClient.streamConnectorTest` plumbing (Task 10) plus a minimal live "Stream test" affordance on the existing AI settings surface. The recommendation UI migration to SSE is explicitly deferred (future set-builder UI, §11.6) and noted in the PR body. 
+ +- [ ] **Step 1: Read the admin/ai page to find the connector row / actions area** + +Run: `cd dashboard && grep -n "testLlmConnector\|Test\|connector" app/admin/ai/page.tsx | head -30` + +- [ ] **Step 2: Add a "Stream test" button that appends streamed text into local state** + +Add (adapt names to the file's existing component structure — this is the behavior to wire, not a verbatim drop-in): + +```tsx +// Local state near the component's other useState hooks: +const [streamText, setStreamText] = useState(''); +const [streaming, setStreaming] = useState<number | null>(null); + +async function handleStreamTest(connectorId: number) { + setStreamText(''); + setStreaming(connectorId); + try { + await apiClient.streamConnectorTest(connectorId, (chunk) => { + if (chunk.text_delta) setStreamText((prev) => prev + chunk.text_delta); + }); + } catch { + setStreamText('(stream test failed)'); + } finally { + setStreaming(null); + } +} +``` + +And in the per-connector action area, next to the existing test button: + +```tsx +<button type="button" onClick={() => handleStreamTest(connector.id)} disabled={streaming !== null}> + {streaming === connector.id ? 'Streaming…' : 'Stream test'} +</button> +{streaming === connector.id && streamText && ( + <pre>{streamText}</pre>
+)} +``` + +- [ ] **Step 3: Type-check + existing page tests** + +Run: `cd dashboard && npx tsc --noEmit` +Run: `cd dashboard && npm test -- --run app/admin/ai` +Expected: PASS. If the admin/ai page has snapshot/DOM tests that assert exact button sets, update those fixtures to include the new button. + +- [ ] **Step 4: Commit** + +```bash +git checkout dashboard/next-env.d.ts 2>/dev/null || true +git add dashboard/app/admin/ai/page.tsx +git commit -m "feat(ai-ui): minimal live stream-test affordance on AI settings" +``` + +--- + +## Task 12: Full local CI sweep + finishing the branch + +**Files:** none new. + +- [ ] **Step 1: Backend CI** + +Run from `server/`: +```bash +.venv/bin/ruff check . +.venv/bin/ruff format --check . +.venv/bin/bandit -r app -c pyproject.toml -q +.venv/bin/pytest --tb=short -q +``` +Expected: all green, coverage gate satisfied. + +- [ ] **Step 2: Frontend CI** + +Run from `dashboard/`: +```bash +npm run lint +npx tsc --noEmit +npm test -- --run +``` +Expected: all green. Then `git checkout dashboard/next-env.d.ts` if auto-modified. + +- [ ] **Step 3: Confirm no Alembic migration was introduced** + +Run: `cd server && git diff --name-only origin/epic/ai-engine...HEAD | grep alembic || echo "no migrations — correct"` +Expected: `no migrations — correct` (streaming requires no schema change). + +- [ ] **Step 4: Use superpowers:finishing-a-development-branch (option 2: Push + PR)** + +Create the PR with `gh pr create --base epic/ai-engine`. PR body must include `Closes #335`, a `## Design decisions` section, and a note that the PR targets `epic/ai-engine`. + +--- + +## Self-Review + +**Spec coverage (issue #335 acceptance criteria):** +- `Gateway.stream(...) -> AsyncIterator[ChatResponseChunk]` → Task 7. ✅ +- `ChatResponseChunk` carries incremental text + partial tool_calls + final stop_reason + usage → Task 1 (model), Tasks 3/4/6 (population). 
✅ +- Each adapter implements provider-native streaming (OpenAI, Anthropic, OpenAI-compatible) → Tasks 5, 6. ✅ +- Non-streaming adapters degrade gracefully (`StreamingUnsupported`) → Tasks 1, 2. ✅ +- SSE backend endpoint (text/event-stream) → Task 8. ✅ +- Tool-use mid-stream parses across providers (OpenAI partial JSON, Anthropic delta blocks) → Task 3 (OpenAI tool frags), Task 6 (`input_json_delta`). ✅ +- Cancellation propagates upstream (frontend disconnect → adapter cancels upstream) → Task 7 (`GeneratorExit` → adapter `async with` cleanup closes httpx/SDK stream), Task 10 (`AbortSignal`). ✅ +- Counts-only call log + audit consistency with non-stream path → Task 7 `_attempt_stream`. ✅ +- Frontend consumer → Tasks 10 (plumbing) + 11 (minimal UI, recommendation-migration deferral documented). ✅ + +**Placeholder scan:** No TBD/TODO. Frontend Task 11 step 2 is explicitly behavior-to-wire (adapt to existing component) because the exact JSX scaffold depends on the live file — the implementer reads it in step 1. + +**Type consistency:** `ChatResponseChunk` fields (`text_delta`, `tool_call_deltas`, `stop_reason`, `usage`, `done`) and `ToolCallDelta` fields (`index`, `id`, `name`, `input_json_fragment`) are used identically in base.py, streaming.py, adapters, gateway, endpoint, and frontend type. `stream_openai_chat` signature matches its callers in both OpenAI adapters. `Gateway.stream` / `_attempt_stream` signatures match `dispatch` / `_attempt`. 
diff --git a/docs/superpowers/plans/2026-05-28-per-feature-connector-preference.md b/docs/superpowers/plans/2026-05-28-per-feature-connector-preference.md new file mode 100644 index 00000000..58fc2f36 --- /dev/null +++ b/docs/superpowers/plans/2026-05-28-per-feature-connector-preference.md @@ -0,0 +1,1314 @@ +# Per-Feature Connector Preference Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Let each DJ pin a specific LLM connector to a specific agentic feature (e.g. recommendation → connector A, set_builder → connector B), with graceful fallback when the pinned connector is gone or auth-invalid. + +**Architecture:** A new `LlmFeaturePreference` table maps `(user_id, feature) → connector_id` with a UNIQUE constraint. `Gateway.dispatch` already receives `purpose` (the feature key), so resolution gains a new first step: look up the DJ's pinned connector for `purpose`, use it if active, else fall through to the existing chain (per-DJ default → MRU → org default → `NoLlmConfigured`). New `/api/llm/feature-preferences` endpoints (set/clear/list) are scoped to the current DJ and validate connector ownership + feature against an allowlist. The DJ AI settings UI gains a "Per-feature defaults" section. + +**Tech Stack:** FastAPI, SQLAlchemy 2.0, Alembic, slowapi, Pydantic v2, Next.js/React 19/TypeScript, vitest. + +--- + +## File Structure + +**Backend (create):** +- `server/app/models/llm_feature_preference.py` — the new model + feature allowlist constants. +- `server/alembic/versions/050_llm_feature_preference.py` — migration (down_revision = `049`). +- `server/tests/test_llm_feature_preference.py` — model + gateway resolution + endpoint tests. + +**Backend (modify):** +- `server/app/models/__init__.py` — register `LlmFeaturePreference`. 
+- `server/app/services/llm/connector_storage.py` — feature-preference CRUD helpers. +- `server/app/services/llm/gateway.py` — add feature-preference as the first resolution step. +- `server/app/api/llm.py` — set/clear/list feature-preference endpoints. +- `server/app/schemas/llm.py` — request/response schemas + known-feature constant. + +**Frontend (modify):** +- `dashboard/lib/api.ts` — `listLlmFeaturePreferences`, `setLlmFeaturePreference`, `clearLlmFeaturePreference`. +- `dashboard/components/AiProvidersSection.tsx` — "Per-feature defaults" section. +- `dashboard/lib/api-types.ts` — re-export the new generated schema types. +- `dashboard/lib/api-types.generated.ts` — regenerated from OpenAPI (via `npm run types:export && npm run types:generate`). + +**Design decisions (locked in):** +- Feature key reuses the gateway `purpose` string. Known features allowlist: `{"recommendation", "set_builder"}`. `recommendation` is the only `purpose` in use today; `set_builder` is named in the issue spec for an upcoming feature. The allowlist is defined in one place (`KNOWN_FEATURES` in `models/llm_feature_preference.py`); `schemas/llm.py` imports it for API validation, and a guard test (Task 7) keeps the hand-maintained `FeatureKey` literal in sync with it. +- The endpoint surface is `POST /api/llm/feature-preferences` (upsert set), `DELETE /api/llm/feature-preferences/{feature}` (clear), `GET /api/llm/feature-preferences` (list). Upsert semantics keep "set" and "change" as one operation (the UNIQUE constraint makes change == replace). +- Ownership: setting a preference validates the connector belongs to the current DJ (404 if not, mirroring the existing connector-ownership 404 convention so another DJ's connector existence is never leaked). +- Graceful fallback: gateway resolution skips a pinned preference whose connector is deleted (FK row gone) or whose status != `active`. No exception — falls through to the next resolution step.
+- We do NOT add a frontend "set inactive connector" guard beyond what the picker offers; the gateway already skips inactive pins, and the API rejects pinning a non-active connector with 400 (mirrors the per-DJ default endpoint), so a DJ can't silently break their own routing. + +--- + +## Task 1: LlmFeaturePreference model + feature allowlist + +**Files:** +- Create: `server/app/models/llm_feature_preference.py` +- Modify: `server/app/models/__init__.py` +- Test: `server/tests/test_llm_feature_preference.py` + +- [ ] **Step 1: Write the failing test** + +Create `server/tests/test_llm_feature_preference.py`: + +```python +"""Tests for per-feature connector preference (issue #337).""" + +from __future__ import annotations + +import json + +import pytest +from sqlalchemy.exc import IntegrityError + +from app.models.llm_connector import LlmConnector +from app.models.llm_feature_preference import KNOWN_FEATURES, LlmFeaturePreference +from app.models.user import User +from app.services.auth import get_password_hash + + +@pytest.fixture +def dj_user(db) -> User: + user = User( + username="prefdj", + password_hash=get_password_hash("password123"), + role="dj", + ) + db.add(user) + db.commit() + db.refresh(user) + return user + + +def _make_connector(db, user, *, display_name="Pref connector", status="active"): + row = LlmConnector( + user_id=user.id, + connector_type="openai_apikey", + display_name=display_name, + status=status, + credentials=json.dumps({"api_key": "sk-fake-key"}), + model_hint="gpt-5-mini", + ) + db.add(row) + db.commit() + db.refresh(row) + return row + + +def test_known_features_contains_recommendation_and_set_builder(): + assert "recommendation" in KNOWN_FEATURES + assert "set_builder" in KNOWN_FEATURES + + +def test_unique_constraint_one_pref_per_user_feature(db, dj_user): + c1 = _make_connector(db, dj_user, display_name="A") + c2 = _make_connector(db, dj_user, display_name="B") + db.add( + LlmFeaturePreference(user_id=dj_user.id, 
feature="recommendation", connector_id=c1.id) + ) + db.commit() + db.add( + LlmFeaturePreference(user_id=dj_user.id, feature="recommendation", connector_id=c2.id) + ) + with pytest.raises(IntegrityError): + db.commit() + db.rollback() +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q` +Expected: FAIL with `ModuleNotFoundError: No module named 'app.models.llm_feature_preference'` + +- [ ] **Step 3: Write the model** + +Create `server/app/models/llm_feature_preference.py`: + +```python +"""Per-feature connector preference — pins a DJ's connector to a feature. + +A DJ can pin the recommendation engine to one connector and the set-builder +to another. The gateway consults this table first (keyed by ``purpose``) +before falling back to the per-DJ default / MRU / org-default chain. + +See issue #337, spec §11.8. +""" + +from datetime import datetime + +from sqlalchemy import DateTime, ForeignKey, Integer, String, UniqueConstraint, func +from sqlalchemy.orm import Mapped, mapped_column + +from app.models.base import Base + +# Allowlist of feature keys a DJ may pin. These mirror the gateway ``purpose`` +# strings. ``recommendation`` is the only purpose dispatched today; +# ``set_builder`` is reserved for the upcoming set-builder feature (issue spec +# §11.8). Validation of API input against this set lives in ``schemas/llm.py`` +# (KNOWN_FEATURES is re-exported there to keep a single source of truth). +KNOWN_FEATURES = frozenset({"recommendation", "set_builder"}) + + +class LlmFeaturePreference(Base): + """Maps ``(user_id, feature)`` to a pinned ``connector_id``. + + At most one row per ``(user_id, feature)`` — enforced by a UNIQUE + constraint. Deleting the connector cascades (ON DELETE CASCADE) so a stale + preference never points at a missing connector. 
+ """ + + __tablename__ = "llm_feature_preferences" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + user_id: Mapped[int] = mapped_column( + ForeignKey("users.id", ondelete="CASCADE"), index=True, nullable=False + ) + feature: Mapped[str] = mapped_column(String(40), nullable=False) + connector_id: Mapped[int] = mapped_column( + ForeignKey("llm_connectors.id", ondelete="CASCADE"), index=True, nullable=False + ) + created_at: Mapped[datetime] = mapped_column( + DateTime, nullable=False, server_default=func.now() + ) + + __table_args__ = ( + UniqueConstraint("user_id", "feature", name="uq_llm_feature_pref_user_feature"), + ) +``` + +- [ ] **Step 4: Register the model** + +Modify `server/app/models/__init__.py` — add the import after the `llm_connector` import line and the name to `__all__` (alphabetical-ish, keep grouped with other Llm names): + +```python +from app.models.llm_connector import LlmAuditEvent, LlmCallLog, LlmConnector +from app.models.llm_feature_preference import LlmFeaturePreference +``` + +And add `"LlmFeaturePreference",` to the `__all__` list (right after `"LlmConnector",`). + +- [ ] **Step 5: Run test to verify it passes** + +Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q` +Expected: PASS (2 tests) + +- [ ] **Step 6: Commit** + +```bash +git add server/app/models/llm_feature_preference.py server/app/models/__init__.py server/tests/test_llm_feature_preference.py +git commit -m "feat(llm): add LlmFeaturePreference model + feature allowlist" +``` + +--- + +## Task 2: Alembic migration + +**Files:** +- Create: `server/alembic/versions/050_llm_feature_preference.py` + +- [ ] **Step 1: Write the migration** + +Create `server/alembic/versions/050_llm_feature_preference.py`: + +```python +"""Add llm_feature_preferences table. + +Revision ID: 050 +Revises: 049 +Create Date: 2026-05-28 + +Per-feature connector preference (issue #337).
Maps ``(user_id, feature)`` to a +pinned ``connector_id`` with a UNIQUE constraint so a DJ has at most one pinned +connector per feature. Both FKs cascade on delete so a deleted user or +connector never leaves a dangling preference. +""" + +import sqlalchemy as sa + +from alembic import op + +revision: str = "050" +down_revision: str | None = "049" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.create_table( + "llm_feature_preferences", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("user_id", sa.Integer(), nullable=False), + sa.Column("feature", sa.String(length=40), nullable=False), + sa.Column("connector_id", sa.Integer(), nullable=False), + sa.Column( + "created_at", + sa.DateTime(), + server_default=sa.func.now(), + nullable=False, + ), + sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"), + sa.ForeignKeyConstraint(["connector_id"], ["llm_connectors.id"], ondelete="CASCADE"), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("user_id", "feature", name="uq_llm_feature_pref_user_feature"), + ) + op.create_index( + "ix_llm_feature_preferences_user_id", + "llm_feature_preferences", + ["user_id"], + ) + op.create_index( + "ix_llm_feature_preferences_connector_id", + "llm_feature_preferences", + ["connector_id"], + ) + + +def downgrade() -> None: + op.drop_index("ix_llm_feature_preferences_connector_id", table_name="llm_feature_preferences") + op.drop_index("ix_llm_feature_preferences_user_id", table_name="llm_feature_preferences") + op.drop_table("llm_feature_preferences") +``` + +- [ ] **Step 2: Run migration + drift check** + +Run: `cd server && .venv/bin/alembic upgrade head && .venv/bin/alembic check` +Expected: `upgrade` runs cleanly to revision `050`, and `alembic check` prints `No new upgrade operations detected.` + +If `alembic check` reports drift, reconcile the migration columns/indexes with the model (`index=True` on `user_id` and `connector_id` matches the two `create_index` calls). 
+ +- [ ] **Step 3: Commit** + +```bash +git add server/alembic/versions/050_llm_feature_preference.py +git commit -m "feat(llm): migration 050 for llm_feature_preferences" +``` + +--- + +## Task 3: connector_storage CRUD helpers + +**Files:** +- Modify: `server/app/services/llm/connector_storage.py` +- Test: `server/tests/test_llm_feature_preference.py` + +- [ ] **Step 1: Write the failing test** + +Append to `server/tests/test_llm_feature_preference.py`: + +```python +def test_set_feature_preference_upserts(db, dj_user): + from app.services.llm.connector_storage import ( + get_feature_preferences_for_user, + set_feature_preference, + ) + + c1 = _make_connector(db, dj_user, display_name="A") + c2 = _make_connector(db, dj_user, display_name="B") + + set_feature_preference(db, user_id=dj_user.id, feature="recommendation", connector_id=c1.id) + db.commit() + prefs = get_feature_preferences_for_user(db, dj_user.id) + assert {p.feature: p.connector_id for p in prefs} == {"recommendation": c1.id} + + # Re-set the same feature → replace, not duplicate. + set_feature_preference(db, user_id=dj_user.id, feature="recommendation", connector_id=c2.id) + db.commit() + prefs = get_feature_preferences_for_user(db, dj_user.id) + assert {p.feature: p.connector_id for p in prefs} == {"recommendation": c2.id} + + +def test_clear_feature_preference_removes_row(db, dj_user): + from app.services.llm.connector_storage import ( + clear_feature_preference, + get_feature_preferences_for_user, + set_feature_preference, + ) + + c1 = _make_connector(db, dj_user, display_name="A") + set_feature_preference(db, user_id=dj_user.id, feature="recommendation", connector_id=c1.id) + db.commit() + + removed = clear_feature_preference(db, user_id=dj_user.id, feature="recommendation") + db.commit() + assert removed is True + assert get_feature_preferences_for_user(db, dj_user.id) == [] + + # Clearing a non-existent preference is a no-op (returns False). 
+ assert clear_feature_preference(db, user_id=dj_user.id, feature="recommendation") is False +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q` +Expected: FAIL with `ImportError: cannot import name 'get_feature_preferences_for_user'` (the first missing name in the test's import list) + +- [ ] **Step 3: Add the helpers** + +In `server/app/services/llm/connector_storage.py`, the model cannot be added to the existing `from app.models.llm_connector import (...)` block because it lives in a different module. Add a new import near the top imports instead: + +```python +from app.models.llm_feature_preference import LlmFeaturePreference +``` + +Then add these functions (place them after `unset_default_for_user`): + +```python +def get_feature_preferences_for_user(db: Session, user_id: int) -> list[LlmFeaturePreference]: + """Return all of a DJ's per-feature connector pins.""" + return ( + db.query(LlmFeaturePreference) + .filter(LlmFeaturePreference.user_id == user_id) + .order_by(LlmFeaturePreference.feature.asc()) + .all() + ) + + +def get_feature_preference( + db: Session, *, user_id: int, feature: str +) -> LlmFeaturePreference | None: + """Return the DJ's pin for ``feature``, or ``None`` if unset.""" + return ( + db.query(LlmFeaturePreference) + .filter( + LlmFeaturePreference.user_id == user_id, + LlmFeaturePreference.feature == feature, + ) + .one_or_none() + ) + + +def set_feature_preference( + db: Session, *, user_id: int, feature: str, connector_id: int +) -> LlmFeaturePreference: + """Upsert the DJ's pin for ``feature`` → ``connector_id``. Caller commits. + + Replace-in-place when a row already exists so the UNIQUE constraint on + ``(user_id, feature)`` is never violated.
+ """ + existing = get_feature_preference(db, user_id=user_id, feature=feature) + if existing is not None: + existing.connector_id = connector_id + db.flush() + return existing + row = LlmFeaturePreference(user_id=user_id, feature=feature, connector_id=connector_id) + db.add(row) + db.flush() + return row + + +def clear_feature_preference(db: Session, *, user_id: int, feature: str) -> bool: + """Delete the DJ's pin for ``feature``. Returns True iff a row was removed. + + Caller commits. + """ + existing = get_feature_preference(db, user_id=user_id, feature=feature) + if existing is None: + return False + db.delete(existing) + db.flush() + return True +``` + +Add the four function names to the `__all__` list alphabetically: +`"clear_feature_preference",`, `"get_feature_preference",`, `"get_feature_preferences_for_user",`, `"set_feature_preference",`. + +- [ ] **Step 4: Run test to verify it passes** + +Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add server/app/services/llm/connector_storage.py server/tests/test_llm_feature_preference.py +git commit -m "feat(llm): feature-preference CRUD helpers in connector_storage" +``` + +--- + +## Task 4: Gateway resolution — feature preference first + +**Files:** +- Modify: `server/app/services/llm/gateway.py` +- Test: `server/tests/test_llm_feature_preference.py` + +- [ ] **Step 1: Write the failing test** + +Append to `server/tests/test_llm_feature_preference.py`: + +```python +from unittest.mock import AsyncMock, patch # noqa: E402 (grouped with gateway tests) + +from app.services.llm.adapters.openai_apikey import OpenAIApiKeyAdapter # noqa: E402 +from app.services.llm.base import ChatRequest, ChatResponse, Message, TokenUsage # noqa: E402 +from app.services.llm.gateway import Gateway # noqa: E402 + + +def _ok_response() -> ChatResponse: + return ChatResponse( + text="ok", tool_calls=[], stop_reason="end_turn", 
usage=TokenUsage(prompt=1, completion=1) + ) + + +@pytest.mark.asyncio +async def test_gateway_prefers_feature_pin_over_default(db, dj_user): + from app.services.llm.connector_storage import set_default_for_user, set_feature_preference + + pinned = _make_connector(db, dj_user, display_name="pinned") + other = _make_connector(db, dj_user, display_name="default") + set_default_for_user(db, connector=other) # per-DJ default points elsewhere + set_feature_preference( + db, user_id=dj_user.id, feature="recommendation", connector_id=pinned.id + ) + db.commit() + + captured = {} + + async def fake_chat(self, request): # noqa: ANN001 + captured["connector_id"] = self.connector.id + return _ok_response() + + with patch.object(OpenAIApiKeyAdapter, "chat", new=fake_chat): + await Gateway.dispatch( + db, + dj_user, + ChatRequest(messages=[Message(role="user", content="hi")]), + purpose="recommendation", + ) + assert captured["connector_id"] == pinned.id + + +@pytest.mark.asyncio +async def test_gateway_falls_back_when_pinned_connector_auth_invalid(db, dj_user): + from app.services.llm.connector_storage import set_default_for_user, set_feature_preference + + pinned = _make_connector(db, dj_user, display_name="pinned", status="auth_invalid") + fallback = _make_connector(db, dj_user, display_name="fallback") + set_default_for_user(db, connector=fallback) + set_feature_preference( + db, user_id=dj_user.id, feature="recommendation", connector_id=pinned.id + ) + db.commit() + + captured = {} + + async def fake_chat(self, request): # noqa: ANN001 + captured["connector_id"] = self.connector.id + return _ok_response() + + with patch.object(OpenAIApiKeyAdapter, "chat", new=fake_chat): + await Gateway.dispatch( + db, + dj_user, + ChatRequest(messages=[Message(role="user", content="hi")]), + purpose="recommendation", + ) + # Skips the auth_invalid pin, falls through to the per-DJ default. 
+ assert captured["connector_id"] == fallback.id + + +@pytest.mark.asyncio +async def test_gateway_ignores_pin_for_unknown_feature(db, dj_user): + """A pin set for one feature must not leak into another purpose.""" + from app.services.llm.connector_storage import set_feature_preference + + pinned = _make_connector(db, dj_user, display_name="pinned") + mru = _make_connector(db, dj_user, display_name="mru") + set_feature_preference( + db, user_id=dj_user.id, feature="recommendation", connector_id=pinned.id + ) + db.commit() + + captured = {} + + async def fake_chat(self, request): # noqa: ANN001 + captured["connector_id"] = self.connector.id + return _ok_response() + + with patch.object(OpenAIApiKeyAdapter, "chat", new=fake_chat): + await Gateway.dispatch( + db, + dj_user, + ChatRequest(messages=[Message(role="user", content="hi")]), + purpose="set_builder", + ) + # No pin for set_builder → MRU resolution (most recently created here is `mru`). + assert captured["connector_id"] == mru.id +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q -k gateway` +Expected: FAIL — the pin is ignored because `_resolve_connector` doesn't know about `purpose`. 
+ +- [ ] **Step 3: Thread purpose into resolution** + +In `server/app/services/llm/gateway.py`: + +Add the storage import near the existing imports: + +```python +from app.services.llm.connector_storage import audit_event, get_feature_preference, log_call +``` + +(modify the existing `from app.services.llm.connector_storage import audit_event, log_call` line) + +In `Gateway.dispatch`, change the resolve call to pass `purpose`: + +```python + primary = _resolve_connector(db, actor, purpose=purpose) +``` + +Update `_resolve_connector`'s signature and add the feature-preference step as the FIRST check inside the `if actor is not None:` block: + +```python +def _resolve_connector(db: Session, actor: User | None, *, purpose: str) -> LlmConnector: + if actor is not None: + # 0. Per-feature pin (issue #337) takes precedence over the per-DJ + # default and MRU. Skipped gracefully when the pinned connector was + # deleted (FK row gone) or is no longer active, so a stale/broken + # pin never silently breaks the DJ — resolution falls through. + pref = get_feature_preference(db, user_id=actor.id, feature=purpose) + if pref is not None: + pinned = db.get(LlmConnector, pref.connector_id) + if ( + pinned is not None + and pinned.user_id == actor.id + and pinned.status == STATUS_ACTIVE + ): + return pinned + + # Per-DJ explicit default takes precedence over MRU (issue #336). + ... +``` + +(Leave the rest of `_resolve_connector` unchanged — the `pinned` default block, the MRU block, the org-default fallback.) 
+ +- [ ] **Step 4: Run test to verify it passes** + +Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q -k gateway` +Expected: PASS + +Then run the full gateway suite to confirm no regression: +Run: `cd server && .venv/bin/pytest tests/test_llm_gateway.py tests/test_llm_default_connector.py -q` +Expected: PASS + +- [ ] **Step 5: Update gateway module docstring** + +In `server/app/services/llm/gateway.py`, update the "Resolution order" docstring at the top to list the feature-preference step first: + +``` +Resolution order: +1. If ``actor`` is not ``None``: + a. The DJ's per-feature pin for ``purpose`` if set and the pinned connector + is active (``LlmFeaturePreference`` — issue #337). + b. Else: the DJ's explicit default active connector if one is pinned + (``LlmConnector.is_default = True``) — issue #336. + c. Else: most-recently-used active connector for the DJ. +2. Else: ``SystemSettings.llm_default_connector_id`` if set and active. +3. Else: raise :class:`NoLlmConfigured`. +``` + +- [ ] **Step 6: Commit** + +```bash +git add server/app/services/llm/gateway.py server/tests/test_llm_feature_preference.py +git commit -m "feat(llm): gateway resolves per-feature pin first, falls back gracefully" +``` + +--- + +## Task 5: API schemas + +**Files:** +- Modify: `server/app/schemas/llm.py` + +- [ ] **Step 1: Add the schemas + feature literal** + +In `server/app/schemas/llm.py`, after the existing imports add the known-feature import + a `Literal`-derived alias. Near the top (after `from typing import Literal`): + +```python +from app.models.llm_feature_preference import KNOWN_FEATURES + +# Sorted tuple so the OpenAPI enum + frontend list are deterministic. +KNOWN_FEATURE_VALUES: tuple[str, ...] 
= tuple(sorted(KNOWN_FEATURES)) +FeatureKey = Literal["recommendation", "set_builder"] +``` + +At the end of the file add: + +```python +class FeaturePreferenceOut(BaseModel): + """A single per-feature connector pin.""" + + model_config = ConfigDict(from_attributes=True) + + feature: FeatureKey + connector_id: int + + +class FeaturePreferencesListOut(BaseModel): + """All of a DJ's per-feature pins + the catalogue of pinnable features.""" + + preferences: list[FeaturePreferenceOut] + known_features: list[FeatureKey] + + +class FeaturePreferenceSet(BaseModel): + """Set/change a per-feature pin. Upsert — replaces any existing pin.""" + + feature: FeatureKey + connector_id: int = Field(..., ge=1) +``` + +> Note: `FeatureKey` is hand-maintained to match `KNOWN_FEATURES` (Pydantic `Literal` can't be built from a runtime frozenset and still emit a static OpenAPI enum). The model docstring in `llm_feature_preference.py` flags that both must stay in sync; a test in Task 7 asserts they match. + +- [ ] **Step 2: Verify it imports** + +Run: `cd server && .venv/bin/python -c "from app.schemas.llm import FeaturePreferenceSet, FeaturePreferencesListOut, KNOWN_FEATURE_VALUES; print(KNOWN_FEATURE_VALUES)"` +Expected: prints `('recommendation', 'set_builder')` + +- [ ] **Step 3: Commit** + +```bash +git add server/app/schemas/llm.py +git commit -m "feat(llm): feature-preference API schemas" +``` + +--- + +## Task 6: API endpoints + +**Files:** +- Modify: `server/app/api/llm.py` +- Test: `server/tests/test_llm_feature_preference.py` + +- [ ] **Step 1: Write the failing test** + +Append to `server/tests/test_llm_feature_preference.py`: + +```python +from fastapi.testclient import TestClient # noqa: E402 + + +def _login(client: TestClient, username: str, password: str) -> dict[str, str]: + resp = client.post("/api/auth/login", data={"username": username, "password": password}) + assert resp.status_code == 200, resp.json() + return {"Authorization": f"Bearer 
{resp.json()['access_token']}"} + + +def test_set_list_clear_feature_preference_endpoints(client, db, test_user, auth_headers): + c = _make_connector(db, test_user, display_name="Endpoint connector") + + # Set + resp = client.post( + "/api/llm/feature-preferences", + json={"feature": "recommendation", "connector_id": c.id}, + headers=auth_headers, + ) + assert resp.status_code == 200, resp.json() + body = resp.json() + assert {p["feature"]: p["connector_id"] for p in body["preferences"]} == { + "recommendation": c.id + } + assert "set_builder" in body["known_features"] + + # List + resp = client.get("/api/llm/feature-preferences", headers=auth_headers) + assert resp.status_code == 200 + assert resp.json()["preferences"][0]["connector_id"] == c.id + + # Clear + resp = client.delete("/api/llm/feature-preferences/recommendation", headers=auth_headers) + assert resp.status_code == 200 + assert resp.json()["preferences"] == [] + + +def test_set_feature_preference_rejects_unknown_feature(client, db, test_user, auth_headers): + c = _make_connector(db, test_user, display_name="X") + resp = client.post( + "/api/llm/feature-preferences", + json={"feature": "totally_made_up", "connector_id": c.id}, + headers=auth_headers, + ) + assert resp.status_code == 422 # Pydantic Literal rejects it + + +def test_set_feature_preference_rejects_other_djs_connector( + client, db, test_user, auth_headers +): + # Another DJ owns this connector. 
+ other = User( + username="otherdj", password_hash=get_password_hash("password123"), role="dj" + ) + db.add(other) + db.commit() + db.refresh(other) + foreign = _make_connector(db, other, display_name="Not yours") + + resp = client.post( + "/api/llm/feature-preferences", + json={"feature": "recommendation", "connector_id": foreign.id}, + headers=auth_headers, + ) + assert resp.status_code == 404 # ownership not leaked + + +def test_set_feature_preference_rejects_inactive_connector( + client, db, test_user, auth_headers +): + c = _make_connector(db, test_user, display_name="Broken", status="auth_invalid") + resp = client.post( + "/api/llm/feature-preferences", + json={"feature": "recommendation", "connector_id": c.id}, + headers=auth_headers, + ) + assert resp.status_code == 400 + + +def test_clear_unknown_feature_returns_422(client, auth_headers): + resp = client.delete("/api/llm/feature-preferences/bogus", headers=auth_headers) + assert resp.status_code == 422 +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q -k endpoint` +Expected: FAIL with 404 (route not found) on the first POST. 
+ +- [ ] **Step 3: Add the endpoints** + +In `server/app/api/llm.py`: + +Add to the schema import block: + +```python +from app.schemas.llm import ( + ConnectorCreate, + ConnectorCredentialsRotate, + ConnectorOut, + ConnectorPatch, + ConnectorTestResult, + DjPolicyOut, + FeatureKey, + FeaturePreferenceSet, + FeaturePreferencesListOut, +) +``` + +Add to the connector_storage import block: + +```python +from app.services.llm.connector_storage import ( + ...existing names..., + clear_feature_preference, + get_feature_preferences_for_user, + set_feature_preference, +) +``` + +Add a small helper near `_get_owned_connector_or_404`: + +```python +def _feature_prefs_response(db: Session, user_id: int) -> FeaturePreferencesListOut: + """Build the list response: the DJ's current pins + the pinnable catalogue.""" + from app.schemas.llm import KNOWN_FEATURE_VALUES, FeaturePreferenceOut + + rows = get_feature_preferences_for_user(db, user_id) + return FeaturePreferencesListOut( + preferences=[FeaturePreferenceOut.model_validate(r) for r in rows], + known_features=list(KNOWN_FEATURE_VALUES), # type: ignore[arg-type] + ) +``` + +Add the three endpoints (place after the unset-default endpoint, before the delete-connector endpoint): + +```python +@router.get("/feature-preferences", response_model=FeaturePreferencesListOut) +@limiter.limit("60/minute") +def list_feature_preferences( + request: FastAPIRequest, + user: User = Depends(get_current_active_user), + db: Session = Depends(get_db), +) -> FeaturePreferencesListOut: + """List the DJ's per-feature connector pins (issue #337).""" + return _feature_prefs_response(db, user.id) + + +@router.post( + "/feature-preferences", + response_model=FeaturePreferencesListOut, + responses={ + 400: {"description": "Connector is not active and cannot be pinned."}, + 404: {"description": "Connector not found for current user."}, + }, +) +@limiter.limit("30/minute") +def set_feature_preference_endpoint( + request: FastAPIRequest, + payload: 
FeaturePreferenceSet, + user: User = Depends(get_current_active_user), + db: Session = Depends(get_db), +) -> FeaturePreferencesListOut: + """Pin (or re-pin) a connector to a feature for the current DJ. + + Validates connector ownership server-side (404 for IDs the DJ doesn't own, + so another DJ's connector existence is never leaked) and rejects pinning a + non-active connector (400) — the gateway would skip it anyway, so silently + accepting it is a footgun. + """ + row = _get_owned_connector_or_404(db, payload.connector_id, user.id) + if row.status != "active": + raise HTTPException( + status_code=400, + detail="Only an active connector can be pinned to a feature", + ) + set_feature_preference( + db, user_id=user.id, feature=payload.feature, connector_id=row.id + ) + db.commit() + return _feature_prefs_response(db, user.id) + + +@router.delete("/feature-preferences/{feature}", response_model=FeaturePreferencesListOut) +@limiter.limit("30/minute") +def clear_feature_preference_endpoint( + request: FastAPIRequest, + feature: FeatureKey, + user: User = Depends(get_current_active_user), + db: Session = Depends(get_db), +) -> FeaturePreferencesListOut: + """Clear the DJ's pin for ``feature`` (no-op if unset). Returns the new list.""" + clear_feature_preference(db, user_id=user.id, feature=feature) + db.commit() + return _feature_prefs_response(db, user.id) +``` + +> Path-param `feature: FeatureKey` makes FastAPI return 422 for unknown features automatically. 
+ +- [ ] **Step 4: Run test to verify it passes** + +Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -x -q -k "endpoint or feature"` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add server/app/api/llm.py server/tests/test_llm_feature_preference.py +git commit -m "feat(llm): set/clear/list feature-preference endpoints" +``` + +--- + +## Task 7: Consistency guard + full backend CI + +**Files:** +- Test: `server/tests/test_llm_feature_preference.py` + +- [ ] **Step 1: Add a guard test that FeatureKey == KNOWN_FEATURES** + +Append to `server/tests/test_llm_feature_preference.py`: + +```python +def test_feature_key_literal_matches_known_features(): + """FeatureKey (the OpenAPI enum) must stay in sync with KNOWN_FEATURES.""" + import typing + + from app.schemas.llm import FeatureKey + + literal_values = set(typing.get_args(FeatureKey)) + assert literal_values == set(KNOWN_FEATURES) +``` + +- [ ] **Step 2: Run the full new test file** + +Run: `cd server && .venv/bin/pytest tests/test_llm_feature_preference.py -q` +Expected: PASS (all tests) + +- [ ] **Step 3: Run full backend CI** + +```bash +cd server +.venv/bin/ruff check . +.venv/bin/ruff format --check . +.venv/bin/bandit -r app -c pyproject.toml -q +.venv/bin/alembic upgrade head && .venv/bin/alembic check +.venv/bin/pytest --tb=short -q +``` + +Expected: ruff clean, bandit clean, alembic check clean, pytest passes with coverage ≥ gate. Fix any failures before committing. 
+ +- [ ] **Step 4: Commit** + +```bash +git add server/tests/test_llm_feature_preference.py +git commit -m "test(llm): guard FeatureKey/KNOWN_FEATURES sync" +``` + +--- + +## Task 8: Frontend — regenerate types + api.ts methods + +**Files:** +- Modify: `dashboard/lib/api-types.generated.ts` (regenerated), `dashboard/lib/api-types.ts`, `dashboard/lib/api.ts` + +- [ ] **Step 1: Regenerate OpenAPI types** + +```bash +cd dashboard +npm run types:export +npm run types:generate +git checkout ../dashboard/next-env.d.ts 2>/dev/null || true +``` + +Expected: `lib/api-types.generated.ts` now contains `FeaturePreferenceOut`, `FeaturePreferencesListOut`, `FeaturePreferenceSet` schemas. + +- [ ] **Step 2: Re-export the new types** + +In `dashboard/lib/api-types.ts`, in the LLM gateway block, add: + +```typescript +export type LlmFeaturePreference = Schemas['FeaturePreferenceOut']; +export type LlmFeaturePreferences = Schemas['FeaturePreferencesListOut']; +export type LlmFeaturePreferenceSet = Schemas['FeaturePreferenceSet']; +export type LlmFeatureKey = Schemas['FeaturePreferenceOut']['feature']; +``` + +- [ ] **Step 3: Add api.ts methods** + +In `dashboard/lib/api.ts`, add the type imports to the existing LLM import + re-export blocks: +`LlmFeaturePreferences`, `LlmFeaturePreferenceSet`, `LlmFeatureKey`. + +Then add methods after `unsetLlmConnectorDefault`: + +```typescript + async listLlmFeaturePreferences(): Promise<LlmFeaturePreferences> { + return this.fetch('/api/llm/feature-preferences'); + } + + async setLlmFeaturePreference( + data: LlmFeaturePreferenceSet, + ): Promise<LlmFeaturePreferences> { + return this.fetch('/api/llm/feature-preferences', { + method: 'POST', + body: JSON.stringify(data), + }); + } + + async clearLlmFeaturePreference( + feature: LlmFeatureKey, + ): Promise<LlmFeaturePreferences> { + return this.fetch(`/api/llm/feature-preferences/${feature}`, { + method: 'DELETE', + }); + } +``` + +- [ ] **Step 4: Type-check** + +Run: `cd dashboard && npx tsc --noEmit` +Expected: no errors.
+ +- [ ] **Step 5: Commit** + +```bash +git add dashboard/lib/api-types.generated.ts dashboard/lib/api-types.ts dashboard/lib/api.ts server/openapi.json +git commit -m "feat(ai-ui): api client methods + types for feature preferences" +``` + +--- + +## Task 9: Frontend — "Per-feature defaults" section + +**Files:** +- Modify: `dashboard/components/AiProvidersSection.tsx` +- Test: `dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx` (create) + +- [ ] **Step 1: Write the failing test** + +Check first whether a test file already exists for this component: +Run: `ls dashboard/components/__tests__/ 2>/dev/null | grep -i aiprovider || ls dashboard/**/__tests__/ 2>/dev/null` + +Create `dashboard/components/__tests__/AiProvidersSection.featurePrefs.test.tsx`: + +```tsx +import { render, screen, waitFor, fireEvent } from '@testing-library/react'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +import AiProvidersSection from '../AiProvidersSection'; +import { api } from '@/lib/api'; + +vi.mock('@/lib/api', () => ({ + api: { + listLlmConnectors: vi.fn(), + getLlmPolicy: vi.fn(), + listOpenRouterModels: vi.fn(), + listLlmFeaturePreferences: vi.fn(), + setLlmFeaturePreference: vi.fn(), + clearLlmFeaturePreference: vi.fn(), + }, +})); + +const connector = { + id: 1, + user_id: 1, + connector_type: 'openai_apikey', + display_name: 'My OpenAI', + status: 'active', + base_url_plain: null, + model_hint: null, + created_at: '2026-01-01T00:00:00Z', + updated_at: '2026-01-01T00:00:00Z', + last_used_at: null, + last_error: null, + is_default: false, + last_health_check_at: null, + last_health_check_status: null, +}; + +beforeEach(() => { + vi.clearAllMocks(); + (api.listLlmConnectors as any).mockResolvedValue([connector]); + (api.getLlmPolicy as any).mockResolvedValue({ + llm_apikey_connectors_enabled: true, + llm_compatible_connector_enabled: true, + allowed_connector_types: ['openai_apikey'], + }); + (api.listLlmFeaturePreferences as 
any).mockResolvedValue({ + preferences: [], + known_features: ['recommendation', 'set_builder'], + }); +}); + +describe('AiProvidersSection per-feature defaults', () => { + it('renders a picker per known feature and sets a pin', async () => { + (api.setLlmFeaturePreference as any).mockResolvedValue({ + preferences: [{ feature: 'recommendation', connector_id: 1 }], + known_features: ['recommendation', 'set_builder'], + }); + + render(<AiProvidersSection />); + + await waitFor(() => expect(screen.getByText(/Per-feature defaults/i)).toBeInTheDocument()); + + const select = screen.getByLabelText(/recommendation/i) as HTMLSelectElement; + fireEvent.change(select, { target: { value: '1' } }); + + await waitFor(() => + expect(api.setLlmFeaturePreference).toHaveBeenCalledWith({ + feature: 'recommendation', + connector_id: 1, + }), + ); + }); + + it('clears a pin when "Use account default" is selected', async () => { + (api.listLlmFeaturePreferences as any).mockResolvedValue({ + preferences: [{ feature: 'recommendation', connector_id: 1 }], + known_features: ['recommendation', 'set_builder'], + }); + (api.clearLlmFeaturePreference as any).mockResolvedValue({ + preferences: [], + known_features: ['recommendation', 'set_builder'], + }); + + render(<AiProvidersSection />); + await waitFor(() => expect(screen.getByText(/Per-feature defaults/i)).toBeInTheDocument()); + + const select = screen.getByLabelText(/recommendation/i) as HTMLSelectElement; + fireEvent.change(select, { target: { value: '' } }); + + await waitFor(() => + expect(api.clearLlmFeaturePreference).toHaveBeenCalledWith('recommendation'), + ); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cd dashboard && npm test -- --run AiProvidersSection.featurePrefs` +Expected: FAIL — no "Per-feature defaults" section yet. 
+ +- [ ] **Step 3: Implement the section** + +In `dashboard/components/AiProvidersSection.tsx`: + +Add to the type import block: + +```typescript +import type { + AIModelInfo, + LlmConnector, + LlmConnectorCreate, + LlmConnectorType, + LlmDjPolicy, + LlmFeaturePreferences, + LlmFeatureKey, +} from '@/lib/api-types'; +``` + +Add a human-readable feature label map near `CONNECTOR_TYPE_LABELS`: + +```typescript +const FEATURE_LABELS: Record<LlmFeatureKey, string> = { + recommendation: 'Recommendations', + set_builder: 'Set builder', +}; +``` + +Add state inside the component (next to the other `useState` hooks): + +```typescript + const [featurePrefs, setFeaturePrefs] = useState<LlmFeaturePreferences | null>(null); +``` + +Add `fetchFeaturePrefsSoft()` (the soft-failing wrapper around `api.listLlmFeaturePreferences()`, added at the bottom of the file below) to the initial `Promise.all`: + +```typescript + Promise.all([api.listLlmConnectors(), fetchPolicySoft(), fetchFeaturePrefsSoft()]) + .then(([rows, p, prefs]) => { + if (!active) return; + setConnectors(rows); + setPolicy(p); + setFeaturePrefs(prefs); + }) +``` + +Add handlers near `handleUnsetDefault`: + +```typescript + const handleFeaturePrefChange = async (feature: LlmFeatureKey, value: string) => { + try { + const updated = + value === '' + ? await api.clearLlmFeaturePreference(feature) + : await api.setLlmFeaturePreference({ + feature, + connector_id: Number(value), + }); + setFeaturePrefs(updated); + setError(''); + } catch (err) { + setError(err instanceof Error ? err.message : 'Failed to update feature default'); + } + }; +``` + +Add the section JSX after the "Connected providers" `
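` (before the "Add provider" section). Only render it when there is at least one active connector to pin: + +```tsx + {featurePrefs && featurePrefs.known_features.length > 0 && ( + <section> + <h2>Per-feature defaults</h2> + <p> + Pin a specific provider to each AI feature. Unpinned features use your account + default (or most-recently-used) connector. Inactive connectors are skipped + automatically. + </p> + {featurePrefs.known_features.map((feature) => { + const current = + featurePrefs.preferences.find((p) => p.feature === feature)?.connector_id ?? ''; + const selectId = `feature-pref-${feature}`; + const activeConnectors = connectors.filter((c) => c.status === 'active'); + return ( + <div key={feature}> + <label htmlFor={selectId}>{FEATURE_LABELS[feature]}</label> + <select + id={selectId} + value={current} + onChange={(e) => handleFeaturePrefChange(feature, e.target.value)} + > + <option value="">Use account default</option> + {activeConnectors.map((c) => ( + <option key={c.id} value={c.id}> + {c.display_name} + </option> + ))} + </select> + </div> + ); + })} + </section>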
+ )} +``` + +Add the soft-fetch helper near `fetchPolicySoft` at the bottom: + +```typescript +async function fetchFeaturePrefsSoft(): Promise<LlmFeaturePreferences | null> { + try { + return await api.listLlmFeaturePreferences(); + } catch { + return null; + } +} +``` + +> Design note: the `