From 57cb3488f59e4cd7ac5cfc6d1e911cd6a4af4c79 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Sun, 24 May 2026 17:32:52 +0000 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20entity=20enrichment=20=E2=80=94=20m?= =?UTF-8?q?etadata,=20relationships,=20and=20self-entity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire up the existing metadata JSON column end-to-end: - Curator extracts description/role metadata for entities - Metadata merged on dedup (existing non-empty values win) - CLI: --metadata flag on add, new edit subcommand - Dashboard: structured metadata display + edit form Add entity-to-entity relationships: - New entity_relations table (migration v28) - Directed pairs with relation types (friend, colleague, manager, etc.) - Curator extracts relationships from conversation - CLI: relation add/rm subcommands - Dashboard: relationships section on entity detail page Add self-entity (entity_type='self'): - Auto-created from git config user.name/email - Configurable via .lore.json user section - Anchors relationships ('Melkey is my friend') - Special formatting in prompt injection ('you (the user)') Add hybrid entity injection policy: - Agent system prompt: all entities if <=30, otherwise self + relations + relevance-ranked remainder (configurable via knowledge.maxEntityInject) - Curator always gets the full entity list for grounding/dedup Type-based cross_project defaults: - person/org/service/tool -> cross-project (user-level) - repo/infra -> project-scoped by default 47 new tests covering metadata, relations, self-entity, injection policy, curator integration, merge with relations. 
--- .lore.md | 16 +- packages/core/src/config.ts | 28 +- packages/core/src/curator.ts | 131 +++++- packages/core/src/db.ts | 31 ++ packages/core/src/entities.ts | 423 ++++++++++++++++++- packages/core/src/git.ts | 53 +++ packages/core/src/index.ts | 2 +- packages/core/src/prompt.ts | 32 +- packages/core/test/db.test.ts | 2 +- packages/core/test/entities.test.ts | 625 ++++++++++++++++++++++++++++ packages/gateway/src/cli/entity.ts | 198 +++++++++ packages/gateway/src/ui.ts | 68 ++- 12 files changed, 1570 insertions(+), 39 deletions(-) create mode 100644 packages/core/test/entities.test.ts diff --git a/.lore.md b/.lore.md index 019b48f9..2c6c115b 100644 --- a/.lore.md +++ b/.lore.md @@ -23,7 +23,7 @@ * **Daily budget throttle curve: 3 zones + rate-aware EMA design**: Daily budget throttle: rate-aware EMA design. State in \`cost-tracker.ts\`: \`costRateEMA\` (USD/hr), \`costRateLastUpdate\`, \`costRateSeeded\`. \`updateCostRate(turnCost)\` called from \`recordConversationCost()\` after every conversation turn. First turn seeds EMA as \`turnCost \* 30\` (30 turns/hr baseline). α=0.15; time-gap-adjusted \`effectiveAlpha = 1 - (1-0.15)^(elapsedHours/(1/30))\`. \`computeThrottleDelay(dailySpend, dailyBudget, costRatePerHour, hoursRemaining)\`: no throttle if \`spendFraction < 0.50\` or \`costRatePerHour <= targetRate\`; \`MAX\_THROTTLE\_DELAY=60s\`, \`THROTTLE\_FLOOR=0.50\`. Pre-request cost estimate: \`estimateRequestCost()\` uses \`getLastTransformEstimate(sessionID)\` with fallback \`ceil(JSON.stringify(messages).length/3)\`; output estimate = \`min(inputTokens\*0.25, 16384)\`. Injection point: \`handleConversationTurn()\` AFTER gradient transform, BEFORE \`forwardToUpstream()\` (~pipeline.ts:3391). Only Case 3 conversation turns throttled — meta requests and compaction exempt. Sleep capped at 50% of remaining cache TTL window to avoid cache busts. -* **DB schema current version: 26 migrations, key columns per migration**: DB schema current version: 27 migrations. 
Key migrations: v2=distillations.observations; v4=session\_state(force\_min\_layer); v5=distillations.archived; v7=distillation\_fts FTS5; v8=knowledge.embedding+kv\_meta; v10=lat\_sections\_fts+knowledge\_refs; v11=F3b chunk separator; v12=distillations r\_compression+c\_norm; v13=metadata table; v14=projects.git\_remote+path\_aliases; v15=warmup\_histograms; v16=temporal\_messages.embedding; v17=distillations.call\_type; v18=session\_state 10 cost/savings cols; v19=import\_history; v22=projects.last\_import\_at; v23=session\_state LTM cache/pin+consecutive\_text\_only\_turns; v24=session identity+gradient state; v25=dedup\_feedback; v26=session\_state.parent\_session\_id TEXT (nullable) + is\_subagent INTEGER NOT NULL DEFAULT 0 + idx\_session\_state\_parent index (sub-agent tree); v27=knowledge +11 cols + team\_knowledge + team\_config tables + entities table (id, project\_id, entity\_type, canonical\_name, metadata, cross\_project). CONFLICT: feat-entity-registry branch also targets v27 — whichever merges second must bump to v28. \`migrate()\` normalizes to MIGRATIONS.length; \`SCHEMA\_VERSION\` constant is dead code. +* **DB schema current version: 26 migrations, key columns per migration**: DB schema current version: 28 migrations. 
Key migrations: v2=distillations.observations; v4=session\_state(force\_min\_layer); v5=distillations.archived; v7=distillation\_fts FTS5; v8=knowledge.embedding+kv\_meta; v10=lat\_sections\_fts+knowledge\_refs; v11=F3b chunk separator; v12=distillations r\_compression+c\_norm; v13=metadata table; v14=projects.git\_remote+path\_aliases; v15=warmup\_histograms; v16=temporal\_messages.embedding; v17=distillations.call\_type; v18=session\_state 10 cost/savings cols; v19=import\_history; v22=projects.last\_import\_at; v23=session\_state LTM cache/pin+consecutive\_text\_only\_turns; v24=session identity+gradient state; v25=dedup\_feedback; v26=session\_state.parent\_session\_id+is\_subagent; v27=entities+entity\_aliases+knowledge\_entity\_refs tables+FTS5+triggers (Entity Registry); v28=knowledge +11 cols (sensitivity, promotion\_status, source\_user\_id, etc.) + team\_knowledge + team\_config tables. \`migrate()\` uses \`MIGRATIONS.length\`; \`SCHEMA\_VERSION=16\` constant is dead code (never updated, never read). \`recoverMissingObjects()\` must cover v28 tables (\`team\_knowledge\`, \`team\_config\`) — omitting them causes silent permanent failure if partial migration occurs. * **embedding.ts: LocalProvider worker lifecycle — init, crash, restart, and permanent-disable paths**: \`LocalProvider\` worker lifecycle in \`embedding.ts\`: \`workerReady=true\` set at line 404 after Worker constructor returns — NOT after ONNX pipeline loads. \`workerInitError\` set permanently on \`'init-error'\` message (line 353), \`'error'\` event (line 376), or non-zero \`'exit'\` (line 387). Once set, all \`ensureWorker()\` calls throw \`LocalProviderUnavailableError\` — no restart. \`initPromise=null\` only on constructor throw (allows retry). \`shutdown()\` (line 464) resets all fields including \`workerInitError=null\` — allows re-init. \`resetProvider()\` shuts down + sets \`cachedProvider=undefined\` (allows new provider). 
\`\_shutdownAndDisable()\` sets \`cachedProvider=null\` (prevents new provider — test-only). \`embed()\` auto-fallback: catches \`LocalProviderUnavailableError\`, calls \`pickRemoteFallback()\`, permanently replaces \`cachedProvider\`. Voyage wins ties over OpenAI (higher code search quality). @@ -74,7 +74,7 @@ * **Sub-agent session detection, isolation, and differential treatment**: Sub-agent sessions detected via \`x-parent-session-id\` header (pipeline.ts:2773-2809). Each sub-agent gets independent session: temporal storage, gradient state, distillation pipeline, LTM injection, cost tracking. Key differential treatment: (1) cache warming ALWAYS skipped — \`shouldWarm()\` returns false unconditionally (cache-warmer.ts:731-733); (2) \`findRotationPredecessor()\` (session.ts:505) skips sub-agents from Tier 1b rotation; (3) idle distillation/curation runs normally (no isSubagent check). \`isSubagent\`/\`parentSessionId\` persisted to DB (migration v26). Dashboard: collapsible cost-rollup tree via \`loadParentChildMap()\` + \`buildLiveSessionRows()\`; \`rollUp()\` propagates grandchildren costs bottom-up. -* **Team mode knowledge promotion: both explicit sharing and auto-suggest; approval workflow**: Team mode knowledge promotion supports explicit sharing (user-initiated) and auto-suggest (system-initiated), with an approval workflow preventing noisy/wrong entries from reaching team DB. Turso sync lives in the gateway layer (not core/db.ts — synchronous bun:sqlite incompatible with async @tursodatabase/sync). Team topology: Personal DB = local SQLite synced to Turso cloud; Team DB = Turso cloud only. Same schema with nullable team columns for backward compat. Cross-project knowledge stays personal. Personal entries get score boost in forSession(). Sensitivity column is a product hint for auto-promotion, not a security boundary. Local NLP for entity detection REJECTED (heavy deps, underperforms on technical entities, duplicates curator LLM work). 
compromise.js also rejected. Current approach (curator detects → alias lookup → FTS5 fuzzy match) covers ~80% of cases. Entity clustering (issue #462) approved as path forward: reuses Nomic v1.5/Voyage/OpenAI embeddings. +* **Team mode knowledge promotion: both explicit sharing and auto-suggest; approval workflow**: Team mode knowledge promotion: explicit sharing + auto-suggest with approval workflow. Architecture: database-per-user/team model (NOT shared DB with visibility column — Turso partial sync is a performance optimization, not row-level isolation, so visibility-as-access-control doesn't work). Personal DB = local SQLite synced to Turso cloud; Team DB = Turso cloud only. Schema: \`knowledge\` table gets +11 cols (sensitivity, promotion\_status, source\_user\_id, source\_entry\_id, etc.); new \`team\_knowledge\` and \`team\_config\` tables (v28). Cross-project knowledge stays personal. \`PromotionStatus\`/\`ApprovalStatus\` types exported from ltm.ts but \`KnowledgeEntry\` uses \`string\` — types not yet wired. \`update()\` does not accept \`sensitivity\` — no post-creation sensitivity change. \`team\_knowledge\` missing indexes and several columns present in \`knowledge\`. RFC documented in GitHub Issue #467. * **workspace.ts: discoverWorkspaceRoot() and resolveWorkspaces() — monorepo support**: workspace.ts: discoverWorkspaceRoot() and resolveWorkspaces() — monorepo support: \`discoverWorkspaceRoot(startDir)\` walks UP checking markers: (1) \`.lore.json\` with non-empty \`workspaces\` array, (2) VCS markers (\`.git\`, \`.hg\`, \`.jj\`), (3) workspace markers (\`pnpm-workspace.yaml\`, \`nx.json\`), (4) language markers. Stops at \`homedir()\`. Process-lifetime cache. CRITICAL BUG: homedir boundary check comes AFTER marker checks — if \`~/.git\` exists, returns \`$HOME\` as workspace root. Fix: move \`if (current === stopBoundary) break\` BEFORE marker checks. \`resolveWorkspaces()\` resolves via literal paths + single-level globs. 
TRAP: glob \`\*\` matches dot-prefixed dirs — add \`if (entry.name.startsWith('.')) continue\`. TRAP: no path traversal guard — \`../\` patterns reach outside root. @@ -104,6 +104,9 @@ * **LOREAI-GATEWAY-Z: OAuth token expiry causes 401 storm — resolveAuth returns same stale token**: LOREAI-GATEWAY-Z: OAuth token expiry causes 401 storm. When a single-user OAuth token expires, \`resolveAuth(sessionID)\` marks the session stale and falls back to \`getLastSeenAuth()\` — which holds the same expired token. \`credentialChanged\` is false, so the retry-once path (llm-adapter.ts:448–498) is never taken, and Sentry fires on every background worker call. The 30s idle scheduler fires 4+ LLM calls per tick (distillation, meta-distill, curation, consolidation), each independently 401ing. \`isAuthStale()\` is exported but NEVER called in any non-test source file. Fix: (1) check \`isAuthStale(sessionID)\` in idle.ts before scheduling LLM work; (2) \`resolveAuth\` returns \`null\` when global fallback matches the stale session credential; (3) add \`/Worker upstream auth error/\` to \`TRANSIENT\_ERROR\_PATTERNS\` in \`instrument.ts\`. Circuit breaker (background-limiter.ts) only trips on 429, not 401. + +* **Migration version number collision when working on stale branch — always rebase before implementing schema changes**: Trap: implementing a DB migration on a branch that's behind main looks safe until rebase/merge — both branches independently claim the same migration version number (e.g. both use v27). The conflict is only discovered at merge time and requires renumbering all affected tests, comments, and type annotations. Fix: always \`git pull --rebase\` before starting schema migration work, and check \`MIGRATIONS.length\` in the current main to claim the correct next version number. When renumbering, grep for the old version string in comments and test files — they won't be caught by TypeScript. 
+ * **ONNX OOM error code 284432024: single oversized text bypasses batch budget guard**: ONNX OOM error chain (Nomic v1.5): Nomic v1.5 pads ALL batch texts to longest sequence — one oversized text → huge tensor → OOM. Fix chain: (1) \`LOCAL\_MAX\_CHARS = 16384\` (4 chars/token × 4096 tokens) in \`embedding.ts:38\`; (2) Worker-level OOM-retry loop in \`processEmbed()\` (\`embedding-worker.ts:267-281\`): attempt=0 tries full texts; on OOM, \`maxTokens = OOM\_RETRY\_START\_TOKENS(4096) >> attempt\` → 4096→2048→1024; \`OOM\_MAX\_RETRIES=3\`; (3) \`truncateTexts(texts, maxTokens)\` helper; (4) \`nextBatch()\` adaptive batching caps \`batch\_size×max\_tokens\` at \`MAX\_BATCH\_TOKEN\_AREA=4096\`; (5) \`safeLocalTruncate()\` backs up 1 char on high surrogate boundary. \`isOomError()\` matches \`/^\d{6,}$/\` and \`/out.of.memory|alloc.\*fail|oom/i\`. Known OOM codes: 284432024, 287180544, 144786472. WASM \`Aborted()\` error is NOT caught by \`isOomError()\` — it's a fatal WASM abort. Fix: detect \`isWasmFatalError()\` matching \`/Aborted\\(\\)/i\`, post \`{ type: 'init-error' }\` and call \`process.exit(1)\` to trigger main-thread \`on('exit')\` handler which sets \`localProviderKnownBroken=true\`. @@ -113,9 +116,15 @@ * **Query expansion disabled by default AND LLM client not wired in gateway (IF-5 fix)**: Two co-required fixes for query expansion (IF-5): (1) \`queryExpansion\` default in \`packages/core/src/config.ts\` must be \`true\` in BOTH the field-level \`.default()\` AND the section-level \`.default({})\` — the section-level overrides field-level in Zod, so changing only one has no effect. (2) \`executeRecall()\` in \`packages/gateway/src/recall.ts\` must accept an optional \`LLMClient\` param and pass it to \`runRecall()\` — without it, \`if (searchConfig?.queryExpansion && llm)\` is always false. Both pipeline.ts call sites (streaming and non-streaming) must pass \`getLLMClient(config)\`. 
+ +* **recoverMissingObjects() must be updated for every new migration that creates tables**: Trap: adding a new migration (e.g. v28 with \`team\_knowledge\`/\`team\_config\`) looks complete once the migration SQL is in the MIGRATIONS array. But \`recoverMissingObjects()\` in \`db.ts\` is a separate recovery path that re-creates tables for specific migrations (v8, v13, v14, v27). If v28 tables are omitted from \`recoverMissingObjects()\`, a partial migration leaves the tables missing permanently — subsequent runs see version=28 and skip the migration, so the tables are never created. Fix: always add a recovery block for every migration that creates new tables. + * **Remote import idempotency split-brain: isImported queries local DB, recordImport writes to remote**: When \`LORE\_REMOTE\_URL\` is set, \`lore import\` has a split-brain bug: \`isImported()\` checks the \*\*local\*\* SQLite \`import\_history\` table, but \`remotePost('/api/v1/import/record')\` writes the record to the \*\*remote\*\* DB. Result: every subsequent run re-detects all sessions as un-imported and double-extracts. Fix: (1) add \`GET /api/v1/import/history\` endpoint and query remote during dedup check when remote URL is set; (2) optionally also write locally as offline resilience. The remote DB is the source of truth for import history in remote mode. + +* **SCHEMA\_VERSION constant is dead code — migrate() uses MIGRATIONS.length**: Trap: \`SCHEMA\_VERSION\` constant in \`packages/core/src/db.ts:24\` looks like the authoritative schema version and is tempting to update when adding migrations. But \`migrate()\` uses \`MIGRATIONS.length\` (the array length), not this constant. \`SCHEMA\_VERSION\` has been stuck at 16 since migration v16 was added and is never read anywhere. Fix: ignore \`SCHEMA\_VERSION\` entirely; the migration count is the true version. The constant is actively misleading — a future session may update it thinking it matters. 
+ * **splitSegments() infinite recursion on oversized single messages**: \`splitSegments()\` infinite recursion on oversized single messages in \`packages/core/src/distillation.ts\`: recurses infinitely when a single message exceeds \`maxSegmentTokens\` (16384). \`findSplitIndex()\` returns \`messages.length\` (=1), so \`left = messages.slice(0, 1)\` produces an identical recursive call. Triggered on large tool outputs (~49KB+). Fix: add base case after the \`totalTokens <= maxTokens\` guard — \`if (messages.length <= 1) return \[messages]\`. The oversized message becomes an indivisible segment. @@ -141,6 +150,9 @@ * **Always constrain features to minimal viable scope and flag over-engineering risks**: Design philosophy: minimal viable scope, gradual throttling, no over-engineering. (1) Always prefer the simplest implementation that solves the core problem — reject slow/complex alternatives (e.g., \`computeDailyCosts()\` too slow for hot-path), defer non-MVP concerns, avoid extra config surfaces. (2) For rate-limiting/throttling: reject hard blocks or large fixed delays ('too annoying'); prefer EMA-style velocity detection with small incremental delays the user barely notices. Use rate/velocity signals (EMA of cost-per-hour, inter-turn gaps), apply gentle progressive friction, avoid binary allow/block walls. When presenting throttle curves, favor smooth ramps with small minimum delays over step functions with large penalties. + +* **Always include recoverMissingObjects() coverage for new tables in migration reviews**: When reviewing PRs that add new database tables via migrations, the user consistently checks whether \`recoverMissingObjects()\` in \`db.ts\` has been updated to include the new tables. This is a recurring critical finding: if a migration partially fails, subsequent runs skip it (version already updated), and without \`recoverMissingObjects()\` coverage, the new tables are never created. 
Always verify that every new table introduced in a migration is also handled in \`recoverMissingObjects()\` with \`CREATE TABLE IF NOT EXISTS\`. Flag the omission as CRITICAL if missing. + * **Always investigate Sentry issues by exploring the full codebase context before proposing fixes**: Sentry investigation and triage rules: (1) Always explore full source files in \`packages/gateway/src/\` (not bundled dist), tracing call chains across auth/adapter/pipeline/idle/instrumentation before proposing fixes. (2) Enumerate ALL contributing root causes (numbered, with file:line) before proposing any fix. (3) Check user count and event count first; skip fixes for issues with 0 real users caused by caller-side misuse. (4) At session start, proactively fetch Sentry issues, triage by severity (user count, event count, regression, fatality), and present a prioritized action list. (5) \`log.error(..., errorObj)\` auto-forwards to Sentry via \`sink.captureException()\` — use typed error subclasses (e.g. \`LocalProviderUnavailableError\`) and \`beforeSend\` in \`instrument.ts\` to filter expected errors. diff --git a/packages/core/src/config.ts b/packages/core/src/config.ts index fc58803a..8fc23d28 100644 --- a/packages/core/src/config.ts +++ b/packages/core/src/config.ts @@ -117,8 +117,13 @@ export const LoreConfig = z.object({ * the curator, knowledge DB writes, AGENTS.md sync, and LTM injection into the * system prompt. Default: true. */ enabled: z.boolean().default(true), + /** Max entities to inject into the agent system prompt. When the total entity count + * exceeds this cap, the self entity + its relations are always included and the rest + * are relevance-ranked. Remaining entities are discoverable via recall. + * Set to 0 to disable entity injection. Default: 30. 
*/ + maxEntityInject: z.number().min(0).default(30), }) - .default({ enabled: true }), + .default({ enabled: true, maxEntityInject: 30 }), curator: z .object({ enabled: z.boolean().default(true), @@ -258,6 +263,27 @@ export const LoreConfig = z.object({ path: z.string().default("AGENTS.md"), }) .default({ enabled: true, path: "AGENTS.md" }), + /** User identity for the self-entity. When provided, creates/updates a "self" entity + * with this information. If omitted, falls back to git config user.name / user.email. */ + user: z + .object({ + /** Display name. Overrides git config user.name. */ + name: z.string().optional(), + /** Email address. Overrides git config user.email. */ + email: z.string().optional(), + /** Additional aliases for the self entity. */ + aliases: z + .array( + z.object({ + type: z.enum(["name", "email", "github", "slack", "phone", "nickname", "url", "domain"]), + value: z.string(), + }), + ) + .default([]), + /** Metadata for the self entity (description, role, notes, etc.). */ + metadata: z.record(z.string(), z.unknown()).optional(), + }) + .optional(), }); export type LoreConfig = z.infer; diff --git a/packages/core/src/curator.ts b/packages/core/src/curator.ts index 48a893de..1e4d4b8a 100644 --- a/packages/core/src/curator.ts +++ b/packages/core/src/curator.ts @@ -9,7 +9,7 @@ import { CURATOR_SYSTEM, curatorUser, CONSOLIDATION_SYSTEM, consolidationUser } import { detectAndFormat } from "./instruction-detect"; import { curatorLimiter } from "./session-limiter"; import type { LLMClient } from "./types"; -import type { EntityType, AliasType } from "./entities"; +import type { EntityType, AliasType, RelationType } from "./entities"; /** * Maximum length (chars) for a single knowledge entry's content. 
@@ -24,12 +24,22 @@ export type DetectedEntity = { type: EntityType; canonical_name: string; aliases?: Array<{ type: AliasType; value: string }>; + metadata?: Record; }; -/** Parsed curator response containing both knowledge ops and detected entities. */ +/** Relationship detected by the curator from conversation context. */ +export type DetectedRelation = { + entity_a: string; // canonical name or [uuid] + entity_b: string; + relation: string; + metadata?: Record; +}; + +/** Parsed curator response containing knowledge ops, entities, and relations. */ export type CuratorResponse = { ops: CuratorOp[]; entities: DetectedEntity[]; + relations: DetectedRelation[]; }; export type CuratorOp = @@ -74,21 +84,25 @@ export function parseResponse(text: string): CuratorResponse { return { ops: filterOps(parsed), entities: [], + relations: [], }; } - // New format: { ops: [...], entities: [...] } + // New format: { ops: [...], entities: [...], relations: [...] } if (typeof parsed === "object" && parsed !== null) { const ops = Array.isArray(parsed.ops) ? filterOps(parsed.ops) : []; const detectedEntities = Array.isArray(parsed.entities) ? filterEntities(parsed.entities) : []; - return { ops, entities: detectedEntities }; + const detectedRelations = Array.isArray(parsed.relations) + ? 
filterRelations(parsed.relations) + : []; + return { ops, entities: detectedEntities, relations: detectedRelations }; } - return { ops: [], entities: [] }; + return { ops: [], entities: [], relations: [] }; } catch { - return { ops: [], entities: [] }; + return { ops: [], entities: [], relations: [] }; } } @@ -126,17 +140,62 @@ function filterEntities(arr: unknown[]): DetectedEntity[] { ((a as Record).value as string).length > 0, ) : undefined; + + // Validate metadata — must be a plain object with non-empty string values ≤500 chars + let validMetadata: Record | undefined; + if (typeof obj.metadata === "object" && obj.metadata !== null && !Array.isArray(obj.metadata)) { + const filtered = Object.fromEntries( + Object.entries(obj.metadata as Record).filter( + ([, v]) => typeof v === "string" && v.length > 0 && v.length <= 500, + ), + ); + if (Object.keys(filtered).length > 0) validMetadata = filtered; + } + return { type: obj.type as EntityType, canonical_name: obj.canonical_name as string, aliases: validAliases, + metadata: validMetadata, }; }); } +function filterRelations(arr: unknown[]): DetectedRelation[] { + return arr.filter((r: unknown): r is DetectedRelation => { + if (typeof r !== "object" || r === null) return false; + const obj = r as Record; + return ( + typeof obj.entity_a === "string" && + obj.entity_a.length > 0 && + typeof obj.entity_b === "string" && + obj.entity_b.length > 0 && + typeof obj.relation === "string" && + entities.RELATION_TYPES.includes(obj.relation as RelationType) + ); + }).map((obj) => { + // Validate relation metadata + let validMetadata: Record | undefined; + if (typeof obj.metadata === "object" && obj.metadata !== null && !Array.isArray(obj.metadata)) { + const filtered = Object.fromEntries( + Object.entries(obj.metadata as Record).filter( + ([, v]) => typeof v === "string" && v.length > 0 && v.length <= 500, + ), + ); + if (Object.keys(filtered).length > 0) validMetadata = filtered; + } + return { + entity_a: obj.entity_a, + 
entity_b: obj.entity_b, + relation: obj.relation, + metadata: validMetadata, + }; + }); +} + /** * Apply a list of curator ops (create/update/delete) to the knowledge DB, - * and optionally create detected entities. + * and optionally create detected entities and relations. * Shared by both the live curator and the conversation import system. * * @returns Counts of applied operations. @@ -150,8 +209,10 @@ export function applyOps( skipCreate?: boolean; /** Entities detected by the curator from conversation context. */ detectedEntities?: DetectedEntity[]; + /** Relations detected by the curator from conversation context. */ + detectedRelations?: DetectedRelation[]; }, -): { created: number; updated: number; deleted: number; entitiesCreated: number } { +): { created: number; updated: number; deleted: number; entitiesCreated: number; relationsCreated: number } { let created = 0; let updated = 0; let deleted = 0; @@ -224,7 +285,7 @@ export function applyOps( } } - // Create detected entities + // Create detected entities (metadata merged on dedup via create()) if (input.detectedEntities?.length) { for (const de of input.detectedEntities) { try { @@ -237,7 +298,7 @@ export function applyOps( value: a.value, source: "curator", })), - crossProject: true, // entities default to cross-project + metadata: de.metadata, }); if (result.created) entitiesCreated++; } catch (err) { @@ -246,7 +307,42 @@ export function applyOps( } } - return { created, updated, deleted, entitiesCreated }; + // Create detected relations + let relationsCreated = 0; + if (input.detectedRelations?.length) { + for (const dr of input.detectedRelations) { + try { + // Resolve entity references by canonical name or UUID + const resolveRef = (ref: string): string | null => { + // Check if it's a UUID (wrapped in brackets like [uuid]) + const uuidMatch = ref.match(/^\[([^\]]+)\]$/); + if (uuidMatch) { + const entity = entities.get(uuidMatch[1]); + return entity?.id ?? 
null; + } + // Try to resolve by name + const entity = entities.resolve(ref); + return entity?.id ?? null; + }; + + const aId = resolveRef(dr.entity_a); + const bId = resolveRef(dr.entity_b); + if (aId && bId && aId !== bId) { + const relId = entities.addRelation( + aId, + bId, + dr.relation as entities.RelationType, + { metadata: dr.metadata, source: "curator" }, + ); + if (relId) relationsCreated++; + } + } catch (err) { + log.warn(`relation creation failed for "${dr.entity_a}" → "${dr.entity_b}":`, err); + } + } + } + + return { created, updated, deleted, entitiesCreated, relationsCreated }; } // Track which messages we've already curated — per session to prevent @@ -271,9 +367,9 @@ export async function run(input: { projectPath: string; sessionID: string; model?: { providerID: string; modelID: string }; -}): Promise<{ created: number; updated: number; deleted: number; entitiesCreated: number }> { +}): Promise<{ created: number; updated: number; deleted: number; entitiesCreated: number; relationsCreated: number }> { const cfg = config(); - if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0, entitiesCreated: 0 }; + if (!cfg.curator.enabled) return { created: 0, updated: 0, deleted: 0, entitiesCreated: 0, relationsCreated: 0 }; // Skip-if-busy: curation is periodic, not accumulative. If a curation is // already running for this session, skip — the next trigger will pick up @@ -285,7 +381,7 @@ export async function run(input: { // if this invariant is ever violated. 
if (curatorLimiter.isBusy(input.sessionID)) { log.info(`curation skipped: already running for session ${input.sessionID.slice(0, 16)}`); - return { created: 0, updated: 0, deleted: 0, entitiesCreated: 0 }; + return { created: 0, updated: 0, deleted: 0, entitiesCreated: 0, relationsCreated: 0 }; } return curatorLimiter.get(input.sessionID)(() => runInner(input)); @@ -296,7 +392,7 @@ async function runInner(input: { projectPath: string; sessionID: string; model?: { providerID: string; modelID: string }; -}): Promise<{ created: number; updated: number; deleted: number; entitiesCreated: number }> { +}): Promise<{ created: number; updated: number; deleted: number; entitiesCreated: number; relationsCreated: number }> { const cfg = config(); // Get recent undistilled messages since last curation. @@ -316,7 +412,7 @@ async function runInner(input: { // This is the common case after /lore:curate runs distillation first. const distillations = distillation.loadForSession(input.projectPath, input.sessionID, true); const recentDistillations = distillations.filter((d) => d.created_at > sessionCuratedAt); - if (recentDistillations.length === 0) return { created: 0, updated: 0, deleted: 0, entitiesCreated: 0 }; + if (recentDistillations.length === 0) return { created: 0, updated: 0, deleted: 0, entitiesCreated: 0, relationsCreated: 0 }; text = recentDistillations.map((d) => d.observations).join("\n\n"); } // Include cross-project entries so the curator can see and update @@ -379,13 +475,14 @@ async function runInner(input: { userContent, { model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID, maxTokens: 2048, temperature: 0 }, ); - if (!responseText) return { created: 0, updated: 0, deleted: 0, entitiesCreated: 0 }; + if (!responseText) return { created: 0, updated: 0, deleted: 0, entitiesCreated: 0, relationsCreated: 0 }; const response = parseResponse(responseText); const result = applyOps(response.ops, { projectPath: input.projectPath, sessionID: 
input.sessionID, detectedEntities: response.entities, + detectedRelations: response.relations, }); // Post-curation dedup sweep: if the curator created new entries, check for diff --git a/packages/core/src/db.ts b/packages/core/src/db.ts index c5a3c369..9f3c0dd9 100644 --- a/packages/core/src/db.ts +++ b/packages/core/src/db.ts @@ -657,6 +657,24 @@ const MIGRATIONS: string[] = [ ); CREATE INDEX IF NOT EXISTS idx_knowledge_entity_refs_entity ON knowledge_entity_refs(entity_id); `, + ` + -- Version 28: Entity relationships. + + CREATE TABLE IF NOT EXISTS entity_relations ( + id TEXT PRIMARY KEY, + entity_a TEXT NOT NULL REFERENCES entities(id) ON DELETE CASCADE, + entity_b TEXT NOT NULL REFERENCES entities(id) ON DELETE CASCADE, + relation TEXT NOT NULL, + metadata TEXT, + source TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + UNIQUE(entity_a, entity_b, relation) + ); + + CREATE INDEX IF NOT EXISTS idx_entity_relations_a ON entity_relations(entity_a); + CREATE INDEX IF NOT EXISTS idx_entity_relations_b ON entity_relations(entity_b); + `, ]; /** Return the resolved path of the SQLite database file. */ @@ -843,6 +861,17 @@ function recoverMissingObjects(database: Database) { entity_id TEXT NOT NULL REFERENCES entities(id) ON DELETE CASCADE, PRIMARY KEY (knowledge_id, entity_id) ); + CREATE TABLE IF NOT EXISTS entity_relations ( + id TEXT PRIMARY KEY, + entity_a TEXT NOT NULL REFERENCES entities(id) ON DELETE CASCADE, + entity_b TEXT NOT NULL REFERENCES entities(id) ON DELETE CASCADE, + relation TEXT NOT NULL, + metadata TEXT, + source TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + UNIQUE(entity_a, entity_b, relation) + ); `); // Recover missing columns from partial migration runs. @@ -891,6 +920,8 @@ export function mergeProjectInternal( targetId, sourceId, ); + // entity_relations references entities by FK — no project_id column to update. + // Relations move implicitly when their parent entities move. 
d.query( "UPDATE OR IGNORE project_path_aliases SET project_id = ? WHERE project_id = ?", ).run(targetId, sourceId); diff --git a/packages/core/src/entities.ts b/packages/core/src/entities.ts index a38bef00..1cfe4b2d 100644 --- a/packages/core/src/entities.ts +++ b/packages/core/src/entities.ts @@ -8,16 +8,18 @@ import { uuidv7 } from "uuidv7"; import { db, ensureProject } from "./db"; import { ftsQuery, ftsQueryOr, EMPTY_QUERY, filterTerms } from "./search"; +import { config } from "./config"; +import { getGitUser } from "./git"; import * as log from "./log"; // --------------------------------------------------------------------------- // Types // --------------------------------------------------------------------------- -export type EntityType = "person" | "org" | "service" | "tool" | "repo" | "infra"; +export type EntityType = "self" | "person" | "org" | "service" | "tool" | "repo" | "infra"; export const ENTITY_TYPES: readonly EntityType[] = [ - "person", "org", "service", "tool", "repo", "infra", + "self", "person", "org", "service", "tool", "repo", "infra", ] as const; export type AliasType = @@ -54,6 +56,53 @@ export type EntityWithAliases = Entity & { aliases: EntityAlias[]; }; +/** Structured metadata for entities — role, description, notes. */ +export type EntityMetadata = { + description?: string; + role?: string; + notes?: string; + [key: string]: unknown; +}; + +/** Relationship types between entities. 
*/ +export type RelationType = + | "friend" + | "colleague" + | "manager" + | "report" + | "collaborator" + | "client" + | "mentor" + | "partner"; + +export const RELATION_TYPES: readonly RelationType[] = [ + "friend", "colleague", "manager", "report", + "collaborator", "client", "mentor", "partner", +] as const; + +export type EntityRelation = { + id: string; + entity_a: string; + entity_b: string; + relation: RelationType; + metadata: string | null; + source: string | null; + created_at: number; + updated_at: number; +}; + +/** Relation with the other entity's name resolved for display. */ +export type EntityRelationResolved = EntityRelation & { + other_id: string; + other_name: string; + other_type: EntityType; +}; + +/** Entity types that default to cross-project (user-level). */ +const CROSS_PROJECT_TYPES: ReadonlySet = new Set([ + "self", "person", "org", "service", "tool", +]); + /** Columns to SELECT for Entity — avoids pulling unnecessary data. */ const ENTITY_COLS = "id, project_id, entity_type, canonical_name, metadata, cross_project, created_at, updated_at"; @@ -92,7 +141,10 @@ export function create(input: { } const pid = input.projectPath ? ensureProject(input.projectPath) : null; - const cross = input.crossProject ?? (pid === null ? 1 : 0); + // Type-based cross_project defaults: + // self/person/org/service/tool → cross-project (user-level) + // repo/infra → project-scoped + const cross = input.crossProject ?? (CROSS_PROJECT_TYPES.has(input.entityType) ? true : pid === null); const d = db(); // Dedup + insert inside a transaction to avoid race conditions @@ -101,17 +153,25 @@ export function create(input: { const existing = pid ? (d .query( - `SELECT id FROM entities WHERE canonical_name = ? COLLATE NOCASE AND (project_id = ? OR project_id IS NULL)`, + `SELECT id, metadata FROM entities WHERE canonical_name = ? COLLATE NOCASE AND (project_id = ? 
OR project_id IS NULL)`, ) - .get(input.canonicalName, pid) as { id: string } | null) + .get(input.canonicalName, pid) as { id: string; metadata: string | null } | null) : (d .query( - `SELECT id FROM entities WHERE canonical_name = ? COLLATE NOCASE AND project_id IS NULL`, + `SELECT id, metadata FROM entities WHERE canonical_name = ? COLLATE NOCASE AND project_id IS NULL`, ) - .get(input.canonicalName) as { id: string } | null); + .get(input.canonicalName) as { id: string; metadata: string | null } | null); if (existing) { d.exec("COMMIT"); + // Merge metadata into the existing entity (incoming fills gaps, existing wins) + if (input.metadata && Object.keys(input.metadata).length > 0) { + const merged = mergeMetadata(existing.metadata, input.metadata); + if (merged) { + d.query("UPDATE entities SET metadata = ?, updated_at = ? WHERE id = ?") + .run(JSON.stringify(merged), Date.now(), existing.id); + } + } // Add any new aliases to the existing entity (outside transaction) if (input.aliases?.length) { for (const alias of input.aliases) { @@ -205,15 +265,102 @@ export function update( } } -/** Delete an entity, its aliases, and knowledge refs. */ +/** Delete an entity, its aliases, relations, and knowledge refs. */ export function remove(id: string): void { db().query("DELETE FROM knowledge_entity_refs WHERE entity_id = ?").run(id); + db().query("DELETE FROM entity_relations WHERE entity_a = ? OR entity_b = ?").run(id, id); // Explicitly delete aliases BEFORE the entity so FTS5 content-sync triggers // fire correctly (CASCADE deletes do NOT fire AFTER DELETE triggers in SQLite). db().query("DELETE FROM entity_aliases WHERE entity_id = ?").run(id); db().query("DELETE FROM entities WHERE id = ?").run(id); } +// --------------------------------------------------------------------------- +// Self-entity +// --------------------------------------------------------------------------- + +/** + * Get the self entity (entity_type = 'self'). Returns null if none exists. 
+ * The self entity is always cross-project — there is at most one per installation. + */ +export function getSelfEntity(): EntityWithAliases | null { + const row = db() + .query(`SELECT ${ENTITY_COLS} FROM entities WHERE entity_type = 'self' LIMIT 1`) + .get() as Entity | null; + if (!row) return null; + const aliases = db() + .query("SELECT * FROM entity_aliases WHERE entity_id = ? ORDER BY alias_type, alias_value") + .all(row.id) as EntityAlias[]; + return { ...row, aliases }; +} + +/** + * Ensure the self entity exists. Creates or updates it from: + * 1. `.lore.json` `user` config (explicit override) + * 2. `git config user.name` / `user.email` (auto-detect fallback) + * + * Returns the self entity, or null if no identity could be determined. + */ +export function ensureSelfEntity(projectPath: string): EntityWithAliases | null { + const cfg = config().user; + const git = getGitUser(projectPath); + + const name = cfg?.name || git.name; + if (!name) return getSelfEntity(); // no identity source — return existing or null + + const email = cfg?.email || git.email; + const existing = getSelfEntity(); + + if (existing) { + // Update name if changed + const updates: { canonicalName?: string; metadata?: Record } = {}; + if (existing.canonical_name !== name) { + updates.canonicalName = name; + } + // Merge config metadata into existing + if (cfg?.metadata && Object.keys(cfg.metadata).length > 0) { + const merged = mergeMetadata(existing.metadata, cfg.metadata as Record); + if (merged) updates.metadata = merged; + } + if (Object.keys(updates).length > 0) { + update(existing.id, updates); + } + // Add email alias if not present + if (email) addAlias(existing.id, "email", email, "auto"); + // Add config aliases + if (cfg?.aliases) { + for (const a of cfg.aliases) { + addAlias(existing.id, a.type as AliasType, a.value, "config"); + } + } + return getSelfEntity(); + } + + // Create self entity + const aliases: Array<{ type: AliasType; value: string; source?: string }> = []; 
+ if (email) aliases.push({ type: "email", value: email, source: "auto" }); + if (cfg?.aliases) { + for (const a of cfg.aliases) { + aliases.push({ type: a.type as AliasType, value: a.value, source: "config" }); + } + } + + const result = create({ + projectPath, + entityType: "self", + canonicalName: name, + aliases, + metadata: cfg?.metadata as Record | undefined, + crossProject: true, + }); + + return result.id ? getSelfEntity() : null; +} + +// --------------------------------------------------------------------------- +// CRUD — Read +// --------------------------------------------------------------------------- + /** Get a single entity by ID. */ export function get(id: string): Entity | null { return ( @@ -233,6 +380,55 @@ export function getWithAliases(id: string): EntityWithAliases | null { return { ...entity, aliases }; } +// --------------------------------------------------------------------------- +// Metadata helpers +// --------------------------------------------------------------------------- + +/** + * Shallow-merge incoming metadata into existing metadata. + * Existing non-empty values win (first observation preserved); new keys fill gaps. + * Returns the merged object, or null if both inputs are empty. + */ +export function mergeMetadata( + existing: string | null, + incoming: Record | undefined, +): Record | null { + if (!incoming || Object.keys(incoming).length === 0) { + return existing ? (JSON.parse(existing) as Record) : null; + } + const base = existing ? (JSON.parse(existing) as Record) : {}; + // Start from incoming, then overlay existing non-empty values + const merged: Record = { ...incoming }; + for (const [k, v] of Object.entries(base)) { + if (v !== null && v !== undefined && v !== "") { + merged[k] = v; + } + } + return Object.keys(merged).length > 0 ? merged : null; +} + +/** + * Format metadata for prompt injection — role/description only, max 80 chars. + * Notes are omitted (too noisy for system prompts). 
+ */ +function formatMetadataBrief(metadataJson: string | null): string { + if (!metadataJson) return ""; + try { + const m = JSON.parse(metadataJson) as Record; + const parts: string[] = []; + if (typeof m.role === "string" && m.role) parts.push(m.role); + if (typeof m.description === "string" && m.description && m.description !== m.role) { + parts.push(`"${m.description}"`); + } + if (!parts.length) return ""; + const joined = parts.join("; "); + const truncated = joined.length > 80 ? joined.slice(0, 77) + "..." : joined; + return ` — ${truncated}`; + } catch { + return ""; + } +} + // --------------------------------------------------------------------------- // CRUD — Aliases // --------------------------------------------------------------------------- @@ -580,6 +776,16 @@ export function merge(targetId: string, sourceId: string): void { `UPDATE OR IGNORE knowledge_entity_refs SET entity_id = ? WHERE entity_id = ?`, ).run(targetId, sourceId); + // Move relations from source to target (update both sides) + d.query( + `UPDATE OR IGNORE entity_relations SET entity_a = ? WHERE entity_a = ?`, + ).run(targetId, sourceId); + d.query( + `UPDATE OR IGNORE entity_relations SET entity_b = ? WHERE entity_b = ?`, + ).run(targetId, sourceId); + // Clean up any remaining source relations (UNIQUE conflict → left behind by OR IGNORE) + d.query("DELETE FROM entity_relations WHERE entity_a = ? OR entity_b = ?").run(sourceId, sourceId); + // Delete source — explicit alias delete so FTS5 triggers fire // (CASCADE deletes don't fire AFTER DELETE triggers in SQLite) d.query("DELETE FROM knowledge_entity_refs WHERE entity_id = ?").run(sourceId); @@ -596,6 +802,117 @@ export function merge(targetId: string, sourceId: string): void { } } +// --------------------------------------------------------------------------- +// CRUD — Relations +// --------------------------------------------------------------------------- + +/** + * Add a relationship between two entities. 
Silently ignores duplicates + * (UNIQUE constraint on entity_a + entity_b + relation). + * Returns the relation ID or null if already exists. + */ +export function addRelation( + entityA: string, + entityB: string, + relation: RelationType, + opts?: { metadata?: Record; source?: string }, +): string | null { + if (!RELATION_TYPES.includes(relation)) { + throw new Error(`invalid relation type: ${relation}`); + } + const id = uuidv7(); + const now = Date.now(); + try { + db() + .query( + `INSERT INTO entity_relations (id, entity_a, entity_b, relation, metadata, source, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + ) + .run( + id, + entityA, + entityB, + relation, + opts?.metadata ? JSON.stringify(opts.metadata) : null, + opts?.source ?? null, + now, + now, + ); + return id; + } catch (e: unknown) { + if (e instanceof Error && /UNIQUE constraint/i.test(e.message)) { + log.info(`relation already exists: ${entityA} → ${entityB} (${relation})`); + return null; + } + throw e; + } +} + +/** Remove a relation by its ID. */ +export function removeRelation(id: string): void { + db().query("DELETE FROM entity_relations WHERE id = ?").run(id); +} + +/** + * Get all relations for an entity (either side), with the other entity's + * name and type resolved for display. + */ +export function relationsFor(entityId: string): EntityRelationResolved[] { + const rows = db() + .query( + `SELECT r.*, + CASE WHEN r.entity_a = ? THEN r.entity_b ELSE r.entity_a END AS other_id, + CASE WHEN r.entity_a = ? THEN eb.canonical_name ELSE ea.canonical_name END AS other_name, + CASE WHEN r.entity_a = ? THEN eb.entity_type ELSE ea.entity_type END AS other_type + FROM entity_relations r + JOIN entities ea ON ea.id = r.entity_a + JOIN entities eb ON eb.id = r.entity_b + WHERE r.entity_a = ? OR r.entity_b = ? 
+ ORDER BY r.relation, other_name`, + ) + .all(entityId, entityId, entityId, entityId, entityId) as EntityRelationResolved[]; + return rows; +} + +/** + * Look up relation(s) between two entities, matching either direction (A→B or B→A). If `relation` is provided, + * returns at most one row; otherwise returns all relations between the pair. + */ +export function getRelation( + entityA: string, + entityB: string, + relation?: RelationType, +): EntityRelation[] { + if (relation) { + const row = db() + .query( + `SELECT * FROM entity_relations + WHERE ((entity_a = ? AND entity_b = ?) OR (entity_a = ? AND entity_b = ?)) + AND relation = ? + LIMIT 1`, + ) + .get(entityA, entityB, entityB, entityA, relation) as EntityRelation | null; + return row ? [row] : []; + } + return db() + .query( + `SELECT * FROM entity_relations + WHERE (entity_a = ? AND entity_b = ?) OR (entity_a = ? AND entity_b = ?) + ORDER BY relation`, + ) + .all(entityA, entityB, entityB, entityA) as EntityRelation[]; +} + +/** + * Format relations for an entity as a concise string for prompt injection. + * Example: "friend of Melkey, colleague of Alice". Direction is not considered, so an asymmetric relation (e.g. manager) reads the same from both entities. + */ +export function formatRelationsForPrompt(entityId: string): string { + const rels = relationsFor(entityId); + if (!rels.length) return ""; + return rels.map((r) => `${r.relation} of ${r.other_name}`).join(", "); +} + // --------------------------------------------------------------------------- // Knowledge–Entity References // --------------------------------------------------------------------------- @@ -646,20 +963,91 @@ export function knowledgeForEntity(entityId: string): string[] { return rows.map((r) => r.knowledge_id); } +// --------------------------------------------------------------------------- +// Session injection — hybrid cap-based entity selection +// --------------------------------------------------------------------------- + +/** + * Select entities to inject into the agent system prompt.
+ * + * - If total count ≤ maxEntityInject (default 30): return all + * - If count exceeds cap: + * - Always include: self entity + entities with direct relationships to self (truncated to the cap if these alone reach it) + * - Rank the rest by knowledge ref count (most-referenced first) + * - Return up to `maxEntityInject` entities + * + * The curator always uses `forProject()` directly (needs the full list). + */ +export function entitiesForSession( + projectPath: string, + maxInject?: number, +): EntityWithAliases[] { + const cap = maxInject ?? config().knowledge.maxEntityInject; + if (cap === 0) return []; + + const all = forProject(projectPath); + if (all.length <= cap) return all; + + // Always include self entity + entities related to self + const selfEntity = all.find((e) => e.entity_type === "self"); + const alwaysInclude = new Set(); + if (selfEntity) { + alwaysInclude.add(selfEntity.id); + const selfRels = relationsFor(selfEntity.id); + for (const r of selfRels) { + alwaysInclude.add(r.other_id); + } + } + + const guaranteed = all.filter((e) => alwaysInclude.has(e.id)); + const remaining = all.filter((e) => !alwaysInclude.has(e.id)); + + if (guaranteed.length >= cap) { + return guaranteed.slice(0, cap); + } + + // Relevance-rank remaining by knowledge ref count (more refs = more relevant) + const slots = cap - guaranteed.length; + const scored = remaining.map((e) => { + const refCount = knowledgeForEntity(e.id).length; + return { entity: e, score: refCount }; + }); + scored.sort((a, b) => b.score - a.score); + + return [...guaranteed, ...scored.slice(0, slots).map((s) => s.entity)]; +} + // --------------------------------------------------------------------------- // Formatting for prompts // --------------------------------------------------------------------------- /** * Format entities for injection into the curator prompt or system prompt. - * Groups by type, includes aliases. + * Groups by type, includes aliases, metadata brief, and relationship tags.
+ * + * The self entity is marked with " — you (the user)" and other entities + * that have relationships with the self entity get a `[relation]` tag. */ export function formatForPrompt(entities: EntityWithAliases[]): string { if (!entities.length) return ""; + // Build a map of self-entity relationships for tagging + const selfEntity = entities.find((e) => e.entity_type === "self"); + const selfRelMap = new Map(); // entityId → [relation names] + if (selfEntity) { + const selfRels = relationsFor(selfEntity.id); + for (const r of selfRels) { + const rels = selfRelMap.get(r.other_id) ?? []; + rels.push(r.relation); + selfRelMap.set(r.other_id, rels); + } + } + + // Group entities — show "self" under "person" since it's a person const grouped: Record = {}; for (const e of entities) { - const group = grouped[e.entity_type] ?? (grouped[e.entity_type] = []); + const displayType = e.entity_type === "self" ? "person" : e.entity_type; + const group = grouped[displayType] ?? (grouped[displayType] = []); group.push(e); } @@ -671,7 +1059,20 @@ export function formatForPrompt(entities: EntityWithAliases[]): string { .filter((a) => a.alias_value !== e.canonical_name) // skip canonical dupe .map((a) => `${a.alias_type}:${a.alias_value}`); const aliasInfo = aliasStrs.length ? ` (aliases: ${aliasStrs.join(", ")})` : ""; - lines.push(` - [${e.id}] ${e.canonical_name}${aliasInfo}`); + + // Self-entity marker + const selfMarker = e.entity_type === "self" ? " — you (the user)" : ""; + + // Metadata brief (role/description) + const metaInfo = e.entity_type === "self" ? "" : formatMetadataBrief(e.metadata); + + // Relationship tags from self entity (e.g. [friend]) + const relTags = selfRelMap.get(e.id); + const relInfo = relTags?.length ? 
` [${relTags.join(", ")}]` : ""; + + lines.push( + ` - [${e.id}] ${e.canonical_name}${aliasInfo}${selfMarker}${metaInfo}${relInfo}`, + ); } } diff --git a/packages/core/src/git.ts b/packages/core/src/git.ts index 4c78ea76..a5f22e9e 100644 --- a/packages/core/src/git.ts +++ b/packages/core/src/git.ts @@ -146,3 +146,56 @@ export function getGitRemote(path: string): string | null { return null; } } + +// --------------------------------------------------------------------------- +// Git user identity +// --------------------------------------------------------------------------- + +/** Cached git user identity (process-lifetime, like gitRemoteCache). */ +const gitUserCache = new Map(); + +/** + * Clear the in-memory git user cache. For test isolation. + */ +export function clearGitUserCache(): void { + gitUserCache.clear(); +} + +/** + * Get the git user.name and user.email for a repository at the given path. + * + * Results are cached in-memory for the process lifetime. + * `name` and `email` are each `null` when the corresponding `git config` call fails or prints only whitespace (e.g. git is not installed, or the value is unset). + * Skipped in hosted mode — never run git subprocesses with client-controlled cwd.
+ */ +export function getGitUser(path: string): { name: string | null; email: string | null } { + if (isHostedMode()) return { name: null, email: null }; + + const cached = gitUserCache.get(path); + if (cached !== undefined) return cached; + + const result = { name: null as string | null, email: null as string | null }; + try { + result.name = execSync("git config user.name", { + cwd: path, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() || null; + } catch { + // git not installed, not a repo, or user.name not set + } + try { + result.email = execSync("git config user.email", { + cwd: path, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() || null; + } catch { + // git not installed, not a repo, or user.email not set + } + + gitUserCache.set(path, result); + return result; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 36224d73..bb5c30a1 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -89,7 +89,7 @@ export { getInstanceId, close, } from "./db"; -export { normalizeRemoteUrl, getGitRemote, clearGitRemoteCache } from "./git"; +export { normalizeRemoteUrl, getGitRemote, clearGitRemoteCache, getGitUser, clearGitUserCache } from "./git"; export { enableHostedMode, isHostedMode } from "./hosted"; export { transform, diff --git a/packages/core/src/prompt.ts b/packages/core/src/prompt.ts index 9d5e8fb9..5ab21c5c 100644 --- a/packages/core/src/prompt.ts +++ b/packages/core/src/prompt.ts @@ -417,25 +417,45 @@ ENTITY GROUNDING — resolve ambiguous references to canonical names: - If you detect a person, service, tool, organization, repo, or infrastructure component NOT in the known entities list, include it in a top-level "entities" field in your response: { - "ops": [ ... ], // knowledge operations (same format as before) - "entities": [ // NEW — detected entities not already known + "ops": [ ... 
], + "entities": [ { "type": "person" | "org" | "service" | "tool" | "repo" | "infra", "canonical_name": "Full Canonical Name", "aliases": [ { "type": "name" | "email" | "github" | "slack" | "nickname" | "url" | "domain", "value": "..." } - ] + ], + "metadata": { + "description": "brief factual description (e.g. 'CI/CD platform', 'Twitch streamer')", + "role": "relationship/role relative to user (e.g. 'backend lead', 'my manager', 'contractor')" + } + } + ], + "relations": [ + { + "entity_a": "Canonical Name A", + "entity_b": "Canonical Name B", + "relation": "friend" | "colleague" | "manager" | "report" | "collaborator" | "client" | "mentor" | "partner", + "metadata": { "context": "optional note about the relationship" } } ] } +- Include metadata only when the conversation provides clear context about an entity's + role or description. Omit metadata fields you're unsure about — don't guess. +- For EXISTING entities: if the conversation reveals new metadata (role, description) + for a known entity, include that entity in "entities" with only the new metadata fields. + Use the exact canonical_name so the system can merge the metadata. - Only propose new entities when you are confident they are real, recurring references — not one-off mentions of generic concepts. People, services, and tools referenced by name are good candidates. Generic phrases like "the database" or "the CI" are not unless they map to a specific known service. - If the entity list is provided and a mention matches a known entity, use its canonical name in knowledge entries — do not propose a new entity. +- Only create relations when the conversation explicitly states a relationship. + "Melkey and I are friends" → relation. "I talked to Melkey" → no relation (just a mention). + Use the user's canonical name (marked "you (the user)" in the entity list) for self-references. 
-If nothing warrants extraction, return: { "ops": [], "entities": [] } +If nothing warrants extraction, return: { "ops": [], "entities": [], "relations": [] } The response may also be a plain JSON array of ops (backward compatible): [] Output ONLY valid JSON. No markdown fences, no explanation, no preamble.`; @@ -478,7 +498,9 @@ IMPORTANT: 7. If a user CHANGED a preference ("switched from X to Y", "no longer use X", "moved to Y"), find the existing entry about X and UPDATE it — do not leave contradictory entries. 8. Resolve ambiguous references (pronouns, nicknames, abbreviations) to canonical names from - the entity list. If you detect new recurring entities, include them in the "entities" field.`; + the entity list. If you detect new recurring entities, include them in the "entities" field. +9. If the conversation reveals relationships between entities (friend, colleague, manager, etc.), + include them in the "relations" field. Only explicit statements — not inferred from context.`; } /** diff --git a/packages/core/test/db.test.ts b/packages/core/test/db.test.ts index a36ecefe..d34d9143 100644 --- a/packages/core/test/db.test.ts +++ b/packages/core/test/db.test.ts @@ -23,7 +23,7 @@ describe("db", () => { const row = db().query("SELECT version FROM schema_version").get() as { version: number; }; - expect(row.version).toBe(27); + expect(row.version).toBe(28); }); test("distillation_fts virtual table exists", () => { diff --git a/packages/core/test/entities.test.ts b/packages/core/test/entities.test.ts new file mode 100644 index 00000000..1ea5a005 --- /dev/null +++ b/packages/core/test/entities.test.ts @@ -0,0 +1,625 @@ +import { describe, test, expect, beforeEach } from "bun:test"; +import { db } from "../src/db"; +import * as entities from "../src/entities"; +import { parseResponse, applyOps } from "../src/curator"; + +const PROJECT = "/test/entities/project"; +const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/; + +function cleanup() 
{ + const d = db(); + d.exec("DELETE FROM entity_relations"); + d.exec("DELETE FROM knowledge_entity_refs"); + d.exec("DELETE FROM entity_aliases"); + d.exec("DELETE FROM entities"); +} + +describe("entities", () => { + beforeEach(cleanup); + + // --------------------------------------------------------------------------- + // Metadata + // --------------------------------------------------------------------------- + + describe("metadata", () => { + test("create with metadata stores and retrieves it", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "Alice", + metadata: { role: "backend lead", description: "works on auth" }, + }); + expect(result.created).toBe(true); + const entity = entities.get(result.id); + expect(entity).not.toBeNull(); + expect(entity!.metadata).not.toBeNull(); + const meta = JSON.parse(entity!.metadata!); + expect(meta.role).toBe("backend lead"); + expect(meta.description).toBe("works on auth"); + }); + + test("create without metadata stores null", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "Bob", + }); + const entity = entities.get(result.id); + expect(entity!.metadata).toBeNull(); + }); + + test("dedup merges metadata — existing non-empty values win", () => { + const first = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "Charlie", + metadata: { role: "frontend dev", description: "works on UI" }, + }); + expect(first.created).toBe(true); + + // Second create with same name — should dedup and merge + const second = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "Charlie", + metadata: { role: "backend dev", notes: "joined in 2024" }, + }); + expect(second.created).toBe(false); + expect(second.id).toBe(first.id); + + const entity = entities.get(first.id); + const meta = JSON.parse(entity!.metadata!); + expect(meta.role).toBe("frontend dev"); // 
existing wins + expect(meta.description).toBe("works on UI"); // preserved + expect(meta.notes).toBe("joined in 2024"); // new key fills gap + }); + + test("dedup with null incoming metadata does not clobber existing", () => { + const first = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "Dana", + metadata: { role: "designer" }, + }); + const second = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "Dana", + }); + expect(second.id).toBe(first.id); + const entity = entities.get(first.id); + const meta = JSON.parse(entity!.metadata!); + expect(meta.role).toBe("designer"); // unchanged + }); + + test("update replaces metadata", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "Eve", + metadata: { role: "intern" }, + }); + entities.update(result.id, { metadata: { role: "senior", description: "promoted" } }); + const entity = entities.get(result.id); + const meta = JSON.parse(entity!.metadata!); + expect(meta.role).toBe("senior"); + expect(meta.description).toBe("promoted"); + }); + }); + + // --------------------------------------------------------------------------- + // mergeMetadata + // --------------------------------------------------------------------------- + + describe("mergeMetadata", () => { + test("null existing + incoming → incoming", () => { + const result = entities.mergeMetadata(null, { role: "dev" }); + expect(result).toEqual({ role: "dev" }); + }); + + test("existing + undefined incoming → existing", () => { + const result = entities.mergeMetadata('{"role":"dev"}', undefined); + expect(result).toEqual({ role: "dev" }); + }); + + test("existing + empty incoming → existing", () => { + const result = entities.mergeMetadata('{"role":"dev"}', {}); + expect(result).toEqual({ role: "dev" }); + }); + + test("existing wins on conflict", () => { + const result = entities.mergeMetadata( + '{"role":"senior","description":"auth 
team"}', + { role: "junior", notes: "new hire" }, + ); + expect(result).toEqual({ + role: "senior", + description: "auth team", + notes: "new hire", + }); + }); + + test("null existing + empty incoming → null", () => { + const result = entities.mergeMetadata(null, {}); + expect(result).toBeNull(); + }); + + test("null + undefined → null", () => { + const result = entities.mergeMetadata(null, undefined); + expect(result).toBeNull(); + }); + + test("existing empty string values are overwritten by incoming", () => { + const result = entities.mergeMetadata( + '{"role":"","description":"auth team"}', + { role: "dev" }, + ); + expect(result!.role).toBe("dev"); // empty string does not win + expect(result!.description).toBe("auth team"); + }); + }); + + // --------------------------------------------------------------------------- + // Type-based cross_project defaults + // --------------------------------------------------------------------------- + + describe("cross_project defaults", () => { + test("person defaults to cross-project", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "CrossPerson", + }); + const entity = entities.get(result.id); + expect(entity!.cross_project).toBe(1); + }); + + test("repo defaults to project-scoped", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "repo", + canonicalName: "my-repo", + }); + const entity = entities.get(result.id); + expect(entity!.cross_project).toBe(0); + }); + + test("infra defaults to project-scoped", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "infra", + canonicalName: "staging-server", + }); + const entity = entities.get(result.id); + expect(entity!.cross_project).toBe(0); + }); + + test("explicit crossProject overrides default", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "repo", + canonicalName: "shared-repo", + crossProject: true, + }); + const 
entity = entities.get(result.id); + expect(entity!.cross_project).toBe(1); + }); + + test("self is always cross-project", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "self", + canonicalName: "TestUser", + }); + const entity = entities.get(result.id); + expect(entity!.cross_project).toBe(1); + }); + }); + + // --------------------------------------------------------------------------- + // Self-entity + // --------------------------------------------------------------------------- + + describe("self entity", () => { + test("getSelfEntity returns null when no self entity exists", () => { + expect(entities.getSelfEntity()).toBeNull(); + }); + + test("create self entity and retrieve it", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "self", + canonicalName: "Test User", + metadata: { description: "developer" }, + }); + expect(result.created).toBe(true); + + const self = entities.getSelfEntity(); + expect(self).not.toBeNull(); + expect(self!.entity_type).toBe("self"); + expect(self!.canonical_name).toBe("Test User"); + }); + }); + + // --------------------------------------------------------------------------- + // Relations + // --------------------------------------------------------------------------- + + describe("relations", () => { + test("addRelation creates a relation between two entities", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "RelA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "RelB" }); + + const relId = entities.addRelation(a.id, b.id, "friend", { source: "manual" }); + expect(relId).not.toBeNull(); + expect(relId).toMatch(UUID_RE); + }); + + test("addRelation rejects duplicate relation", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "DupA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: 
"DupB" }); + + const first = entities.addRelation(a.id, b.id, "colleague"); + expect(first).not.toBeNull(); + const second = entities.addRelation(a.id, b.id, "colleague"); + expect(second).toBeNull(); + }); + + test("multiple relation types between same pair", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "MultiA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "MultiB" }); + + expect(entities.addRelation(a.id, b.id, "friend")).not.toBeNull(); + expect(entities.addRelation(a.id, b.id, "colleague")).not.toBeNull(); + + const rels = entities.relationsFor(a.id); + expect(rels.length).toBe(2); + }); + + test("relationsFor returns relations from both sides", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "SideA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "SideB" }); + + entities.addRelation(a.id, b.id, "friend"); + + const relsA = entities.relationsFor(a.id); + expect(relsA.length).toBe(1); + expect(relsA[0].other_name).toBe("SideB"); + + const relsB = entities.relationsFor(b.id); + expect(relsB.length).toBe(1); + expect(relsB[0].other_name).toBe("SideA"); + }); + + test("removeRelation deletes a relation", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "RmA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "RmB" }); + + const relId = entities.addRelation(a.id, b.id, "mentor")!; + expect(entities.relationsFor(a.id).length).toBe(1); + + entities.removeRelation(relId); + expect(entities.relationsFor(a.id).length).toBe(0); + }); + + test("getRelation finds specific relation between pair", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "GetA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "GetB" 
}); + + entities.addRelation(a.id, b.id, "partner"); + const rels = entities.getRelation(a.id, b.id, "partner"); + expect(rels.length).toBe(1); + expect(rels[0].relation).toBe("partner"); + }); + + test("getRelation finds bidirectionally", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "BiA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "BiB" }); + + entities.addRelation(a.id, b.id, "friend"); + // Query with reversed order + const rels = entities.getRelation(b.id, a.id, "friend"); + expect(rels.length).toBe(1); + }); + + test("removing entity cleans up relations", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "CleanA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "CleanB" }); + + entities.addRelation(a.id, b.id, "colleague"); + entities.remove(a.id); + expect(entities.relationsFor(b.id).length).toBe(0); + }); + + test("addRelation with metadata", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "MetaRelA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "MetaRelB" }); + + entities.addRelation(a.id, b.id, "friend", { metadata: { context: "met at conference" } }); + const rels = entities.getRelation(a.id, b.id, "friend"); + expect(rels.length).toBe(1); + const meta = JSON.parse(rels[0].metadata!); + expect(meta.context).toBe("met at conference"); + }); + + test("formatRelationsForPrompt produces concise output", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "FmtA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "FmtB" }); + const c = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "FmtC" }); + + entities.addRelation(a.id, b.id, "friend"); + 
entities.addRelation(a.id, c.id, "colleague"); + + const result = entities.formatRelationsForPrompt(a.id); + expect(result).toContain("friend of FmtB"); + expect(result).toContain("colleague of FmtC"); + }); + }); + + // --------------------------------------------------------------------------- + // formatForPrompt + // --------------------------------------------------------------------------- + + describe("formatForPrompt", () => { + test("includes metadata brief for non-self entities", () => { + const result = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "PromptPerson", + metadata: { role: "backend lead", description: "works on infra" }, + }); + const all = entities.forProject(PROJECT); + const output = entities.formatForPrompt(all); + expect(output).toContain("PromptPerson"); + expect(output).toContain("backend lead"); + expect(output).toContain('"works on infra"'); + }); + + test("self entity gets 'you (the user)' marker", () => { + entities.create({ + projectPath: PROJECT, + entityType: "self", + canonicalName: "TestSelf", + }); + const all = entities.forProject(PROJECT); + const output = entities.formatForPrompt(all); + expect(output).toContain("TestSelf"); + expect(output).toContain("you (the user)"); + }); + + test("self entity is grouped under 'person'", () => { + entities.create({ + projectPath: PROJECT, + entityType: "self", + canonicalName: "GroupSelf", + }); + const all = entities.forProject(PROJECT); + const output = entities.formatForPrompt(all); + expect(output).toContain("person:"); + expect(output).not.toContain("self:"); + }); + + test("relationship tags shown for entities related to self", () => { + const self = entities.create({ + projectPath: PROJECT, + entityType: "self", + canonicalName: "RelSelf", + }); + const other = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "RelOther", + }); + entities.addRelation(self.id, other.id, "friend"); + + const all = 
entities.forProject(PROJECT); + const output = entities.formatForPrompt(all); + expect(output).toContain("[friend]"); + }); + + test("metadata notes not shown in prompt", () => { + entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "NotesPerson", + metadata: { notes: "internal detail not for prompt" }, + }); + const all = entities.forProject(PROJECT); + const output = entities.formatForPrompt(all); + expect(output).not.toContain("internal detail not for prompt"); + }); + + test("empty entities returns empty string", () => { + expect(entities.formatForPrompt([])).toBe(""); + }); + }); + + // --------------------------------------------------------------------------- + // entitiesForSession + // --------------------------------------------------------------------------- + + describe("entitiesForSession", () => { + test("returns all entities when count <= cap", () => { + entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "Sess1" }); + entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "Sess2" }); + + const result = entities.entitiesForSession(PROJECT, 30); + expect(result.length).toBe(2); + }); + + test("returns empty when maxInject is 0", () => { + entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "Zero1" }); + const result = entities.entitiesForSession(PROJECT, 0); + expect(result.length).toBe(0); + }); + + test("self entity always included when over cap", () => { + const self = entities.create({ + projectPath: PROJECT, + entityType: "self", + canonicalName: "CapSelf", + }); + // Create enough entities to exceed cap of 2 + entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "Cap1" }); + entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "Cap2" }); + entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "Cap3" }); + + const result = entities.entitiesForSession(PROJECT, 2); + 
expect(result.length).toBe(2); + const selfIncluded = result.some((e) => e.entity_type === "self"); + expect(selfIncluded).toBe(true); + }); + + test("entities related to self are prioritized when over cap", () => { + const self = entities.create({ + projectPath: PROJECT, + entityType: "self", + canonicalName: "PriSelf", + }); + const friend = entities.create({ + projectPath: PROJECT, + entityType: "person", + canonicalName: "PriFriend", + }); + entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "PriOther1" }); + entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "PriOther2" }); + + entities.addRelation(self.id, friend.id, "friend"); + + const result = entities.entitiesForSession(PROJECT, 2); + expect(result.length).toBe(2); + const ids = result.map((e) => e.id); + expect(ids).toContain(self.id); + expect(ids).toContain(friend.id); + }); + }); + + // --------------------------------------------------------------------------- + // Curator integration: parseResponse + applyOps + // --------------------------------------------------------------------------- + + describe("curator integration", () => { + test("parseResponse handles entities with metadata", () => { + const response = parseResponse(JSON.stringify({ + ops: [], + entities: [ + { + type: "person", + canonical_name: "CuratorPerson", + aliases: [{ type: "github", value: "@curator" }], + metadata: { role: "reviewer", description: "code reviewer" }, + }, + ], + relations: [], + })); + expect(response.entities.length).toBe(1); + expect(response.entities[0].metadata).toEqual({ role: "reviewer", description: "code reviewer" }); + }); + + test("parseResponse filters invalid metadata values", () => { + const response = parseResponse(JSON.stringify({ + ops: [], + entities: [ + { + type: "person", + canonical_name: "FilterPerson", + metadata: { role: "valid", bad: 123, empty: "", toolong: "x".repeat(501) }, + }, + ], + relations: [], + })); + 
expect(response.entities[0].metadata).toEqual({ role: "valid" }); + }); + + test("parseResponse handles relations", () => { + const response = parseResponse(JSON.stringify({ + ops: [], + entities: [], + relations: [ + { entity_a: "Alice", entity_b: "Bob", relation: "friend" }, + { entity_a: "Alice", entity_b: "Bob", relation: "invalid_type" }, // filtered + ], + })); + expect(response.relations.length).toBe(1); + expect(response.relations[0].relation).toBe("friend"); + }); + + test("applyOps creates entities with metadata", () => { + const result = applyOps([], { + projectPath: PROJECT, + detectedEntities: [ + { + type: "service", + canonical_name: "ApplyService", + metadata: { description: "CI/CD platform" }, + }, + ], + }); + expect(result.entitiesCreated).toBe(1); + + const resolved = entities.resolve("ApplyService"); + expect(resolved).not.toBeNull(); + const meta = JSON.parse(resolved!.metadata!); + expect(meta.description).toBe("CI/CD platform"); + }); + + test("applyOps creates relations between known entities", () => { + const a = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "OpRelA" }); + const b = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "OpRelB" }); + + const result = applyOps([], { + projectPath: PROJECT, + detectedRelations: [ + { entity_a: "OpRelA", entity_b: "OpRelB", relation: "colleague" }, + ], + }); + expect(result.relationsCreated).toBe(1); + + const rels = entities.getRelation(a.id, b.id, "colleague"); + expect(rels.length).toBe(1); + }); + + test("applyOps skips relations for unknown entities", () => { + entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "KnownPerson" }); + + const result = applyOps([], { + projectPath: PROJECT, + detectedRelations: [ + { entity_a: "KnownPerson", entity_b: "UnknownPerson", relation: "friend" }, + ], + }); + expect(result.relationsCreated).toBe(0); + }); + + test("legacy array format still works", () => { + const 
response = parseResponse(JSON.stringify([ + { op: "create", category: "decision", title: "test", content: "test content", scope: "project" }, + ])); + expect(response.ops.length).toBe(1); + expect(response.entities.length).toBe(0); + expect(response.relations.length).toBe(0); + }); + }); + + // --------------------------------------------------------------------------- + // Merge with relations + // --------------------------------------------------------------------------- + + describe("merge", () => { + test("merge moves relations from source to target", () => { + const target = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "MergeTarget" }); + const source = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "MergeSource" }); + const other = entities.create({ projectPath: PROJECT, entityType: "person", canonicalName: "MergeOther" }); + + entities.addRelation(source.id, other.id, "friend"); + entities.merge(target.id, source.id); + + // Source is deleted + expect(entities.get(source.id)).toBeNull(); + // Relation moved to target + const rels = entities.relationsFor(target.id); + expect(rels.length).toBe(1); + expect(rels[0].other_name).toBe("MergeOther"); + }); + }); +}); diff --git a/packages/gateway/src/cli/entity.ts b/packages/gateway/src/cli/entity.ts index e726e3ae..965e5d7d 100644 --- a/packages/gateway/src/cli/entity.ts +++ b/packages/gateway/src/cli/entity.ts @@ -5,8 +5,11 @@ * list List all entities with aliases * show Show full detail for an entity * add Create a new entity + * edit Edit an entity * alias add --type --value Add an alias to an entity * alias rm Remove an alias + * relation add --relation Add a relation + * relation rm Remove a relation * merge Merge two entities * search Search entities by name or alias * delete Delete an entity @@ -140,6 +143,16 @@ async function cmdShow( } } } + + // Show relationships + const relations = entities.relationsFor(entity.id); + if (relations.length > 
0) { + console.log(`\nRelationships (${relations.length}):`); + for (const r of relations) { + const metaStr = r.metadata ? ` ${r.metadata}` : ""; + console.log(` ${r.relation}: ${r.other_name} (${r.other_type})${metaStr} (${r.id.slice(0, 12)})`); + } + } } async function cmdAdd( @@ -166,11 +179,26 @@ async function cmdAdd( const projectPath = resolve((flags.project as string) ?? process.cwd()); const cross = flags.cross !== false; // default true + let metadata: Record | undefined; + if (flags.metadata) { + try { + metadata = JSON.parse(flags.metadata as string); + if (typeof metadata !== "object" || Array.isArray(metadata) || metadata === null) { + console.error("--metadata must be a JSON object"); + process.exit(1); + } + } catch { + console.error("--metadata must be valid JSON"); + process.exit(1); + } + } + const result = entities.create({ projectPath, entityType: entityType as (typeof entities.ENTITY_TYPES)[number], canonicalName: name, crossProject: cross, + metadata, }); if (result.created) { @@ -182,6 +210,66 @@ async function cmdAdd( console.log(` Name: ${name}`); } +async function cmdEdit( + args: string[], + flags: Record, +): Promise { + const id = args[0]; + if (!id) { + console.error("Usage: lore entity edit [--name ] [--metadata ] [--cross]"); + process.exit(1); + } + + const { entities, db } = await import("@loreai/core"); + + // Support prefix matching on ID + let entity = entities.get(id); + if (!entity && id.length < 36) { + const match = db() + .query("SELECT id FROM entities WHERE id LIKE ? 
LIMIT 1") + .get(`${id}%`) as { id: string } | null; + if (match) entity = entities.get(match.id); + } + if (!entity) { + console.error(`Entity not found: ${id}`); + process.exit(1); + } + + const updates: Record = {}; + + if (flags.name) { + updates.canonicalName = flags.name as string; + } + + if (flags.cross !== undefined) { + updates.crossProject = flags.cross !== false; + } + + if (flags.metadata) { + let parsed: Record; + try { + parsed = JSON.parse(flags.metadata as string); + if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) { + console.error("--metadata must be a JSON object"); + process.exit(1); + } + } catch { + console.error("--metadata must be valid JSON"); + process.exit(1); + } + const merged = entities.mergeMetadata(entity.metadata, parsed!); + updates.metadata = JSON.stringify(merged); + } + + if (Object.keys(updates).length === 0) { + console.error("No changes specified. Use --name, --metadata, or --cross."); + process.exit(1); + } + + entities.update(entity.id, updates); + console.log(`Updated entity: ${entity.canonical_name} (${entity.id})`); +} + async function cmdAliasAdd( args: string[], flags: Record, @@ -234,6 +322,93 @@ async function cmdAliasRm( console.log(`Removed alias: ${aliasId}`); } +async function cmdRelationAdd( + args: string[], + flags: Record, +): Promise { + const idA = args[0]; + const idB = args[1]; + const relation = flags.relation as string; + + if (!idA || !idB || !relation) { + console.error("Usage: lore entity relation add --relation [--metadata ]"); + process.exit(1); + } + + const { entities, db } = await import("@loreai/core"); + + const validRelations = entities.RELATION_TYPES as readonly string[]; + if (!validRelations.includes(relation)) { + console.error(`Invalid relation type: ${relation}`); + console.error(`Valid types: ${entities.RELATION_TYPES.join(", ")}`); + process.exit(1); + } + + // Prefix matching on both IDs + let entityA = entities.get(idA); + if (!entityA && idA.length < 36) 
{ + const match = db() + .query("SELECT id FROM entities WHERE id LIKE ? LIMIT 1") + .get(`${idA}%`) as { id: string } | null; + if (match) entityA = entities.get(match.id); + } + if (!entityA) { + console.error(`Entity A not found: ${idA}`); + process.exit(1); + } + + let entityB = entities.get(idB); + if (!entityB && idB.length < 36) { + const match = db() + .query("SELECT id FROM entities WHERE id LIKE ? LIMIT 1") + .get(`${idB}%`) as { id: string } | null; + if (match) entityB = entities.get(match.id); + } + if (!entityB) { + console.error(`Entity B not found: ${idB}`); + process.exit(1); + } + + let relMetadata: Record | undefined; + if (flags.metadata) { + try { + const parsed = JSON.parse(flags.metadata as string); + if (typeof parsed !== "object" || Array.isArray(parsed) || parsed === null) { + console.error("--metadata must be a JSON object"); + process.exit(1); + } + relMetadata = parsed as Record; + } catch { + console.error("--metadata must be valid JSON"); + process.exit(1); + } + } + + const relId = entities.addRelation( + entityA.id, + entityB.id, + relation as (typeof entities.RELATION_TYPES)[number], + { metadata: relMetadata, source: "manual" }, + ); + console.log(`Added relation: ${entityA.canonical_name} —[${relation}]→ ${entityB.canonical_name}`); + console.log(` Relation ID: ${relId}`); +} + +async function cmdRelationRm( + args: string[], + _flags: Record, +): Promise { + const relationId = args[0]; + if (!relationId) { + console.error("Usage: lore entity relation rm "); + process.exit(1); + } + + const { entities } = await import("@loreai/core"); + entities.removeRelation(relationId); + console.log(`Removed relation: ${relationId}`); +} + async function cmdMerge( args: string[], _flags: Record, @@ -324,8 +499,11 @@ Subcommands: list List all entities show Show entity detail with aliases add Create a new entity + edit Edit an entity alias add --type --value Add an alias alias rm Remove an alias + relation add --relation Add a relation + 
relation rm Remove a relation merge Merge two entities search Search entities delete Delete an entity @@ -337,6 +515,9 @@ Options: --project Project path (default: cwd) --all List all entities (ignore project scope) --json Output as JSON (list only) + --metadata JSON metadata (add, edit, relation add) + --name New name (edit only) + --cross Cross-project flag (add, edit) `.trim(); export async function commandEntity( @@ -356,6 +537,9 @@ export async function commandEntity( case "add": await cmdAdd(subArgs, values); break; + case "edit": + await cmdEdit(subArgs, values); + break; case "alias": { const aliasCmd = subArgs[0]; const aliasArgs = subArgs.slice(1); @@ -370,6 +554,20 @@ export async function commandEntity( } break; } + case "relation": { + const relCmd = subArgs[0]; + const relArgs = subArgs.slice(1); + if (relCmd === "add") { + await cmdRelationAdd(relArgs, values); + } else if (relCmd === "rm" || relCmd === "remove") { + await cmdRelationRm(relArgs, values); + } else { + console.error(`Unknown relation subcommand: ${relCmd}`); + console.log("Usage: lore entity relation add|rm ..."); + process.exit(1); + } + break; + } case "merge": await cmdMerge(subArgs, values); break; diff --git a/packages/gateway/src/ui.ts b/packages/gateway/src/ui.ts index 0b7b9685..fdbbfa30 100644 --- a/packages/gateway/src/ui.ts +++ b/packages/gateway/src/ui.ts @@ -2476,9 +2476,38 @@ function pageEntity(id: string): string | null { body += `
Cross-project: ${entity.cross_project ? "Yes" : "No"}
`; body += `
Created: ${formatDate(entity.created_at)}
`; body += `
Updated: ${formatDate(entity.updated_at)}
`; + // Metadata section + let parsedMeta: Record = {}; if (entity.metadata) { - body += `
Metadata:
${esc(entity.metadata)}
`; + try { parsedMeta = JSON.parse(entity.metadata); } catch { /* ignore */ } } + const hasMetadata = Object.keys(parsedMeta).length > 0; + if (hasMetadata) { + body += `

Metadata

`; + if (typeof parsedMeta.role === "string" && parsedMeta.role) { + body += `
Role: ${esc(parsedMeta.role)}
`; + } + if (typeof parsedMeta.description === "string" && parsedMeta.description) { + body += `
Description: ${esc(parsedMeta.description)}
`; + } + if (typeof parsedMeta.notes === "string" && parsedMeta.notes) { + body += `
Notes: ${esc(parsedMeta.notes)}
`; + } + // Show any extra keys as raw JSON + const { role, description, notes, ...extra } = parsedMeta as Record; + if (Object.keys(extra).length > 0) { + body += `
Other:
${esc(JSON.stringify(extra, null, 2))}
`; + } + } + + // Metadata edit form + body += `

Edit Metadata

`; + body += `
`; + body += ``; + body += ``; + body += ``; + body += ``; + body += `
`; // Aliases const displayAliases = entity.aliases.filter((a) => a.alias_value !== entity.canonical_name); @@ -2499,6 +2528,22 @@ function pageEntity(id: string): string | null { body += `

Aliases

No additional aliases (only the canonical name).

`; } + // Relationships + const relations = entities.relationsFor(entity.id); + if (relations.length > 0) { + body += `

Relationships (${relations.length})

`; + body += ` + `; + for (const r of relations) { + body += ` + + + + `; + } + body += `
RelationEntityType
${badge(r.relation)}${esc(r.other_name)}${badge(r.other_type)}
`; + } + // Linked knowledge entries const knowledgeIds = entities.knowledgeForEntity(entity.id); if (knowledgeIds.length > 0) { @@ -2631,6 +2676,27 @@ export async function handleUIRequest( return redirect("/ui/entities"); } + // Update entity metadata + const updateEntityMeta = matchRoute(pathname, "/ui/api/update/entity/:id/metadata"); + if (updateEntityMeta) { + const entity = entities.get(updateEntityMeta.id); + if (!entity) return redirect("/ui/entities"); + const formData = await req.formData(); + const existing = entity.metadata ? JSON.parse(entity.metadata) : {}; + const role = (formData.get("role") as string)?.trim() || undefined; + const description = (formData.get("description") as string)?.trim() || undefined; + const notes = (formData.get("notes") as string)?.trim() || undefined; + const metadata: Record = { ...existing }; + // Update known fields — set to value or remove if empty + if (role !== undefined) metadata.role = role; else delete metadata.role; + if (description !== undefined) metadata.description = description; else delete metadata.description; + if (notes !== undefined) metadata.notes = notes; else delete metadata.notes; + entities.update(updateEntityMeta.id, { + metadata: Object.keys(metadata).length > 0 ? 
metadata : {}, + }); + return redirect(`/ui/entities/${updateEntityMeta.id}`); + } + // Delete knowledge const delKnowledge = matchRoute(pathname, "/ui/api/delete/knowledge/:id"); if (delKnowledge) { From dc4cd9d6fec68ada84fc3778b9d1aa46cf9ff882 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Sun, 24 May 2026 17:47:35 +0000 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20address=20review=20findings=20?= =?UTF-8?q?=E2=80=94=203=20critical=20+=201=20medium?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move metadata merge inside BEGIN IMMEDIATE transaction (race condition) - Fix cmdEdit double-stringify: pass Record to update(), not JSON string - Self entity created with project_id=NULL (global) to prevent duplicates - Add self-relation guard in addRelation (entity_a != entity_b) --- packages/core/src/entities.ts | 14 ++++++++++---- packages/gateway/src/cli/entity.ts | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/packages/core/src/entities.ts b/packages/core/src/entities.ts index 1cfe4b2d..354b4626 100644 --- a/packages/core/src/entities.ts +++ b/packages/core/src/entities.ts @@ -163,8 +163,7 @@ export function create(input: { .get(input.canonicalName) as { id: string; metadata: string | null } | null); if (existing) { - d.exec("COMMIT"); - // Merge metadata into the existing entity (incoming fills gaps, existing wins) + // Merge metadata inside the transaction to avoid race conditions if (input.metadata && Object.keys(input.metadata).length > 0) { const merged = mergeMetadata(existing.metadata, input.metadata); if (merged) { @@ -172,7 +171,9 @@ export function create(input: { .run(JSON.stringify(merged), Date.now(), existing.id); } } - // Add any new aliases to the existing entity (outside transaction) + d.exec("COMMIT"); + // Add any new aliases to the existing entity (outside transaction — + // addAlias has its own error handling for UNIQUE constraint violations) if (input.aliases?.length) { for 
(const alias of input.aliases) { addAlias(existing.id, alias.type, alias.value, alias.source); @@ -346,7 +347,8 @@ export function ensureSelfEntity(projectPath: string): EntityWithAliases | null } const result = create({ - projectPath, + // No projectPath — self entity is global (project_id=NULL) so it's + // visible across all projects and dedup works correctly. entityType: "self", canonicalName: name, aliases, @@ -817,6 +819,10 @@ export function addRelation( relation: RelationType, opts?: { metadata?: Record; source?: string }, ): string | null { + if (entityA === entityB) { + log.info(`skipping self-referential relation: ${entityA} (${relation})`); + return null; + } if (!RELATION_TYPES.includes(relation)) { throw new Error(`invalid relation type: ${relation}`); } diff --git a/packages/gateway/src/cli/entity.ts b/packages/gateway/src/cli/entity.ts index 965e5d7d..2bf6cc44 100644 --- a/packages/gateway/src/cli/entity.ts +++ b/packages/gateway/src/cli/entity.ts @@ -258,7 +258,7 @@ async function cmdEdit( process.exit(1); } const merged = entities.mergeMetadata(entity.metadata, parsed!); - updates.metadata = JSON.stringify(merged); + updates.metadata = merged ?? {}; } if (Object.keys(updates).length === 0) {