diff --git a/Cargo.lock b/Cargo.lock index a7ae7f7..1063f1b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1329,7 +1329,7 @@ dependencies = [ [[package]] name = "icm-cli" -version = "0.10.49" +version = "0.10.50" dependencies = [ "anyhow", "axum", diff --git a/README.md b/README.md index 7486e15..c1dd948 100644 --- a/README.md +++ b/README.md @@ -92,8 +92,9 @@ it end-to-end: facts seeded through ICM are recalled with 100% accuracy by Claude Code, Gemini CLI, Copilot CLI, Cursor Agent, and Aider — **98% cross-agent efficiency** on the standard test. -If you want isolation (per-project, per-tool, etc.) pass `--db ` -or set `ICM_DB_PATH`; each path is an independent corpus. +If you want isolation (per-project, per-tool, etc.) pass `--db `, +set `ICM_DB`, or use `icm init --per-project` to create a project-local +database under `.icm/`; each path is an independent corpus. ## Install @@ -116,10 +117,18 @@ Re-run the install command to upgrade to the latest release. To pin a version, p ## Setup ```bash -# Auto-detect and configure all supported tools +# Auto-detect and configure all supported tools (global database) icm init + +# Per-project database (stores memories in .icm/memories.db) +icm init --per-project ``` +`--per-project` creates a project-local `.icm/config.toml` at the git +root, so all `icm` commands run from within the project automatically +use an isolated database. Combine with global `icm init` — global +settings (tools, hooks) are unaffected; only the database is scoped. + Configures **17 tools** in one command ([full integration guide](docs/integrations.md)): | Tool | MCP | Hooks | CLI | Skills | @@ -427,12 +436,25 @@ Changing the model automatically re-creates the vector index (existing embedding Single SQLite file. No external services, no network dependency. 
+Default (global) database location:
+
 ```
 ~/Library/Application Support/dev.icm.icm/memories.db # macOS
 ~/.local/share/dev.icm.icm/memories.db # Linux
 C:\Users\\AppData\Local\icm\icm\data\memories.db # Windows
 ```
 
+Per-project database (created by `icm init --per-project`):
+
+```
+<project-root>/.icm/memories.db
+```
+
+ICM auto-detects a project-local `.icm/config.toml` at the git root of the
+current working directory. A relative `[store].path` is resolved against the
+git root, so `icm` commands run anywhere in the project tree use the scoped
+database unless `--db`, `ICM_DB`, or a global `[store].path` overrides it.
+
 ### Configuration
 
 ```bash
diff --git a/config/default.toml b/config/default.toml
index eb8ee8a..70c69e0 100644
--- a/config/default.toml
+++ b/config/default.toml
@@ -5,11 +5,22 @@
 # Linux: ~/.config/icm/config.toml
 # Windows: C:\Users\\AppData\Roaming\icm\icm\config\config.toml
 # Or override with: ICM_CONFIG=/path/to/config.toml
+#
+# Database path resolution (highest priority first):
+# 1. --db CLI flag
+# 2. ICM_DB environment variable
+# 3. [store].path in this config file
+# 4. <project-root>/.icm/config.toml [store].path (auto-detected via git root)
+# 5. <project-root>/.icm/memories.db (auto-detected via git root, file must exist)
+# 6. Platform default data directory (see default_db_path in crates/icm-cli/src/main.rs)
 
 [store]
 # SQLite database path (default: platform data dir).
 # Uncomment to use a custom location:
 # path = "/custom/path/to/memories.db"
+#
+# For per-project databases, set ICM_DB or run `icm init --per-project` to create
+# .icm/config.toml at the git root; leave `path` above unset, since it takes precedence.
 
 [memory]
 default_importance = "medium"
diff --git a/crates/icm-cli/src/main.rs b/crates/icm-cli/src/main.rs
index 02662f2..199eaa3 100644
--- a/crates/icm-cli/src/main.rs
+++ b/crates/icm-cli/src/main.rs
@@ -308,7 +308,8 @@ enum Commands {
         /// Also write project-level instruction files into the current
         /// directory (`CLAUDE.md`, `AGENTS.md`, `.windsurfrules`,
-        /// `.aider.conventions.md`, `.github/copilot-instructions.md`).
+        /// `.aider.conventions.md`, `.github/copilot-instructions.md`)
+        /// and set up a project-local database under `.icm/`.
         /// Default behavior writes only to global per-tool paths
         /// (`~/.claude/CLAUDE.md`, `~/.codex/AGENTS.md`, etc.) so init
         /// doesn't pollute every project tree.
@@ -1044,9 +1045,93 @@ fn default_db_path() -> PathBuf {
         .unwrap_or_else(|| PathBuf::from("memories.db"))
 }
 
-fn open_store(db: Option<PathBuf>, embedding_dims: usize) -> Result<SqliteStore> {
-    let path = db.unwrap_or_else(default_db_path);
-    SqliteStore::with_dims(&path, embedding_dims).context("failed to open database")
+/// Project root via `git rev-parse --show-toplevel` in the current directory (`None` on any failure).
+fn detect_project_root() -> Option<PathBuf> {
+    std::process::Command::new("git")
+        .args(["rev-parse", "--show-toplevel"])
+        .output()
+        .ok()
+        .and_then(|output| {
+            if output.status.success() {
+                let path = String::from_utf8_lossy(&output.stdout).trim().to_string();
+                if path.is_empty() { None } else { Some(PathBuf::from(path)) }
+            } else {
+                None
+            }
+        })
+}
+
+/// Resolve the database path; the first source that yields a usable path wins:
+///
+/// 1. `--db` CLI flag (highest priority)
+/// 2. `$ICM_DB` environment variable (ignored when empty)
+/// 3. Global config `[store].path` from config file (ignored when empty)
+/// 4. Project-local `.icm/config.toml` `[store].path`, relative to the git root (blank skipped)
+/// 5. Project-local `.icm/memories.db` at git root (if file exists)
+/// 6. Default platform data directory
+fn resolve_db_path(cli_db: Option<PathBuf>, cfg: &config::Config) -> PathBuf {
+    // 1. --db CLI flag
+    if let Some(db) = cli_db {
+        return db;
+    }
+
+    // 2. $ICM_DB env var
+    if let Ok(env_db) = std::env::var("ICM_DB") {
+        let path = PathBuf::from(env_db);
+        if !path.as_os_str().is_empty() {
+            return path;
+        }
+    }
+
+    // 3. Global config [store].path
+    if let Some(config_path) = &cfg.store.path {
+        let path = PathBuf::from(config_path);
+        if !path.as_os_str().is_empty() {
+            return path;
+        }
+    }
+
+    // 4-5. Project-local .icm/ directory (at git root)
+    if let Some(project_root) = detect_project_root() {
+        let icm_dir = project_root.join(".icm");
+        if icm_dir.is_dir() {
+            // 4. .icm/config.toml with [store].path
+            let project_cfg = icm_dir.join("config.toml");
+            if project_cfg.exists() {
+                if let Ok(content) = std::fs::read_to_string(&project_cfg) {
+                    if let Ok(value) = content.parse::<toml::Value>() {
+                        if let Some(path_str) = value
+                            .get("store")
+                            .and_then(|s| s.get("path"))
+                            .and_then(|p| p.as_str())
+                        {
+                            // Skip an empty/blank value: joining it would yield
+                            // the project root directory, not a database file,
+                            // so fall through to the remaining levels instead.
+                            if !path_str.trim().is_empty() {
+                                // `join` returns an absolute `path_str` as-is and
+                                // resolves a relative one against the git root.
+                                return project_root.join(path_str);
+                            }
+                        }
+                    }
+                }
+            }
+
+            // 5. .icm/memories.db (if file exists)
+            let project_db = icm_dir.join("memories.db");
+            if project_db.exists() {
+                return project_db;
+            }
+        }
+    }
+
+    // 6. Default platform data dir
+    default_db_path()
+}
+
+fn open_store(db: PathBuf, embedding_dims: usize) -> Result<SqliteStore> {
+    SqliteStore::with_dims(&db, embedding_dims).context("failed to open database")
 }
 
 #[cfg(feature = "embeddings")]
@@ -1112,7 +1197,9 @@ fn main() -> Result<()> {
         }
     }
     let cli_db: Option<PathBuf> = cli.db.into_iter().next();
-    let db_path = cli_db.clone().unwrap_or_else(default_db_path);
+    let db_path = resolve_db_path(cli_db.clone(), &cfg);
+    // Keep cli_db for later config display (it's cloned so the
+    // original is still available for cmd_config below).
// `icm uninstall` must NOT open the SQLite store: a default // `open_store` call would recreate the DB directory and WAL/SHM files @@ -1125,7 +1212,7 @@ fn main() -> Result<()> { std::process::exit(code); } - let store = open_store(cli_db, embedding_dims)?; + let store = open_store(db_path.clone(), embedding_dims)?; match command { Commands::Store { @@ -1364,7 +1451,7 @@ fn main() -> Result<()> { mode, force, per_project, - } => cmd_init(mode, force, per_project), + } => cmd_init(mode, force, per_project, &db_path), Commands::Doctor => cmd_doctor(), Commands::Uninstall(_) => unreachable!("dispatched before open_store"), Commands::Extract { @@ -1428,7 +1515,7 @@ fn main() -> Result<()> { println!("{result}"); Ok(()) } - Commands::Config => cmd_config(), + Commands::Config => cmd_config(cli_db, &cfg), Commands::Upgrade { apply, check } => upgrade::cmd_upgrade(apply, check), Commands::Bench { count } => cmd_bench(count), Commands::BenchRecall { @@ -3320,7 +3407,7 @@ pub(crate) fn cmd_matches_icm_pattern(cmd: &str, pattern: &str) -> bool { cmd.contains(&format!("{pattern}.exe")) } -fn cmd_init(mode: InitMode, force: bool, per_project: bool) -> Result<()> { +fn cmd_init(mode: InitMode, force: bool, per_project: bool, db_path: &Path) -> Result<()> { let icm_bin = std::env::current_exe().context("cannot determine icm binary path")?; let icm_bin_str = portable_command_path(&icm_bin); let home = home_dir_str()?; @@ -4042,9 +4129,35 @@ Do this BEFORE responding to the user. Not optional. manifest.save(&manifest_path)?; } + // --- Project-local .icm/ setup --- + // When --per-project is set, create a project-local database config + // so ICM uses a separate database per project. This creates: + // /.icm/config.toml with [store] path = ".icm/memories.db" + // On subsequent invocations, the resolver will pick this up. 
+    if per_project {
+        let project_root = detect_project_root()
+            .or_else(|| std::env::current_dir().ok());
+        if let Some(root) = project_root {
+            let icm_dir = root.join(".icm");
+            let project_cfg = icm_dir.join("config.toml");
+            if !project_cfg.exists() {
+                std::fs::create_dir_all(&icm_dir)
+                    .with_context(|| format!("creating {}", icm_dir.display()))?;
+                std::fs::write(
+                    &project_cfg,
+                    "[store]\npath = \".icm/memories.db\"\n",
+                )
+                .with_context(|| format!("writing {}", project_cfg.display()))?;
+                println!("[project] created project-local .icm/ at {}", root.display());
+            } else {
+                println!("[project] .icm/ already exists at {}", root.display());
+            }
+        }
+    }
+
     println!();
     println!(" binary: {icm_bin_str}");
-    println!(" db: {}", default_db_path().display());
+    println!(" db: {}", db_path.display());
     if !manifest.is_empty() {
         println!(
             " manifest: {} ({} entr{})",
@@ -5064,18 +5177,51 @@ fn inject_opencode_mcp_server(config_path: &Path, name: &str, icm_bin: &str) ->
     Ok("configured".into())
 }
 
-fn cmd_config() -> Result<()> {
-    let cfg = config::load_config()?;
+fn cmd_config(cli_db: Option<PathBuf>, cfg: &config::Config) -> Result<()> {
     println!("Config: {}", config::show_config_path());
     println!();
     println!("[store]");
-    println!(
-        " path = {}",
-        cfg.store
-            .path
-            .as_deref()
-            .unwrap_or("(default platform path)")
-    );
+    let env_db = std::env::var("ICM_DB").ok();
+    let project_root = detect_project_root();
+    let resolved = resolve_db_path(cli_db, cfg);
+    println!(" resolved = {}", resolved.display());
+    println!(" path (config) = {}", cfg.store.path.as_deref().unwrap_or("(not set)"));
+    if let Some(ref env) = env_db {
+        println!(" ICM_DB (env) = {env}");
+    } else {
+        println!(" ICM_DB (env) = (not set)");
+    }
+    if let Some(root) = &project_root {
+        println!();
+        println!("[project]");
+        println!(" root = {}", root.display());
+        let icm_dir = root.join(".icm");
+        if icm_dir.is_dir() {
+            println!(" .icm/ exists");
+            let project_cfg = icm_dir.join("config.toml");
+            if
project_cfg.exists() { + if let Ok(content) = std::fs::read_to_string(&project_cfg) { + if let Ok(value) = content.parse::() { + if let Some(path_str) = value + .get("store") + .and_then(|s| s.get("path")) + .and_then(|p| p.as_str()) + { + println!(" .icm/config.toml [store].path = {path_str}"); + } + } + } + } + let project_db = icm_dir.join("memories.db"); + if project_db.exists() { + println!(" .icm/memories.db exists"); + } else { + println!(" .icm/memories.db (not found)"); + } + } else { + println!(" .icm/ (not found)"); + } + } println!(); println!("[memory]"); println!(" default_importance = {}", cfg.memory.default_importance); diff --git a/docs/architecture.md b/docs/architecture.md index bce6bf7..a2e238e 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -402,6 +402,66 @@ Pattern-based scoring. Each sentence gets a score from keyword matches: Sentences below threshold are dropped. Dedup via Jaccard similarity (>0.6 = skip). +## Database Path Resolution + +ICM determines the active SQLite database path through a 6-level hierarchical resolution chain. Each level overrides the ones below it: + +``` +Priority 1: --db CLI flag (highest) +Priority 2: ICM_DB environment variable +Priority 3: Global config [store].path (~/.config/icm/config.toml) +Priority 4: Project-local .icm/config.toml [store].path (detected via git root) +Priority 5: Project-local .icm/memories.db (auto-detected at git root, file must exist) +Priority 6: Default platform data directory (lowest) +``` + +### Resolution flow + +`resolve_db_path()` is called at startup (before `open_store()`): + +``` +resolve_db_path(cli_db, cfg) + ├─ cli_db present? → return it (level 1) + ├─ $ICM_DB set? → return it (level 2) + ├─ cfg.store.path set? → return it (level 3) + ├─ detect_project_root() ok? + │ ├─ .icm/config.toml has [store].path? → return it (level 4) + │ └─ .icm/memories.db exists? 
→ return it (level 5)
+ └─ default_db_path() → return it (level 6)
+```
+
+`detect_project_root()` shells out to `git rev-parse --show-toplevel`. If the current directory is not inside a git repository (or `git` cannot be run), levels 4-5 are skipped.
+
+### Per-project database
+
+`icm init --per-project` creates `<project-root>/.icm/config.toml`:
+
+```toml
+[store]
+path = ".icm/memories.db"
+```
+
+A relative path is resolved against the git root, so the path above points at `<project-root>/.icm/memories.db`. On subsequent invocations, provided no higher-priority source (levels 1-3) is set, the resolver picks up level 4 and uses the project-local database. All `icm` commands within that project tree then use the scoped database without needing `--db` on every call.
+
+Combine with global `icm init` — global settings (tools, hooks) are unaffected; only the database is scoped.
+
+### Config display
+
+`icm config` shows the full resolution chain with source tracing:
+
+```
+[store]
+ resolved = /home/user/project/.icm/memories.db
+ path (config) = (not set)
+ ICM_DB (env) = (not set)
+
+[project]
+ root = /home/user/project
+ .icm/ exists
+ .icm/config.toml [store].path = .icm/memories.db
+ .icm/memories.db exists
+```
+
 ## Build
 
 ```bash
diff --git a/docs/guide.md b/docs/guide.md
index a292d2a..7451ce3 100644
--- a/docs/guide.md
+++ b/docs/guide.md
@@ -27,9 +27,26 @@ Re-running the install command upgrades an existing installation in place.
 
 ### 2. Setup
 
 ```bash
+# Global setup — one database for all projects
 icm init
+
+# Per-project setup — isolated database per project
+icm init --per-project
 ```
 
+`--per-project` creates `.icm/config.toml` at the project's git root. All `icm` commands within that project tree automatically use the project-local database, without needing `--db` on every invocation.
The database path is resolved hierarchically: + +| Priority | Source | Example | +|----------|--------|---------| +| 1 | `--db` CLI flag | `icm --db /tmp/test.db store ...` | +| 2 | `ICM_DB` env var | `ICM_DB=/tmp/test.db icm recall ...` | +| 3 | Global config | `~/.config/icm/config.toml [store].path` | +| 4 | `.icm/config.toml` | `/.icm/config.toml [store].path` | +| 5 | `.icm/memories.db` | Auto-detected at git root (if file exists) | +| 6 | Platform default | `~/.local/share/icm/memories.db` | + +Run `icm config` to see the active resolution chain with source tracing. + This auto-detects your AI tools and configures the MCP server. Supports 14 tools: Claude Code, Claude Desktop, Cursor, Windsurf, VS Code, Gemini, Zed, Amp, Amazon Q, Cline, Roo Code, Kilo Code, Codex CLI, OpenCode. ### 3. Use diff --git a/plugins/opencode-icm.ts b/plugins/opencode-icm.ts index d7f5f82..0d91223 100644 --- a/plugins/opencode-icm.ts +++ b/plugins/opencode-icm.ts @@ -15,8 +15,11 @@ import type { Plugin } from "@opencode-ai/plugin" import { execFileSync, spawn } from "child_process" +// Tools whose output is not worth extracting (file writes, questions, tracking) +const NOISE_TOOLS = new Set(["Edit", "Write", "Question", "skill", "todowrite", "notify", "notification"]) + // Capture tool output every N tool calls... -const EXTRACT_EVERY = 3 +const EXTRACT_EVERY = 10 // ...but only drain the extraction queue (the step that loads the // fastembed model) once per N enqueues. Issue #239: running a full // `icm extract` on every 3rd tool call reloaded the embedding model @@ -30,13 +33,14 @@ let enqueueCount = 0 /// Enqueue raw text for deferred extraction. `icm extract --enqueue` /// only writes a queue row — it never loads the embedding model. 
-function icmEnqueue(project: string, input: string): void { +function icmEnqueue(project: string, input: string, cwd?: string): void { try { execFileSync("icm", ["extract", "--enqueue", "-p", project], { encoding: "utf-8", timeout: 10000, input, stdio: ["pipe", "pipe", "pipe"], + cwd, }) } catch { // silent — extraction is best-effort @@ -46,11 +50,12 @@ function icmEnqueue(project: string, input: string): void { /// Drain the pending-extraction queue in a detached background process. /// Fire-and-forget: the chat turn never waits on it, and the heavy /// fastembed model load happens at most once per drain. -function icmDrainDetached(): void { +function icmDrainDetached(cwd?: string): void { try { const child = spawn("icm", ["extract-pending", "--limit", "30"], { detached: true, stdio: "ignore", + cwd, }) child.unref() } catch { @@ -61,12 +66,13 @@ function icmDrainDetached(): void { /// Capture stdout of `icm ` synchronously. Returns the empty string on /// any failure so a missing/old binary or empty memory store can never break /// a chat turn. -function icmCapture(args: string[]): string { +function icmCapture(args: string[], cwd?: string): string { try { const out = execFileSync("icm", args, { encoding: "utf-8", timeout: 10000, stdio: ["ignore", "pipe", "pipe"], + cwd, }) return String(out).trim() } catch { @@ -98,6 +104,7 @@ export const IcmPlugin: Plugin = async ({ $, directory }) => { "tool.execute.after": async (input: any, result: any) => { const tool = String(input?.tool ?? "") if (!tool || tool.startsWith("icm") || tool.startsWith("mcp__icm__")) return + if (NOISE_TOOLS.has(tool)) return toolCallCount++ if (toolCallCount < EXTRACT_EVERY) return @@ -110,11 +117,11 @@ export const IcmPlugin: Plugin = async ({ $, directory }) => { // Enqueue only (cheap, no model load), then drain once per // DRAIN_EVERY enqueues in a detached process — see issue #239. 
- icmEnqueue(project, output.slice(0, 8000)) + icmEnqueue(project, output.slice(0, 8000), directory) enqueueCount++ if (enqueueCount >= DRAIN_EVERY) { enqueueCount = 0 - icmDrainDetached() + icmDrainDetached(directory) } }, @@ -141,9 +148,9 @@ export const IcmPlugin: Plugin = async ({ $, directory }) => { // Compaction is a natural flush point: enqueue the conversation // slice and drain the whole queue once in a detached process. - icmEnqueue(project, text) + icmEnqueue(project, text, directory) enqueueCount = 0 - icmDrainDetached() + icmDrainDetached(directory) }, // Layer 2: log on session creation. OpenCode's `session.created` hook @@ -168,13 +175,13 @@ export const IcmPlugin: Plugin = async ({ $, directory }) => { injectedSessions.add(sessionID) // Wake-up pack: critical/high-importance facts + preferences. - const wakeUp = icmCapture(["wake-up", "--project", project]) + const wakeUp = icmCapture(["wake-up", "--project", project], directory) if (wakeUp) { output.system.push(wakeUp) } // Project-scoped recall: top-N relevant memories for this project. - const ctx = icmCapture(["recall-project", "--limit", "5"]) + const ctx = icmCapture(["recall-project", "--project", project, "--limit", "5"], directory) if (ctx) { output.system.push(ctx) console.error(