diff --git a/.gitignore b/.gitignore
index 1c558120..a550384a 100755
--- a/.gitignore
+++ b/.gitignore
@@ -120,3 +120,6 @@ test-code/
 localtestmcp/
 *.csv
 *.pickle
+
+# Personal dev notes (not tracked)
+docs/dev/
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 3e1a508b..1fedac26 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -18,5 +18,10 @@
     "**/*.egg-info/**": true,
     "**/build/**": true,
     "**/dist/**": true
-  }
+  },
+  "accessibility.signals.terminalBell": {
+    "sound": "on",
+    "announcement": "auto"
+  },
+  "cmake.sourceDirectory": "/Users/yichuan/Desktop/code/LEANN/leann/packages/leann-backend-hnsw"
 }
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
new file mode 100644
index 00000000..f92cb738
--- /dev/null
+++ b/docs/CHANGELOG.md
@@ -0,0 +1,27 @@
+# Changelog
+
+All notable changes to LEANN are documented here. Append-only, newest entries at the bottom.
+
+Format: `## YYYY-MM-DD: <short summary>` followed by bullet points.
+
+## 2026-03-05: IVF backend incremental update support
+
+- Added `leann-backend-ivf` with FAISS IndexIVFFlat + DirectMap.Hashtable.
+- IVF supports in-place `add_vectors` and `remove_ids` without full rebuild.
+- `leann build` is now idempotent: re-running it on an existing index performs an incremental update (adds new files, removes deleted ones, and re-indexes modified ones).
+- Fixed incremental build chunking inconsistency and shared metadata dict bug.
+- Fixed IVF incremental update duplicate chunks from stale `passages.jsonl`.
+
+## 2026-03-05: MCP server v2 — build, status, and structured search
+
+- Added `leann_build` MCP tool: build or incrementally update indexes directly from Claude Code.
+- Added `leann_status` MCP tool: inspect index details (backend, embedding model, chunk/file count, size).
+- `leann_search` now uses `--json` output with file paths always included, formatted as markdown code blocks.
+- Fixed `float32` JSON serialization bug in `leann search --json`.
+- Cleaned up MCP tool descriptions (concise, no emoji).
+
+## 2026-03-05: Documentation — roadmap, vision, and dev guidelines
+
+- Rewrote `docs/roadmap.md` with current P0/P1 priorities from GitHub issue #237.
+- Added `docs/ultimate_goal.md` — long-term vision (personal data platform, best code retrieval MCP, multimodal, local-first).
+- Added self-contained documentation principle and dev doc maintenance rules to `CLAUDE.md`.
diff --git a/docs/issue-proposals/smart-embedding-default.md b/docs/issue-proposals/smart-embedding-default.md
new file mode 100644
index 00000000..41dffa5f
--- /dev/null
+++ b/docs/issue-proposals/smart-embedding-default.md
@@ -0,0 +1,41 @@
+# Smart default embedding model based on platform and corpus size
+
+## Summary
+
+Propose platform- and corpus-aware default embedding model selection for `leann build` when `--embedding-model` is not explicitly specified. This would improve out-of-the-box experience for different deployment scenarios (macOS CPU, NVIDIA GPU, etc.) without changing behavior when users pass an explicit model.
+
+## Motivation
+
+- **Current default**: `facebook/contriever` (~420MB, 768 dim) — heavy for CPU-only builds on large corpora
+- **macOS users** often hit slow builds on 20K+ chunks; lighter models like `all-MiniLM-L6-v2` (~90MB) are much faster
+- **NVIDIA GPU users** can leverage stronger models; smaller corpora benefit from quality (e.g. Qwen3-Embedding-0.6B), larger ones from balanced models (e.g. bge-base-en-v1.5)
+
+## Proposed logic
+
+| Platform | Chunk count | Default model |
+|----------|-------------|---------------|
+| **macOS** | ≥ 20,000 | `sentence-transformers/all-MiniLM-L6-v2` |
+| **macOS** | < 20,000 | `intfloat/e5-small-v2` |
+| **NVIDIA GPU** | < 5,000 | `Qwen/Qwen3-Embedding-0.6B` |
+| **NVIDIA GPU** | ≥ 5,000 | `BAAI/bge-base-en-v1.5` |
+| **Other** | any | `facebook/contriever` (unchanged) |
+
+## Implementation notes
+
+1. **Platform detection**: `torch.cuda.is_available()` for NVIDIA; `sys.platform == "darwin"` for macOS
+2. **Chunk count**: Known only after loading/chunking; may need to either:
+   - Do a lightweight pre-scan (e.g. file count × rough chunks per file), or
+   - Defer the default choice until after the first chunking pass (and cache the chosen model so incremental builds reuse it)
+3. **Explicit override**: If user passes `--embedding-model`, always use it; this logic applies only when the flag is omitted
+
+## Model references
+
+- `sentence-transformers/all-MiniLM-L6-v2`: ~90MB, 384 dim, fast on CPU
+- `intfloat/e5-small-v2`: ~90MB, 384 dim
+- `Qwen/Qwen3-Embedding-0.6B`: 0.6B params, 1024 dim, strong retrieval
+- `BAAI/bge-base-en-v1.5`: ~110M params, 768 dim, good MTEB scores
+
+## Open questions
+
+- Should we add an `--embedding-model auto` option to explicitly opt into this logic?
+- Pre-scan vs post-chunk decision: trade-off between accuracy and implementation complexity
diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py
index 5a178f95..3b6ed110 100644
--- a/packages/leann-core/src/leann/cli.py
+++ b/packages/leann-core/src/leann/cli.py
@@ -2540,7 +2540,7 @@ async def search_documents(self, args):
             json_results = [
                 {
                     "id": r.id,
-                    "score": r.score,
+                    "score": float(r.score),
                     "text": r.text,
                     "metadata": r.metadata,
                 }
diff --git a/packages/leann-core/src/leann/mcp.py b/packages/leann-core/src/leann/mcp.py
index 8ccde94b..dc1fe0da 100755
--- a/packages/leann-core/src/leann/mcp.py
+++ b/packages/leann-core/src/leann/mcp.py
@@ -5,138 +5,331 @@
 import sys
 
 
+def _run_leann(*args, timeout=120):
+    """Invoke the ``leann`` CLI as a child process and collect its output.
+
+    Args:
+        *args: Command-line arguments placed after the ``leann`` executable.
+        timeout: Seconds to wait before ``subprocess.run`` aborts the child.
+
+    Returns:
+        A ``(returncode, stdout, stderr)`` tuple; both streams are decoded text.
+
+    Raises:
+        subprocess.TimeoutExpired: If the command runs longer than *timeout*.
+    """
+    command = ["leann"]
+    command.extend(args)
+    completed = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
+    return completed.returncode, completed.stdout, completed.stderr
+
+
+def _make_result(request_id, content_text):
+    """Build a JSON-RPC 2.0 success response holding a single text content item.
+
+    Args:
+        request_id: Id of the request being answered; echoed back unchanged.
+        content_text: Text placed in the one ``{"type": "text"}`` content entry.
+
+    Returns:
+        The response as a plain dict.
+    """
+    text_item = {"type": "text", "text": content_text}
+    response = {"jsonrpc": "2.0", "id": request_id}
+    response["result"] = {"content": [text_item]}
+    return response
+
+
+def _make_error(request_id, message):
+    """Build a JSON-RPC 2.0 error response with the fixed error code ``-1``.
+
+    Args:
+        request_id: Id of the request being answered; echoed back unchanged.
+        message: Human-readable description stored in ``error.message``.
+
+    Returns:
+        The response as a plain dict.
+    """
+    response = {"jsonrpc": "2.0", "id": request_id}
+    response["error"] = {"code": -1, "message": message}
+    return response
+
+
+# Tool catalogue returned verbatim for the "tools/list" method. Each entry's
+# "name" is what handle_request() matches on to dispatch a "tools/call".
+# Descriptions are read by the client model, so they must only promise what
+# the corresponding handler actually returns.
+TOOLS = [
+    {
+        "name": "leann_search",
+        "description": (
+            "Semantic code search across an indexed codebase. Returns matching code "
+            "chunks with file paths, scores, and surrounding context.\n\n"
+            "Use this to find relevant code before making changes — understand existing "
+            "patterns, locate implementations, and discover related files.\n\n"
+            "Examples: 'authentication middleware', 'database connection pooling', "
+            "'error handling in API routes', 'how are embeddings computed'"
+        ),
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "index_name": {
+                    "type": "string",
+                    "description": "Name of the LEANN index to search. Use leann_list to see available indexes.",
+                },
+                "query": {
+                    "type": "string",
+                    "description": "Natural language or technical search query.",
+                },
+                "top_k": {
+                    "type": "integer",
+                    "default": 5,
+                    "minimum": 1,
+                    "maximum": 20,
+                    "description": "Number of results to return (default 5).",
+                },
+                "complexity": {
+                    "type": "integer",
+                    "default": 32,
+                    "minimum": 16,
+                    "maximum": 128,
+                    "description": "Search precision level (default 32, use 64+ for thorough search).",
+                },
+            },
+            "required": ["index_name", "query"],
+        },
+    },
+    {
+        "name": "leann_list",
+        "description": "List all available LEANN indexes across projects. Shows index names, status, size, and location.",
+        "inputSchema": {"type": "object", "properties": {}},
+    },
+    {
+        "name": "leann_build",
+        "description": (
+            "Build or incrementally update a LEANN index for a codebase. "
+            "If the index already exists, only new/modified/deleted files are processed "
+            "(incremental update). Use this to keep the index current after code changes.\n\n"
+            "Provide file paths or directories to index. For git repos, pass the output "
+            "of 'git ls-files' as individual paths."
+        ),
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "index_name": {
+                    "type": "string",
+                    "description": "Name for the index (e.g., 'my-project'). Defaults to current directory name if omitted.",
+                },
+                "docs": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of file paths or directories to index.",
+                },
+                "backend_name": {
+                    "type": "string",
+                    "enum": ["hnsw", "ivf"],
+                    "default": "ivf",
+                    "description": "Index backend. 'ivf' supports incremental updates (recommended). 'hnsw' is faster for search but limited incremental support.",
+                },
+                "force": {
+                    "type": "boolean",
+                    "default": False,
+                    "description": "Force full rebuild instead of incremental update.",
+                },
+            },
+            "required": ["docs"],
+        },
+    },
+    {
+        "name": "leann_status",
+        "description": (
+            "Show detailed status of a LEANN index: backend type, embedding model and mode, "
+            "dimensions, number of chunks, file count, index size, and on-disk location. "
+            "The index is looked up under .leann/indexes/ in the server's working directory."
+        ),
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "index_name": {
+                    "type": "string",
+                    "description": "Name of the index to inspect.",
+                },
+            },
+            "required": ["index_name"],
+        },
+    },
+]
+
+
+def handle_search(request_id, args):
+    """Run ``leann search`` and format the hits as markdown for the MCP client.
+
+    Args:
+        request_id: JSON-RPC id echoed back in the response.
+        args: Tool arguments. ``index_name`` and ``query`` are required;
+            ``top_k`` (default 5) and ``complexity`` (default 32) are optional.
+
+    Returns:
+        A JSON-RPC result whose text is one of: the formatted hits, a
+        "no results" notice, the raw CLI output when it is not valid JSON,
+        or an error message.
+
+    Raises:
+        subprocess.TimeoutExpired: Propagated from the CLI call on timeout.
+    """
+    index_name = args.get("index_name", "")
+    query = args.get("query", "")
+    if not index_name or not query:
+        return _make_result(request_id, "Error: Both index_name and query are required.")
+
+    top_k = args.get("top_k", 5)
+    complexity = args.get("complexity", 32)
+
+    rc, stdout, stderr = _run_leann(
+        "search",
+        index_name,
+        query,
+        f"--top-k={top_k}",
+        f"--complexity={complexity}",
+        "--json",
+        "--show-metadata",
+        "--non-interactive",
+    )
+
+    if rc != 0:
+        return _make_result(request_id, f"Search failed: {stderr.strip()}")
+
+    # Parse JSON results and format for code context
+    try:
+        results = json.loads(stdout)
+    except json.JSONDecodeError:
+        # Fallback to raw output if --json isn't available
+        return _make_result(
+            request_id, stdout if stdout.strip() else f"Search failed: {stderr.strip()}"
+        )
+
+    if not results:
+        return _make_result(request_id, f"No results found for '{query}'.")
+
+    formatted = []
+    for i, r in enumerate(results, 1):
+        # ``or`` (rather than a .get default) also covers a key that is present
+        # with a null value, which would otherwise abort the whole response.
+        meta = r.get("metadata") or {}
+        file_path = meta.get("file_path") or meta.get("source") or "unknown"
+        score = r.get("score", 0)
+        score_text = f"{score:.3f}" if isinstance(score, (int, float)) else "n/a"
+        text = (r.get("text") or "").strip()
+        formatted.append(f"### Result {i} — {file_path} (score: {score_text})\n```\n{text}\n```")
+
+    header = f"Found {len(results)} results for '{query}':\n"
+    return _make_result(request_id, header + "\n\n".join(formatted))
+
+
+def handle_list(request_id):
+    """Return the output of ``leann list`` as a tool result.
+
+    On a non-zero exit status the result text is the CLI's stderr prefixed
+    with "Error listing indexes: " instead of its stdout.
+    """
+    returncode, listing, errors = _run_leann("list")
+    text = listing if returncode == 0 else f"Error listing indexes: {errors.strip()}"
+    return _make_result(request_id, text)
+
+
+def handle_build(request_id, args):
+    """Build or incrementally update an index by running ``leann build``.
+
+    Args:
+        request_id: JSON-RPC id echoed back in the response.
+        args: Tool arguments. ``docs`` (list of paths) is required;
+            ``index_name``, ``backend_name`` (default "ivf") and ``force``
+            (default False) are optional.
+
+    Returns:
+        A JSON-RPC result containing the CLI output, a generic success
+        message when the CLI printed nothing, or the failure output.
+
+    Raises:
+        subprocess.TimeoutExpired: If the build exceeds the 600 second limit.
+    """
+    docs = args.get("docs") or []
+    # A single path sent as a bare string would otherwise be unpacked by
+    # ``*docs`` into one argument per character.
+    if isinstance(docs, str):
+        docs = [docs]
+    if not docs:
+        return _make_result(
+            request_id, "Error: 'docs' parameter is required (list of file paths or directories)."
+        )
+
+    cmd = ["build"]
+
+    # The optional index name is positional and is placed before --docs.
+    index_name = args.get("index_name")
+    if index_name:
+        cmd.append(index_name)
+
+    cmd.extend(["--docs", *docs])
+
+    backend = args.get("backend_name", "ivf")
+    cmd.append(f"--backend-name={backend}")
+
+    if args.get("force", False):
+        cmd.append("--force")
+
+    rc, stdout, stderr = _run_leann(*cmd, timeout=600)
+
+    if rc != 0:
+        return _make_result(request_id, f"Build failed:\n{stderr.strip()}\n{stdout.strip()}")
+
+    return _make_result(request_id, stdout if stdout.strip() else "Build completed successfully.")
+
+
+def handle_status(request_id, args):
+    """Summarise the index stored at ``.leann/indexes/<index_name>`` under the cwd.
+
+    Reads ``documents.leann.meta.json`` for backend and embedding settings,
+    counts non-empty lines and distinct file paths in
+    ``documents.leann.passages.jsonl``, and sums the sizes of the files
+    directly inside the index directory.
+
+    Args:
+        request_id: JSON-RPC id echoed back in the response.
+        args: Tool arguments; ``index_name`` is required.
+
+    Returns:
+        A JSON-RPC result with a multi-line status report, or an error
+        message when the name is missing, the index is not found, or its
+        metadata cannot be read.
+    """
+    index_name = args.get("index_name", "")
+    if not index_name:
+        return _make_result(request_id, "Error: index_name is required.")
+
+    from pathlib import Path
+
+    # Check standard location
+    leann_dir = Path.cwd() / ".leann" / "indexes" / index_name
+    meta_path = leann_dir / "documents.leann.meta.json"
+    passages_path = leann_dir / "documents.leann.passages.jsonl"
+
+    if not meta_path.exists():
+        return _make_result(request_id, f"Index '{index_name}' not found at {leann_dir}")
+
+    try:
+        # JSON text is UTF-8 by specification; do not depend on the locale encoding.
+        with open(meta_path, encoding="utf-8") as f:
+            meta = json.load(f)
+    except Exception as e:
+        return _make_result(request_id, f"Error reading index metadata: {e}")
+
+    # Count passages
+    num_chunks = 0
+    file_paths = set()
+    if passages_path.exists():
+        with open(passages_path, encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                num_chunks += 1
+                try:
+                    passage = json.loads(line)
+                except json.JSONDecodeError:
+                    # An unparseable line still counts as a chunk but contributes no path.
+                    continue
+                # Must not reuse the name ``meta``: that holds the index-level
+                # metadata reported below and would be overwritten per passage.
+                passage_meta = (passage.get("metadata") if isinstance(passage, dict) else None) or {}
+                fp = passage_meta.get("file_path") or passage_meta.get("source", "")
+                if fp:
+                    file_paths.add(fp)
+
+    # Calculate total index size (files directly inside the index directory only)
+    total_size = 0
+    if leann_dir.exists():
+        for entry in leann_dir.iterdir():
+            if entry.is_file():
+                total_size += entry.stat().st_size
+
+    size_mb = total_size / (1024 * 1024)
+
+    backend = meta.get("backend_name", "unknown")
+    embedding_model = meta.get("embedding_model", "unknown")
+    embedding_mode = meta.get("embedding_mode", "unknown")
+    dimensions = meta.get("dimensions", "unknown")
+
+    status_lines = [
+        f"Index: {index_name}",
+        f"Backend: {backend}",
+        f"Embedding: {embedding_model} ({embedding_mode})",
+        f"Dimensions: {dimensions}",
+        f"Chunks: {num_chunks}",
+        f"Files indexed: {len(file_paths)}",
+        f"Size: {size_mb:.1f} MB",
+        f"Location: {leann_dir}",
+    ]
+
+    return _make_result(request_id, "\n".join(status_lines))
+
+
 def handle_request(request):
+    """Dispatch one decoded JSON-RPC message and return the response dict.
+
+    Handles ``initialize``, ``notifications/initialized``, ``tools/list`` and
+    ``tools/call``. Tool failures are reported inside the response rather
+    than raised.
+
+    Args:
+        request: The decoded JSON-RPC message.
+
+    Returns:
+        The response dict, or ``None`` when nothing should be sent back
+        (notifications, i.e. messages without an id).
+    """
-    if request.get("method") == "initialize":
+    method = request.get("method")
+    request_id = request.get("id")
+
+    if method == "initialize":
         return {
             "jsonrpc": "2.0",
-            "id": request.get("id"),
+            "id": request_id,
             "result": {
                 "capabilities": {"tools": {}},
                 "protocolVersion": "2024-11-05",
-                "serverInfo": {"name": "leann-mcp", "version": "1.0.0"},
+                "serverInfo": {"name": "leann-mcp", "version": "2.0.0"},
             },
         }
 
-    elif request.get("method") == "tools/list":
+    if method == "notifications/initialized":
+        return None
+
+    if method == "tools/list":
         return {
             "jsonrpc": "2.0",
-            "id": request.get("id"),
-            "result": {
-                "tools": [
-                    {
-                        "name": "leann_search",
-                        "description": """🔍 Search code using natural language - like having a coding assistant who knows your entire codebase!
-
-🎯 **Perfect for**:
-- "How does authentication work?" → finds auth-related code
-- "Error handling patterns" → locates try-catch blocks and error logic
-- "Database connection setup" → finds DB initialization code
-- "API endpoint definitions" → locates route handlers
-- "Configuration management" → finds config files and usage
-
-💡 **Pro tip**: Use this before making any changes to understand existing patterns and conventions.""",
-                        "inputSchema": {
-                            "type": "object",
-                            "properties": {
-                                "index_name": {
-                                    "type": "string",
-                                    "description": "Name of the LEANN index to search. Use 'leann_list' first to see available indexes.",
-                                },
-                                "query": {
-                                    "type": "string",
-                                    "description": "Search query - can be natural language (e.g., 'how to handle errors') or technical terms (e.g., 'async function definition')",
-                                },
-                                "top_k": {
-                                    "type": "integer",
-                                    "default": 5,
-                                    "minimum": 1,
-                                    "maximum": 20,
-                                    "description": "Number of search results to return. Use 5-10 for focused results, 15-20 for comprehensive exploration.",
-                                },
-                                "complexity": {
-                                    "type": "integer",
-                                    "default": 32,
-                                    "minimum": 16,
-                                    "maximum": 128,
-                                    "description": "Search complexity level. Use 16-32 for fast searches (recommended), 64+ for higher precision when needed.",
-                                },
-                                "show_metadata": {
-                                    "type": "boolean",
-                                    "default": False,
-                                    "description": "Include file paths and metadata in search results. Useful for understanding which files contain the results.",
-                                },
-                            },
-                            "required": ["index_name", "query"],
-                        },
-                    },
-                    {
-                        "name": "leann_list",
-                        "description": "📋 Show all your indexed codebases - your personal code library! Use this to see what's available for search.",
-                        "inputSchema": {"type": "object", "properties": {}},
-                    },
-                ]
-            },
+            "id": request_id,
+            "result": {"tools": TOOLS},
         }
 
-    elif request.get("method") == "tools/call":
+    if method == "tools/call":
-        tool_name = request["params"]["name"]
-        args = request["params"].get("arguments", {})
+        # Tolerate a missing/null "params" or "arguments" instead of raising
+        # KeyError/AttributeError out of the dispatcher; a missing tool name
+        # then falls through to the "Unknown tool" error below.
+        params = request.get("params") or {}
+        tool_name = params.get("name")
+        args = params.get("arguments") or {}
 
         try:
             if tool_name == "leann_search":
-                # Validate required parameters
-                if not args.get("index_name") or not args.get("query"):
-                    return {
-                        "jsonrpc": "2.0",
-                        "id": request.get("id"),
-                        "result": {
-                            "content": [
-                                {
-                                    "type": "text",
-                                    "text": "Error: Both index_name and query are required",
-                                }
-                            ]
-                        },
-                    }
-
-                # Build simplified command with non-interactive flag for MCP compatibility
-                cmd = [
-                    "leann",
-                    "search",
-                    args["index_name"],
-                    args["query"],
-                    f"--top-k={args.get('top_k', 5)}",
-                    f"--complexity={args.get('complexity', 32)}",
-                    "--non-interactive",
-                ]
-                if args.get("show_metadata", False):
-                    cmd.append("--show-metadata")
-                result = subprocess.run(cmd, capture_output=True, text=True)
-
+                return handle_search(request_id, args)
             elif tool_name == "leann_list":
-                result = subprocess.run(["leann", "list"], capture_output=True, text=True)
-
-            return {
-                "jsonrpc": "2.0",
-                "id": request.get("id"),
-                "result": {
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": result.stdout
-                            if result.returncode == 0
-                            else f"Error: {result.stderr}",
-                        }
-                    ]
-                },
-            }
-
+                return handle_list(request_id)
+            elif tool_name == "leann_build":
+                return handle_build(request_id, args)
+            elif tool_name == "leann_status":
+                return handle_status(request_id, args)
+            else:
+                return _make_error(request_id, f"Unknown tool: {tool_name}")
+        except subprocess.TimeoutExpired:
+            return _make_result(request_id, "Error: Command timed out.")
         except Exception as e:
-            return {
-                "jsonrpc": "2.0",
-                "id": request.get("id"),
-                "error": {"code": -1, "message": str(e)},
-            }
+            return _make_error(request_id, str(e))
+
+    # Any other method: a notification (no id) must not be answered, but a
+    # request carrying an id needs a reply or the client is left waiting.
+    if request_id is None:
+        return None
+    return {
+        "jsonrpc": "2.0",
+        "id": request_id,
+        "error": {"code": -32601, "message": f"Method not found: {method}"},
+    }
 
 
 def main():
diff --git a/packages/leann-mcp/README.md b/packages/leann-mcp/README.md
index 5e2055e1..8af45560 100644
--- a/packages/leann-mcp/README.md
+++ b/packages/leann-mcp/README.md
@@ -37,8 +37,10 @@ claude mcp list | cat
 
 Once connected, you'll have access to these powerful semantic search tools in Claude Code:
 
+- **`leann_search`** - Semantic code search with file paths, scores, and context
 - **`leann_list`** - List all available indexes across your projects
-- **`leann_search`** - Perform semantic searches across code and documents
+- **`leann_build`** - Build or incrementally update an index (keeps it current as code changes)
+- **`leann_status`** - Show index details: backend, embedding model, chunk count, file count, size
 
 
 ## 🎯 Quick Start Example