From 9aa430a88d7216c70d1c991d5365c85d89b742ba Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 16:59:30 +0800
Subject: [PATCH 01/16] feat(config): add ScopeConfig and DefaultScopeConfig
 dataclasses

---
 src/memsearch/config.py | 20 ++++++++++++++++++++
 tests/test_config.py    | 19 +++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/src/memsearch/config.py b/src/memsearch/config.py
index 52c9b8c1..e394cb6e 100644
--- a/src/memsearch/config.py
+++ b/src/memsearch/config.py
@@ -70,6 +70,26 @@ class RerankerConfig:
     model: str = ""  # empty = disabled; set to model ID to enable
 
 
+@dataclass
+class ScopeConfig:
+    """One additional memory scope. See [[scopes]] in TOML."""
+
+    name: str = ""
+    collection: str = ""
+    paths: list[str] = field(default_factory=list)
+    quota: int | None = None
+    uri: str = ""    # empty = inherit [milvus].uri
+    token: str = ""  # empty = inherit [milvus].token
+
+
+@dataclass
+class DefaultScopeConfig:
+    """Tunable settings for the default (single-collection) scope."""
+
+    name: str = "project"
+    quota: int | None = None
+
+
 @dataclass
 class LLMConfig:
     """LLM settings for plugin summarization and compact.
diff --git a/tests/test_config.py b/tests/test_config.py
index 4a8bcdf8..a47fc6d3 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -364,3 +364,22 @@ def test_dict_to_config_accepts_empty_section_dicts() -> None:
     assert cfg.embedding.provider == "openai"
     assert cfg.milvus.collection == "memsearch_chunks"
     assert cfg.watch.debounce_ms == 1500
+
+
+def test_scope_config_defaults():
+    """ScopeConfig should have sensible defaults."""
+    from memsearch.config import ScopeConfig
+    sc = ScopeConfig(name="x", collection="c")
+    assert sc.name == "x"
+    assert sc.collection == "c"
+    assert sc.paths == []
+    assert sc.quota is None
+    assert sc.uri == ""
+    assert sc.token == ""
+
+
+def test_default_scope_config_defaults():
+    from memsearch.config import DefaultScopeConfig
+    ds = DefaultScopeConfig()
+    assert ds.name == "project"
+    assert ds.quota is None

From 5f750ee5250fc9a1f33b9dfe84026f5c74062a03 Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:00:04 +0800
Subject: [PATCH 02/16] style: apply ruff format to config.py and
 test_config.py

---
 src/memsearch/config.py | 2 +-
 tests/test_config.py    | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/memsearch/config.py b/src/memsearch/config.py
index e394cb6e..959471c7 100644
--- a/src/memsearch/config.py
+++ b/src/memsearch/config.py
@@ -78,7 +78,7 @@ class ScopeConfig:
     collection: str = ""
     paths: list[str] = field(default_factory=list)
     quota: int | None = None
-    uri: str = ""    # empty = inherit [milvus].uri
+    uri: str = ""  # empty = inherit [milvus].uri
     token: str = ""  # empty = inherit [milvus].token
 
 
diff --git a/tests/test_config.py b/tests/test_config.py
index a47fc6d3..49b48c38 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -369,6 +369,7 @@ def test_dict_to_config_accepts_empty_section_dicts() -> None:
 def test_scope_config_defaults():
     """ScopeConfig should have sensible defaults."""
     from memsearch.config import ScopeConfig
+
     sc = ScopeConfig(name="x", collection="c")
     assert sc.name == "x"
     assert sc.collection == "c"
@@ -380,6 +381,7 @@ def test_scope_config_defaults():
 
 def test_default_scope_config_defaults():
     from memsearch.config import DefaultScopeConfig
+
     ds = DefaultScopeConfig()
     assert ds.name == "project"
     assert ds.quota is None

From d77e3d254c3afedaec9357bedd26872d99719bf0 Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:03:14 +0800
Subject: [PATCH 03/16] docs(config): note Task 2 will register multi-scope
 dataclasses

Add pointer comments above MemSearchConfig and _SECTION_CLASSES to
clarify that ScopeConfig/DefaultScopeConfig are intentionally unwired
and will be integrated in Task 2 of the multi-scope plan.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/memsearch/config.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/memsearch/config.py b/src/memsearch/config.py
index 959471c7..be616fe8 100644
--- a/src/memsearch/config.py
+++ b/src/memsearch/config.py
@@ -116,6 +116,7 @@ class PromptsConfig:
     summarize: str = ""  # custom prompt file for plugin session summarization
 
 
+# NOTE: ScopeConfig/DefaultScopeConfig are wired in by Task 2 (multi-scope plan).
 @dataclass
 class MemSearchConfig:
     milvus: MilvusConfig = field(default_factory=MilvusConfig)
@@ -128,6 +129,7 @@ class MemSearchConfig:
     prompts: PromptsConfig = field(default_factory=PromptsConfig)
 
 
+# NOTE: ScopeConfig/DefaultScopeConfig are wired in by Task 2 (multi-scope plan).
 # -- Section name → dataclass mapping for typed reconstruction --
 _SECTION_CLASSES: dict[str, type] = {
     "milvus": MilvusConfig,

From dc0344982ca8c58f36eec44f3968b2efba479ca6 Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:05:44 +0800
Subject: [PATCH 04/16] feat(config): wire scopes and default_scope into
 MemSearchConfig

---
 src/memsearch/config.py | 25 ++++++++++++++++++++++---
 tests/test_config.py    | 31 +++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/src/memsearch/config.py b/src/memsearch/config.py
index be616fe8..91d174b2 100644
--- a/src/memsearch/config.py
+++ b/src/memsearch/config.py
@@ -116,7 +116,6 @@ class PromptsConfig:
     summarize: str = ""  # custom prompt file for plugin session summarization
 
 
-# NOTE: ScopeConfig/DefaultScopeConfig are wired in by Task 2 (multi-scope plan).
 @dataclass
 class MemSearchConfig:
     milvus: MilvusConfig = field(default_factory=MilvusConfig)
@@ -127,9 +126,10 @@ class MemSearchConfig:
     reranker: RerankerConfig = field(default_factory=RerankerConfig)
     llm: LLMConfig = field(default_factory=LLMConfig)
     prompts: PromptsConfig = field(default_factory=PromptsConfig)
+    default_scope: DefaultScopeConfig = field(default_factory=DefaultScopeConfig)
+    scopes: list[ScopeConfig] = field(default_factory=list)
 
 
-# NOTE: ScopeConfig/DefaultScopeConfig are wired in by Task 2 (multi-scope plan).
 # -- Section name → dataclass mapping for typed reconstruction --
 _SECTION_CLASSES: dict[str, type] = {
     "milvus": MilvusConfig,
@@ -140,6 +140,7 @@ class MemSearchConfig:
     "reranker": RerankerConfig,
     "llm": LLMConfig,
     "prompts": PromptsConfig,
+    "default_scope": DefaultScopeConfig,
 }
 
 
@@ -174,10 +175,16 @@ def resolve_env_ref(value: str) -> str:
 
 def _resolve_env_refs_in_dict(d: dict[str, Any]) -> dict[str, Any]:
     """Walk a nested config dict and resolve all ``env:`` references."""
-    resolved = {}
+    resolved: dict[str, Any] = {}
     for key, val in d.items():
         if isinstance(val, dict):
             resolved[key] = _resolve_env_refs_in_dict(val)
+        elif isinstance(val, list):
+            resolved[key] = [
+                _resolve_env_refs_in_dict(item) if isinstance(item, dict)
+                else (resolve_env_ref(item) if isinstance(item, str) and item.startswith(_ENV_PREFIX) else item)
+                for item in val
+            ]
         elif isinstance(val, str) and val.startswith(_ENV_PREFIX):
             resolved[key] = resolve_env_ref(val)
         else:
@@ -226,6 +233,18 @@ def _dict_to_config(d: dict[str, Any]) -> MemSearchConfig:
         valid = {f.name for f in fields(cls)}
         filtered = {k: v for k, v in section_data.items() if k in valid}
         kwargs[section_name] = cls(**filtered)
+
+    scopes_raw = d.get("scopes", [])
+    scopes: list[ScopeConfig] = []
+    if isinstance(scopes_raw, list):
+        valid_scope_keys = {f.name for f in fields(ScopeConfig)}
+        for entry in scopes_raw:
+            if not isinstance(entry, dict):
+                continue
+            filtered = {k: v for k, v in entry.items() if k in valid_scope_keys}
+            scopes.append(ScopeConfig(**filtered))
+    kwargs["scopes"] = scopes
+
     return MemSearchConfig(**kwargs)
 
 
diff --git a/tests/test_config.py b/tests/test_config.py
index 49b48c38..fef01d73 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -385,3 +385,34 @@ def test_default_scope_config_defaults():
     ds = DefaultScopeConfig()
     assert ds.name == "project"
     assert ds.quota is None
+
+
+def test_memsearch_config_has_scopes_and_default_scope():
+    cfg = MemSearchConfig()
+    assert cfg.scopes == []
+    assert cfg.default_scope.name == "project"
+
+
+def test_resolve_config_loads_scopes_array(tmp_path, monkeypatch):
+    """[[scopes]] array-of-tables should round-trip into MemSearchConfig.scopes."""
+    import memsearch.config as cfg_mod
+    proj = tmp_path / ".memsearch.toml"
+    proj.write_text(
+        '[default_scope]\nname = "myproj"\nquota = 5\n\n'
+        '[[scopes]]\nname = "global"\ncollection = "ms_global"\n'
+        'paths = ["/tmp/g"]\nquota = 3\n\n'
+        '[[scopes]]\nname = "personal"\ncollection = "ms_personal"\n'
+        'paths = ["/tmp/p"]\n'
+    )
+    monkeypatch.setattr(cfg_mod, "PROJECT_CONFIG_PATH", proj)
+    monkeypatch.setattr(cfg_mod, "GLOBAL_CONFIG_PATH", tmp_path / "global-absent.toml")
+    cfg = cfg_mod.resolve_config()
+    assert cfg.default_scope.name == "myproj"
+    assert cfg.default_scope.quota == 5
+    assert len(cfg.scopes) == 2
+    assert cfg.scopes[0].name == "global"
+    assert cfg.scopes[0].collection == "ms_global"
+    assert cfg.scopes[0].paths == ["/tmp/g"]
+    assert cfg.scopes[0].quota == 3
+    assert cfg.scopes[1].name == "personal"
+    assert cfg.scopes[1].quota is None

From 32409b83c0e186ea71c01d5a0e713f9cbc5debf9 Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:06:19 +0800
Subject: [PATCH 05/16] style(config): apply ruff format to Task 2 changes

---
 src/memsearch/config.py | 3 ++-
 tests/test_config.py    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/memsearch/config.py b/src/memsearch/config.py
index 91d174b2..fb7235b5 100644
--- a/src/memsearch/config.py
+++ b/src/memsearch/config.py
@@ -181,7 +181,8 @@ def _resolve_env_refs_in_dict(d: dict[str, Any]) -> dict[str, Any]:
             resolved[key] = _resolve_env_refs_in_dict(val)
         elif isinstance(val, list):
             resolved[key] = [
-                _resolve_env_refs_in_dict(item) if isinstance(item, dict)
+                _resolve_env_refs_in_dict(item)
+                if isinstance(item, dict)
                 else (resolve_env_ref(item) if isinstance(item, str) and item.startswith(_ENV_PREFIX) else item)
                 for item in val
             ]
diff --git a/tests/test_config.py b/tests/test_config.py
index fef01d73..7e953138 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -396,6 +396,7 @@ def test_memsearch_config_has_scopes_and_default_scope():
 def test_resolve_config_loads_scopes_array(tmp_path, monkeypatch):
     """[[scopes]] array-of-tables should round-trip into MemSearchConfig.scopes."""
     import memsearch.config as cfg_mod
+
     proj = tmp_path / ".memsearch.toml"
     proj.write_text(
         '[default_scope]\nname = "myproj"\nquota = 5\n\n'

From 6876b35b5dd597e1ade857e237b314b4c4b0d7cc Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:13:47 +0800
Subject: [PATCH 06/16] feat(config): validate scope paths don't overlap

---
 src/memsearch/config.py | 35 +++++++++++++++++++++++++++++++++++
 tests/test_config.py    | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/src/memsearch/config.py b/src/memsearch/config.py
index fb7235b5..370c48ff 100644
--- a/src/memsearch/config.py
+++ b/src/memsearch/config.py
@@ -156,6 +156,39 @@ class ConfigEnvVarError(KeyError):
     """
 
 
+class ScopePathOverlapError(ValueError):
+    """Raised when two scopes have overlapping paths."""
+
+
+def validate_scope_paths(scopes: list[ScopeConfig]) -> None:
+    """Raise ScopePathOverlapError if any two scopes' paths overlap.
+
+    Path A overlaps path B if A is a parent of B (or vice versa). Read-only
+    scopes (empty paths) cannot conflict.
+    """
+    resolved: list[tuple[str, Path]] = []
+    for sc in scopes:
+        resolved.extend((sc.name, Path(p).expanduser().resolve()) for p in sc.paths)
+    for i, (name_a, path_a) in enumerate(resolved):
+        for name_b, path_b in resolved[i + 1 :]:
+            if name_a == name_b:
+                continue
+            if _is_parent_or_equal(path_a, path_b) or _is_parent_or_equal(path_b, path_a):
+                raise ScopePathOverlapError(
+                    f"Scope paths overlap: scope {name_a!r} path {path_a} conflicts with scope {name_b!r} path {path_b}"
+                )
+
+
+def _is_parent_or_equal(parent: Path, child: Path) -> bool:
+    if parent == child:
+        return True
+    try:
+        child.relative_to(parent)
+        return True
+    except ValueError:
+        return False
+
+
 def resolve_env_ref(value: str) -> str:
     """Resolve an ``env:VAR_NAME`` reference to its environment variable value.
 
@@ -288,6 +321,8 @@ def resolve_config(cli_overrides: dict[str, Any] | None = None) -> MemSearchConf
 
         cfg.embedding.model = DEFAULT_MODELS.get(cfg.embedding.provider, "")
 
+    validate_scope_paths(cfg.scopes)
+
     return cfg
 
 
diff --git a/tests/test_config.py b/tests/test_config.py
index 7e953138..a5596698 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -417,3 +417,38 @@ def test_resolve_config_loads_scopes_array(tmp_path, monkeypatch):
     assert cfg.scopes[0].quota == 3
     assert cfg.scopes[1].name == "personal"
     assert cfg.scopes[1].quota is None
+
+
+def test_validate_scope_paths_rejects_overlap(tmp_path):
+    from memsearch.config import ScopeConfig, ScopePathOverlapError, validate_scope_paths
+
+    a = tmp_path / "shared"
+    a.mkdir()
+    scopes = [
+        ScopeConfig(name="a", collection="ca", paths=[str(a)]),
+        ScopeConfig(name="b", collection="cb", paths=[str(a / "sub")]),
+    ]
+    with pytest.raises(ScopePathOverlapError) as exc:
+        validate_scope_paths(scopes)
+    assert "a" in str(exc.value) and "b" in str(exc.value)
+
+
+def test_validate_scope_paths_allows_disjoint(tmp_path):
+    from memsearch.config import ScopeConfig, validate_scope_paths
+
+    scopes = [
+        ScopeConfig(name="a", collection="ca", paths=[str(tmp_path / "a")]),
+        ScopeConfig(name="b", collection="cb", paths=[str(tmp_path / "b")]),
+    ]
+    validate_scope_paths(scopes)  # must not raise
+
+
+def test_validate_scope_paths_skips_empty_paths():
+    """Read-only scopes (no paths) cannot conflict with anything."""
+    from memsearch.config import ScopeConfig, validate_scope_paths
+
+    scopes = [
+        ScopeConfig(name="a", collection="ca", paths=["/tmp/foo"]),
+        ScopeConfig(name="b", collection="cb", paths=[]),  # read-only
+    ]
+    validate_scope_paths(scopes)

From 2671b02f0b522807471ae9e5f6247f93d62422b5 Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:18:15 +0800
Subject: [PATCH 07/16] feat(core): add Scope dataclass
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add frozen `Scope` dataclass with name, collection, paths, quota, uri,
and token fields — first building block for multi-scope blended search.
---
 src/memsearch/core.py   | 18 ++++++++++++++++++
 tests/test_core_unit.py | 15 +++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 tests/test_core_unit.py

diff --git a/src/memsearch/core.py b/src/memsearch/core.py
index b3eabb3b..447042d2 100644
--- a/src/memsearch/core.py
+++ b/src/memsearch/core.py
@@ -5,6 +5,7 @@
 import asyncio
 import logging
 from collections.abc import Callable
+from dataclasses import dataclass, field
 from datetime import date
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
@@ -21,6 +22,23 @@
 logger = logging.getLogger(__name__)
 
 
+@dataclass(frozen=True)
+class Scope:
+    """One memory scope. See spec for full semantics.
+
+    A scope with empty ``paths`` is read-only (search-only, never indexed).
+    ``quota=None`` means "share remaining slots with other unquota'd scopes".
+    ``uri``/``token`` of ``None`` means inherit from the parent ``MemSearch``.
+    """
+
+    name: str
+    collection: str
+    paths: list[str] = field(default_factory=list)
+    quota: int | None = None
+    uri: str | None = None
+    token: str | None = None
+
+
 class MemSearch:
     """High-level API for semantic memory search.
 
diff --git a/tests/test_core_unit.py b/tests/test_core_unit.py
new file mode 100644
index 00000000..5c0a34bf
--- /dev/null
+++ b/tests/test_core_unit.py
@@ -0,0 +1,15 @@
+"""Unit tests for core helpers that don't require Milvus or an embedder."""
+
+from __future__ import annotations
+
+
+def test_scope_dataclass_defaults():
+    from memsearch.core import Scope
+
+    s = Scope(name="x", collection="c")
+    assert s.name == "x"
+    assert s.collection == "c"
+    assert s.paths == []
+    assert s.quota is None
+    assert s.uri is None
+    assert s.token is None

From 1edea02e7515bdbedbd7b3adf33b4013599028eb Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:21:45 +0800
Subject: [PATCH 08/16] feat(core): pure dedup+quota helper for multi-scope
 blending

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/memsearch/core.py   |  68 ++++++++++++++++++++++++
 tests/test_core_unit.py | 113 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 181 insertions(+)

diff --git a/src/memsearch/core.py b/src/memsearch/core.py
index 447042d2..6f2067ad 100644
--- a/src/memsearch/core.py
+++ b/src/memsearch/core.py
@@ -39,6 +39,74 @@ class Scope:
     token: str | None = None
 
 
+def _blend_scope_results(
+    per_scope: list[tuple[str, list[dict]]],
+    scope_quotas: dict[str, int | None],
+    default_scope_name: str,
+    scope_order: list[str],
+    top_k: int,
+) -> list[dict]:
+    """Dedup, apply per-scope quotas, return top-K blended.
+
+    Algorithm:
+      1. Tag each hit with its scope name.
+      2. Dedup by chunk_hash; keep highest-scoring; remember winning scope.
+      3. Quota modes:
+         - all scopes have quotas → hard cap per scope, no redistribution
+         - no scopes have quotas → return globally top-K by score
+         - mixed → quota'd capped first; unquota'd share remainder by score
+      4. Tie-break: default scope wins, then ``scope_order`` index.
+    """
+    # 1+2. Tag & dedup
+    seen: dict[str, dict] = {}
+    for scope_name, hits in per_scope:
+        for h in hits:
+            key = h["chunk_hash"]
+            tagged = {**h, "scope": scope_name}
+            existing = seen.get(key)
+            if existing is None or tagged["score"] > existing["score"]:
+                seen[key] = tagged
+
+    scope_rank = {name: i for i, name in enumerate(scope_order)}
+
+    def sort_key(r: dict) -> tuple:
+        # Higher score first; then default scope wins; then config order
+        return (
+            -r["score"],
+            0 if r["scope"] == default_scope_name else 1,
+            scope_rank.get(r["scope"], len(scope_order)),
+        )
+
+    all_hits = sorted(seen.values(), key=sort_key)
+
+    # 3. Quota modes
+    quotas_present = [v for v in scope_quotas.values() if v is not None]
+
+    # All-no-quota: just top-k
+    if not quotas_present:
+        return all_hits[:top_k]
+
+    capped: dict[str, list[dict]] = {n: [] for n in scope_quotas}
+    leftovers: list[dict] = []
+
+    for h in all_hits:
+        sc = h["scope"]
+        q = scope_quotas.get(sc)
+        if q is None:
+            leftovers.append(h)
+        elif len(capped[sc]) < q:
+            capped[sc].append(h)
+        # else: quota'd scope full; drop this hit (no redistribution)
+
+    quota_total = sum(scope_quotas[n] or 0 for n in scope_quotas)
+    remaining_slots = max(0, top_k - quota_total)
+    chosen_leftovers = leftovers[:remaining_slots]
+
+    merged = [h for hits in capped.values() for h in hits] + chosen_leftovers
+    merged.sort(key=sort_key)
+    return merged[:top_k]
+
+
 class MemSearch:
     """High-level API for semantic memory search.
 
diff --git a/tests/test_core_unit.py b/tests/test_core_unit.py
index 5c0a34bf..cb4ddb4e 100644
--- a/tests/test_core_unit.py
+++ b/tests/test_core_unit.py
@@ -13,3 +13,116 @@ def test_scope_dataclass_defaults():
     assert s.quota is None
     assert s.uri is None
     assert s.token is None
+
+
+def _hit(chunk_hash: str, score: float, content: str = "x", source: str = "/x.md") -> dict:
+    return {"chunk_hash": chunk_hash, "score": score, "content": content, "source": source}
+
+
+def test_blend_dedups_keeps_higher_score():
+    from memsearch.core import _blend_scope_results
+
+    result = _blend_scope_results(
+        per_scope=[
+            ("project", [_hit("a", 0.5), _hit("b", 0.3)]),
+            ("global", [_hit("a", 0.9)]),  # same chunk_hash, higher score
+        ],
+        scope_quotas={"project": None, "global": None},
+        default_scope_name="project",
+        scope_order=["project", "global"],
+        top_k=10,
+    )
+    chunk_a = next(r for r in result if r["chunk_hash"] == "a")
+    assert chunk_a["score"] == 0.9
+    assert chunk_a["scope"] == "global"
+
+
+def test_blend_all_quotas_caps_per_scope():
+    from memsearch.core import _blend_scope_results
+
+    result = _blend_scope_results(
+        per_scope=[
+            ("project", [_hit(f"p{i}", 1.0 - i * 0.01) for i in range(10)]),
+            ("global", [_hit(f"g{i}", 0.5 - i * 0.01) for i in range(10)]),
+        ],
+        scope_quotas={"project": 3, "global": 2},
+        default_scope_name="project",
+        scope_order=["project", "global"],
+        top_k=10,
+    )
+    by_scope = {}
+    for r in result:
+        by_scope.setdefault(r["scope"], 0)
+        by_scope[r["scope"]] += 1
+    assert by_scope == {"project": 3, "global": 2}
+
+
+def test_blend_no_quotas_returns_global_top_k():
+    from memsearch.core import _blend_scope_results
+
+    result = _blend_scope_results(
+        per_scope=[
+            ("project", [_hit(f"p{i}", 0.5) for i in range(5)]),
+            ("global", [_hit(f"g{i}", 0.9) for i in range(5)]),
+        ],
+        scope_quotas={"project": None, "global": None},
+        default_scope_name="project",
+        scope_order=["project", "global"],
+        top_k=4,
+    )
+    assert len(result) == 4
+    # global has higher scores; all 4 should be from global
+    assert all(r["scope"] == "global" for r in result)
+
+
+def test_blend_mixed_quotas():
+    """Quota'd scopes filled first (cap), unquota'd share remainder by score."""
+    from memsearch.core import _blend_scope_results
+
+    result = _blend_scope_results(
+        per_scope=[
+            ("project", [_hit(f"p{i}", 0.95) for i in range(10)]),  # high score, no quota
+            ("global", [_hit(f"g{i}", 0.50) for i in range(10)]),  # quota=2
+        ],
+        scope_quotas={"project": None, "global": 2},
+        default_scope_name="project",
+        scope_order=["project", "global"],
+        top_k=5,
+    )
+    by_scope = {r["scope"] for r in result}
+    counts = {s: sum(1 for r in result if r["scope"] == s) for s in by_scope}
+    assert counts == {"project": 3, "global": 2}
+
+
+def test_blend_quota_underfill_does_not_redistribute():
+    from memsearch.core import _blend_scope_results
+
+    result = _blend_scope_results(
+        per_scope=[
+            ("project", [_hit(f"p{i}", 0.9) for i in range(10)]),
+            ("global", [_hit("g0", 0.5)]),  # only 1 hit, quota 5 → 4 empty slots
+        ],
+        scope_quotas={"project": 3, "global": 5},
+        default_scope_name="project",
+        scope_order=["project", "global"],
+        top_k=10,
+    )
+    counts = {s: sum(1 for r in result if r["scope"] == s) for s in {"project", "global"}}
+    assert counts == {"project": 3, "global": 1}  # NOT 3 + 5; project still capped
+
+
+def test_blend_tiebreak_default_scope_wins():
+    from memsearch.core import _blend_scope_results
+
+    result = _blend_scope_results(
+        per_scope=[
+            ("project", [_hit("p", 0.5)]),
+            ("global", [_hit("g", 0.5)]),  # equal score
+        ],
+        scope_quotas={"project": None, "global": None},
+        default_scope_name="project",
+        scope_order=["project", "global"],
+        top_k=2,
+    )
+    assert result[0]["scope"] == "project"
+    assert result[1]["scope"] == "global"

From 0a79264398634aff2646383148b267889a9b541f Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:30:08 +0800
Subject: [PATCH 09/16] feat(core): construct one MilvusStore per scope

Add `default_scope_name`, `default_scope_quota`, and `extra_scopes` kwargs
to `MemSearch.__init__`; build `self._stores: dict[str, MilvusStore]` with
one entry per scope; keep `self._store` as a back-compat alias pointing at
the default scope's store. Update `close()` to iterate all stores, with a
`__new__`-safe fallback for test fixtures that bypass `__init__`.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/memsearch/core.py   | 45 +++++++++++++++++++++++++++++++++--------
 tests/test_core_unit.py | 44 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 8 deletions(-)

diff --git a/src/memsearch/core.py b/src/memsearch/core.py
index 6f2067ad..bcf3c9f4 100644
--- a/src/memsearch/core.py
+++ b/src/memsearch/core.py
@@ -146,6 +146,9 @@ def __init__(
         max_chunk_size: int = 1500,
         overlap_lines: int = 2,
         reranker_model: str = "",
+        default_scope_name: str = "project",
+        default_scope_quota: int | None = None,
+        extra_scopes: list[Scope] | None = None,
     ) -> None:
         self._paths = [str(p) for p in (paths or [])]
         self._max_chunk_size = max_chunk_size
@@ -157,14 +160,35 @@ def __init__(
             base_url=embedding_base_url,
             api_key=embedding_api_key,
         )
-        self._store = MilvusStore(
-            uri=milvus_uri,
-            token=milvus_token,
-            collection=collection,
-            dimension=self._embedder.dimension,
-            description=description,
-        )
         self._reranker_model = reranker_model
+        self._default_scope_name = default_scope_name
+        self._default_scope_quota = default_scope_quota
+        self._extra_scopes: list[Scope] = list(extra_scopes or [])
+
+        # Default scope's store (uses parent milvus_uri/token + collection kwarg)
+        self._stores: dict[str, MilvusStore] = {
+            default_scope_name: MilvusStore(
+                uri=milvus_uri,
+                token=milvus_token,
+                collection=collection,
+                dimension=self._embedder.dimension,
+                description=description,
+            )
+        }
+        # Back-compat alias for code that still references self._store
+        self._store = self._stores[default_scope_name]
+
+        # Extra scopes: each gets its own store, optionally on a different Milvus
+        for sc in self._extra_scopes:
+            if sc.name in self._stores:
+                raise ValueError(f"Duplicate scope name: {sc.name!r}")
+            self._stores[sc.name] = MilvusStore(
+                uri=sc.uri or milvus_uri,
+                token=sc.token if sc.token is not None else milvus_token,
+                collection=sc.collection,
+                dimension=self._embedder.dimension,
+                description=description,
+            )
 
     # ------------------------------------------------------------------
     # Indexing
@@ -490,7 +514,12 @@ def store(self) -> MilvusStore:
 
     def close(self) -> None:
         """Release resources."""
-        self._store.close()
+        stores = getattr(self, "_stores", None)
+        if stores is not None:
+            for store in stores.values():
+                store.close()
+        elif (store := getattr(self, "_store", None)) is not None:
+            store.close()
 
     def __enter__(self) -> MemSearch:
         return self
diff --git a/tests/test_core_unit.py b/tests/test_core_unit.py
index cb4ddb4e..a83e2a45 100644
--- a/tests/test_core_unit.py
+++ b/tests/test_core_unit.py
@@ -126,3 +126,47 @@ def test_blend_tiebreak_default_scope_wins():
     )
     assert result[0]["scope"] == "project"
     assert result[1]["scope"] == "global"
+
+
+def test_memsearch_default_only_one_store(tmp_path):
+    """No extra_scopes → exactly one store, named after default_scope_name."""
+    from memsearch.core import MemSearch
+
+    m = MemSearch(milvus_uri=str(tmp_path / "x.db"), embedding_provider="openai", embedding_api_key="fake")
+    try:
+        assert list(m._stores.keys()) == ["project"]
+    finally:
+        m.close()
+
+
+def test_memsearch_extra_scopes_create_per_scope_stores(tmp_path):
+    from memsearch.core import MemSearch, Scope
+
+    m = MemSearch(
+        milvus_uri=str(tmp_path / "x.db"),
+        embedding_provider="openai",
+        embedding_api_key="fake",
+        extra_scopes=[
+            Scope(name="global", collection="ms_global"),
+            Scope(name="personal", collection="ms_personal"),
+        ],
+    )
+    try:
+        assert set(m._stores.keys()) == {"project", "global", "personal"}
+    finally:
+        m.close()
+
+
+def test_memsearch_default_scope_name_override(tmp_path):
+    from memsearch.core import MemSearch
+
+    m = MemSearch(
+        milvus_uri=str(tmp_path / "x.db"),
+        embedding_provider="openai",
+        embedding_api_key="fake",
+        default_scope_name="myproj",
+    )
+    try:
+        assert list(m._stores.keys()) == ["myproj"]
+    finally:
+        m.close()

From 4f7488c47a8dfd1db5db693a9520e42f840795cd Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:36:14 +0800
Subject: [PATCH 10/16] feat(core): multi-scope blended search with fan-out,
 dedup, quotas

Replace MemSearch.search body with single-scope fast path (no scope tag,
backwards-compatible) and multi-scope path using asyncio.gather fan-out,
_blend_scope_results dedup+quota logic, and only_scope restriction with
ValueError on unknown names.  Add _seed_scope helper, two_scope_mem
fixture, and four integration tests covering: no-scope-field on
single-scope, scope tagging on multi-scope, only_scope restriction, and
ValueError on unknown scope names.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/memsearch/core.py | 81 ++++++++++++++++++++++++++++++++++-----
 tests/test_core.py    | 89 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+), 9 deletions(-)

diff --git a/src/memsearch/core.py b/src/memsearch/core.py
index bcf3c9f4..461db23d 100644
--- a/src/memsearch/core.py
+++ b/src/memsearch/core.py
@@ -313,8 +313,9 @@ async def search(
         *,
         top_k: int = 10,
         source_prefix: str | Path | None = None,
+        only_scope: list[str] | None = None,
     ) -> list[dict[str, Any]]:
-        """Semantic search across indexed chunks.
+        """Semantic search across one or more scopes.
 
         Parameters
         ----------
@@ -325,27 +326,89 @@ async def search(
         source_prefix:
             Optional path prefix to scope results. Only chunks whose
             ``source`` starts with this prefix are returned.
+            In multi-scope mode this filter applies only to the default scope.
+        only_scope:
+            If given, restrict the search to the named scope(s).  Raises
+            ``ValueError`` if any name is not a known scope.
 
         Returns
         -------
         list[dict]
             Each dict contains ``content``, ``source``, ``heading``,
-            ``score``, and other metadata.
+            ``score``, and other metadata.  In multi-scope mode each result
+            also carries a ``scope`` field.
         """
-        filter_expr = ""
+        # Single-scope fast path: no extra_scopes → original behavior, no 'scope' tag
+        if not self._extra_scopes:
+            filter_expr = ""
+            if source_prefix is not None:
+                prefix = str(Path(source_prefix).expanduser().resolve())
+                escaped = prefix.replace("\\", "\\\\").replace('"', '\\"')
+                filter_expr = f'source like "{escaped}%"'
+            embeddings = await self._embedder.embed([query])
+            fetch_k = top_k * 3 if self._reranker_model else top_k
+            results = self._store.search(
+                embeddings[0],
+                query_text=query,
+                top_k=fetch_k,
+                filter_expr=filter_expr,
+            )
+            if self._reranker_model and results:
+                from .reranker import rerank
+
+                results = rerank(query, results, model_name=self._reranker_model, top_k=top_k)
+            return results
+
+        # Multi-scope path
+        all_scope_names = list(self._stores.keys())
+        if only_scope is not None:
+            unknown = set(only_scope) - set(all_scope_names)
+            if unknown:
+                raise ValueError(f"unknown scope(s) in only_scope: {sorted(unknown)}")
+            active = [n for n in all_scope_names if n in set(only_scope)]
+        else:
+            active = all_scope_names
+
+        # Source-prefix filter only applies to the default scope
+        default_filter = ""
         if source_prefix is not None:
             prefix = str(Path(source_prefix).expanduser().resolve())
             escaped = prefix.replace("\\", "\\\\").replace('"', '\\"')
-            filter_expr = f'source like "{escaped}%"'
+            default_filter = f'source like "{escaped}%"'
 
         embeddings = await self._embedder.embed([query])
-        fetch_k = top_k * 3 if self._reranker_model else top_k
-        results = self._store.search(embeddings[0], query_text=query, top_k=fetch_k, filter_expr=filter_expr)
-        if self._reranker_model and results:
+        fetch_k_per = max(top_k * 2, 10)  # over-fetch for dedup margin
+
+        async def _fetch(scope_name: str) -> tuple[str, list[dict]]:
+            store = self._stores[scope_name]
+            filt = default_filter if scope_name == self._default_scope_name else ""
+            hits = store.search(embeddings[0], query_text=query, top_k=fetch_k_per, filter_expr=filt)
+            return scope_name, hits
+
+        per_scope = await asyncio.gather(*[_fetch(n) for n in active])
+
+        # Build quota map
+        scope_quotas: dict[str, int | None] = {}
+        for sc in self._extra_scopes:
+            if sc.name in active:
+                scope_quotas[sc.name] = sc.quota
+        if self._default_scope_name in active:
+            scope_quotas[self._default_scope_name] = self._default_scope_quota
+
+        scope_order = [self._default_scope_name] + [s.name for s in self._extra_scopes]
+        merged = _blend_scope_results(
+            per_scope=list(per_scope),
+            scope_quotas=scope_quotas,
+            default_scope_name=self._default_scope_name,
+            scope_order=scope_order,
+            top_k=top_k,
+        )
+
+        if self._reranker_model and merged:
             from .reranker import rerank
 
-            results = rerank(query, results, model_name=self._reranker_model, top_k=top_k)
-        return results
+            merged = rerank(query, merged, model_name=self._reranker_model, top_k=top_k)
+        return merged
 
     # ------------------------------------------------------------------
     # Compact (compress memories)
diff --git a/tests/test_core.py b/tests/test_core.py
index 9db540ba..e26e8ece 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -66,3 +66,92 @@ async def test_index_single_file(mem, sample_dir: Path):
 
     results = await mem.search("list comprehension")
     assert len(results) > 0
+
+
+# ---------------------------------------------------------------------------
+# T7: multi-scope blended search tests
+# ---------------------------------------------------------------------------
+
+from memsearch.chunker import chunk_markdown, compute_chunk_id  # noqa: E402
+
+
+async def _seed_scope(mem, store_name: str, file_path, content: str):
+    """Write content to file_path and upsert its chunks into mem._stores[store_name]."""
+    file_path.write_text(content)
+    text = file_path.read_text()
+    chunks = chunk_markdown(text, source=str(file_path), max_chunk_size=1500, overlap_lines=2)
+    if not chunks:
+        return
+    embeddings = await mem._embedder.embed([c.content for c in chunks])
+    model = mem._embedder.model_name
+    rows = [
+        {
+            "chunk_hash": compute_chunk_id(c.source, c.start_line, c.end_line, c.content_hash, model),
+            "content": c.content,
+            "source": c.source,
+            "heading": c.heading,
+            "heading_level": c.heading_level,
+            "start_line": c.start_line,
+            "end_line": c.end_line,
+            "embedding": e,
+        }
+        for c, e in zip(chunks, embeddings, strict=True)
+    ]
+    mem._stores[store_name].upsert(rows)
+
+
+@pytest.fixture
+def two_scope_mem(tmp_path):
+    from memsearch.core import MemSearch, Scope
+
+    m = MemSearch(
+        milvus_uri=str(tmp_path / "x.db"),
+        collection="ms_proj",
+        extra_scopes=[Scope(name="global", collection="ms_global", quota=2)],
+        default_scope_quota=2,
+    )
+    yield m
+    m.close()
+
+
+@pytest.mark.asyncio
+async def test_search_single_scope_no_scope_field(mem, sample_dir):
+    """Single-scope MemSearch must NOT add a 'scope' field to results."""
+    mem._paths = [str(sample_dir)]
+    await mem.index()
+    results = await mem.search("python", top_k=2)
+    assert results
+    assert "scope" not in results[0]
+
+
+@pytest.mark.asyncio
+async def test_search_multi_scope_tags_results(two_scope_mem, tmp_path):
+    """Multi-scope: results carry 'scope' field; both scopes surface."""
+    proj_dir = tmp_path / "proj"
+    glob_dir = tmp_path / "glob"
+    proj_dir.mkdir()
+    glob_dir.mkdir()
+    await _seed_scope(two_scope_mem, "project", proj_dir / "p.md", "# Project\n\nDeploy via uv.\n")
+    await _seed_scope(two_scope_mem, "global", glob_dir / "g.md", "# Global\n\nUse 4-space indents.\n")
+
+    results = await two_scope_mem.search("how do I deploy", top_k=4)
+    scopes_seen = {r["scope"] for r in results}
+    assert "project" in scopes_seen
+    assert "scope" in results[0]
+
+
+@pytest.mark.asyncio
+async def test_search_only_scope_restriction(two_scope_mem, tmp_path):
+    """only_scope=['project'] must exclude 'global'."""
+    await _seed_scope(two_scope_mem, "project", tmp_path / "p.md", "# P\n\nFoo bar baz.\n")
+    await _seed_scope(two_scope_mem, "global", tmp_path / "g.md", "# G\n\nFoo bar baz.\n")
+
+    results = await two_scope_mem.search("foo", top_k=4, only_scope=["project"])
+    assert results
+    assert all(r["scope"] == "project" for r in results)
+
+
+@pytest.mark.asyncio
+async def test_search_only_scope_unknown_raises(two_scope_mem):
+    with pytest.raises(ValueError, match="unknown scope"):
+        await two_scope_mem.search("foo", top_k=4, only_scope=["nope"])

From 6149eb3f1db88a73e98be2af69aa653f2b35e964 Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:45:52 +0800
Subject: [PATCH 11/16] feat(core): route index() by per-scope paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MemSearch.index() now builds a plan from the default scope's _paths
plus any extra_scopes with non-empty paths.  Each file is indexed into
the per-scope store via _index_file(scope_name=…), and stale-chunk
cleanup for deleted files now runs per scope against that scope's
store.  Read-only scopes (empty paths) are skipped entirely.
_embed_and_store() also accepts an optional scope_name so it writes to
the correct store.

Backward compatibility is preserved: objects constructed via __new__
without _default_scope_name / _stores fall back to the old _store
attribute; when scope_name is None the helpers use self._store as
before.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/memsearch/core.py | 101 ++++++++++++++++++++----------------------
 tests/test_core.py    |  56 +++++++++++++++++++++++
 2 files changed, 104 insertions(+), 53 deletions(-)

diff --git a/src/memsearch/core.py b/src/memsearch/core.py
index 461db23d..9b6774d7 100644
--- a/src/memsearch/core.py
+++ b/src/memsearch/core.py
@@ -195,35 +195,48 @@ def __init__(
     # ------------------------------------------------------------------
 
     async def index(self, *, force: bool = False) -> int:
-        """Scan paths and index all markdown files.
+        """Scan all scopes' paths and index files into the per-scope stores.
 
-        Returns the number of chunks indexed.  Also removes chunks for
-        files that no longer exist on disk (deleted-file cleanup).
+        Default scope uses ``self._paths``. Extra scopes with non-empty
+        ``paths`` are also indexed into their own stores. Read-only scopes
+        (empty paths) are skipped.
+        Returns total chunks indexed across all scopes.
         """
-        files = scan_paths(self._paths)
+        default_name: str | None = getattr(self, "_default_scope_name", None)
+        extra_scopes: list[Scope] = getattr(self, "_extra_scopes", [])
+        stores: dict[str, MilvusStore] = getattr(self, "_stores", {})
+
+        plan: list[tuple[str | None, list[str]]] = [(default_name, self._paths)]
+        plan.extend((sc.name, sc.paths) for sc in extra_scopes if sc.paths)
+
         total = 0
-        failed = 0
-        active_sources: set[str] = set()
-        for f in files:
-            active_sources.add(str(f.path))
-            try:
-                n = await self._index_file(f, force=force)
-                total += n
-            except Exception:
-                failed += 1
-                logger.exception("Failed to index %s, skipping", f.path)
-
-        # Clean up chunks for files that no longer exist
-        indexed_sources = self._store.indexed_sources()
-        for source in indexed_sources:
-            if source not in active_sources:
-                self._store.delete_by_source(source)
-                logger.info("Removed stale chunks for deleted file: %s", source)
-
-        if failed:
-            logger.warning("Indexed %d chunks from %d files (%d files failed)", total, len(files) - failed, failed)
-        else:
-            logger.info("Indexed %d chunks from %d files", total, len(files))
+        for scope_name, paths in plan:
+            if not paths:
+                continue
+            files = scan_paths(paths)
+            failed = 0
+            active_sources: set[str] = set()
+            for f in files:
+                active_sources.add(str(f.path))
+                try:
+                    if scope_name is not None:
+                        n = await self._index_file(f, scope_name=scope_name, force=force)
+                    else:
+                        n = await self._index_file(f, force=force)
+                    total += n
+                except Exception:
+                    failed += 1
+                    logger.exception("Failed to index %s, skipping", f.path)
+
+            store = stores.get(scope_name) if scope_name else getattr(self, "_store", None)
+            if store is not None:
+                for source in store.indexed_sources():
+                    if source not in active_sources:
+                        store.delete_by_source(source)
+                        logger.info("[%s] removed stale chunks for deleted file: %s", scope_name, source)
+
+            if failed:
+                logger.warning("[%s] indexed (%d files failed)", scope_name, failed)
         return total
 
     async def index_file(self, path: str | Path) -> int:
@@ -233,7 +246,8 @@ async def index_file(self, path: str | Path) -> int:
         sf = ScannedFile(path=p, mtime=_st.st_mtime, size=_st.st_size)
         return await self._index_file(sf)
 
-    async def _index_file(self, f: ScannedFile, *, force: bool = False) -> int:
+    async def _index_file(self, f: ScannedFile, *, scope_name: str | None = None, force: bool = False) -> int:
+        store = self._stores[scope_name] if scope_name else self._store
         source = str(f.path)
         text = f.path.read_text(encoding="utf-8")
         chunks = chunk_markdown(
@@ -243,21 +257,14 @@ async def _index_file(self, f: ScannedFile, *, force: bool = False) -> int:
             overlap_lines=self._overlap_lines,
         )
         model = self._embedder.model_name
-
-        # Compute composite chunk IDs (matching OpenClaw format)
         chunk_ids = {compute_chunk_id(c.source, c.start_line, c.end_line, c.content_hash, model) for c in chunks}
-        old_ids = self._store.hashes_by_source(source)
-
-        # Delete stale chunks that are no longer in the file
+        old_ids = store.hashes_by_source(source)
         stale = old_ids - chunk_ids
         if stale:
-            self._store.delete_by_hashes(list(stale))
-
+            store.delete_by_hashes(list(stale))
         if not chunks:
             return 0
-
         if not force:
-            # Only embed chunks whose ID doesn't already exist
             chunks = [
                 c
                 for c in chunks
@@ -265,29 +272,18 @@ async def _index_file(self, f: ScannedFile, *, force: bool = False) -> int:
             ]
             if not chunks:
                 return 0
+        return await self._embed_and_store(chunks, scope_name=scope_name)
 
-        return await self._embed_and_store(chunks)
-
-    async def _embed_and_store(self, chunks: list[Chunk]) -> int:
+    async def _embed_and_store(self, chunks: list[Chunk], *, scope_name: str | None = None) -> int:
         if not chunks:
             return 0
-
+        store = self._stores[scope_name] if scope_name else self._store
         model = self._embedder.model_name
-        # Clean content for embedding: strip HTML comments and metadata noise
-        # so the embedding vector captures semantics, not UUIDs/paths.
-        # The original content is preserved in the Milvus record below.
         contents = [clean_content_for_embedding(c.content) for c in chunks]
         embeddings = await self._embedder.embed(contents)
-
         records: list[dict[str, Any]] = []
         for i, chunk in enumerate(chunks):
-            chunk_id = compute_chunk_id(
-                chunk.source,
-                chunk.start_line,
-                chunk.end_line,
-                chunk.content_hash,
-                model,
-            )
+            chunk_id = compute_chunk_id(chunk.source, chunk.start_line, chunk.end_line, chunk.content_hash, model)
             records.append(
                 {
                     "chunk_hash": chunk_id,
@@ -300,8 +296,7 @@ async def _embed_and_store(self, chunks: list[Chunk]) -> int:
                     "end_line": chunk.end_line,
                 }
             )
-
-        return self._store.upsert(records)
+        return store.upsert(records)
 
     # ------------------------------------------------------------------
     # Search
diff --git a/tests/test_core.py b/tests/test_core.py
index e26e8ece..6a63b2f8 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -155,3 +155,59 @@ async def test_search_only_scope_restriction(two_scope_mem, tmp_path):
 async def test_search_only_scope_unknown_raises(two_scope_mem):
     with pytest.raises(ValueError, match="unknown scope"):
         await two_scope_mem.search("foo", top_k=4, only_scope=["nope"])
+
+
+@pytest.mark.asyncio
+async def test_index_routes_files_by_scope_paths(tmp_path):
+    """Files under scope A's paths land in scope A's store; same for B."""
+    from memsearch.core import MemSearch, Scope
+
+    proj_dir = tmp_path / "proj"
+    glob_dir = tmp_path / "glob"
+    proj_dir.mkdir()
+    glob_dir.mkdir()
+    (proj_dir / "p.md").write_text("# Project\n\nProject-specific note.\n")
+    (glob_dir / "g.md").write_text("# Global\n\nGlobal preference note.\n")
+
+    m = MemSearch(
+        milvus_uri=str(tmp_path / "x.db"),
+        paths=[str(proj_dir)],
+        collection="ms_proj",
+        extra_scopes=[Scope(name="global", collection="ms_global", paths=[str(glob_dir)])],
+    )
+    try:
+        await m.index()
+        proj_results = m._stores["project"].search([0.0] * m._embedder.dimension, top_k=10)
+        glob_results = m._stores["global"].search([0.0] * m._embedder.dimension, top_k=10)
+        proj_sources = {r["source"] for r in proj_results}
+        glob_sources = {r["source"] for r in glob_results}
+        assert any("p.md" in s for s in proj_sources)
+        assert not any("g.md" in s for s in proj_sources)
+        assert any("g.md" in s for s in glob_sources)
+        assert not any("p.md" in s for s in glob_sources)
+    finally:
+        m.close()
+
+
+@pytest.mark.asyncio
+async def test_index_skips_read_only_scope(tmp_path):
+    """A scope with empty paths must not be touched by index()."""
+    from memsearch.core import MemSearch, Scope
+
+    proj_dir = tmp_path / "proj"
+    proj_dir.mkdir()
+    (proj_dir / "p.md").write_text("# P\n\nx.\n")
+    m = MemSearch(
+        milvus_uri=str(tmp_path / "x.db"),
+        paths=[str(proj_dir)],
+        collection="ms_proj",
+        extra_scopes=[Scope(name="readonly", collection="ms_team", paths=[])],
+    )
+    try:
+        n = await m.index()
+        # Read-only scope's collection should be empty
+        ro_results = m._stores["readonly"].search([0.0] * m._embedder.dimension, top_k=10)
+        assert ro_results == []
+        assert n > 0
+    finally:
+        m.close()

From c91abb14206c7f1b7dc0edd7edce40ac113c1f98 Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 17:55:50 +0800
Subject: [PATCH 12/16] feat(core): route watcher events to per-scope stores by
 path prefix

Add _resolve_scope_for_path() (longest-prefix match across all scopes)
and index_file_for_scope() (scope-aware single-file indexer); update
watch() to build a unified path list and route _on_change to the
correct store via the resolver instead of hardcoding the default scope.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/memsearch/core.py             |  56 ++++++++++++++--
 tests/test_watcher_multi_scope.py | 105 ++++++++++++++++++++++++++++++
 2 files changed, 156 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_watcher_multi_scope.py

diff --git a/src/memsearch/core.py b/src/memsearch/core.py
index 9b6774d7..9800a7dc 100644
--- a/src/memsearch/core.py
+++ b/src/memsearch/core.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import asyncio
+import contextlib
 import logging
 from collections.abc import Callable
 from dataclasses import dataclass, field
@@ -246,6 +247,42 @@ async def index_file(self, path: str | Path) -> int:
         sf = ScannedFile(path=p, mtime=_st.st_mtime, size=_st.st_size)
         return await self._index_file(sf)
 
+    def _resolve_scope_for_path(self, file_path: Path | str) -> str | None:
+        """Return the scope name whose paths contain ``file_path`` (longest prefix wins).
+
+        Returns None if the path is not under any configured scope.
+        """
+        target = Path(file_path).expanduser().resolve()
+
+        # Build (scope_name, resolved_path) entries for default scope and all extras
+        candidates: list[tuple[str, Path]] = [
+            (self._default_scope_name, Path(p).expanduser().resolve()) for p in self._paths
+        ]
+        candidates.extend((sc.name, Path(p).expanduser().resolve()) for sc in self._extra_scopes for p in sc.paths)
+
+        # Find all candidates that are an ancestor of (or equal to) target
+        matches: list[tuple[str, Path]] = []
+        for name, root in candidates:
+            with contextlib.suppress(ValueError):
+                target.relative_to(root)
+                matches.append((name, root))
+
+        if not matches:
+            return None
+        # Longest path wins (most specific)
+        matches.sort(key=lambda x: len(x[1].parts), reverse=True)
+        return matches[0][0]
+
+    async def index_file_for_scope(self, path: str | Path, scope_name: str) -> int:
+        """Index a single file into the named scope's store.
+
+        Returns the number of chunks indexed.
+        """
+        p = Path(path).expanduser().resolve()
+        st = p.stat()
+        sf = ScannedFile(path=p, mtime=st.st_mtime, size=st.st_size)
+        return await self._index_file(sf, scope_name=scope_name)
+
     async def _index_file(self, f: ScannedFile, *, scope_name: str | None = None, force: bool = False) -> int:
         store = self._stores[scope_name] if scope_name else self._store
         source = str(f.path)
@@ -540,12 +577,17 @@ def watch(
 
         def _on_change(event_type: str, file_path: Path) -> None:
             try:
+                scope_name = self._resolve_scope_for_path(file_path)
+                if scope_name is None:
+                    logger.debug("Watcher event for %s ignored: not under any scope", file_path)
+                    return
+                store = self._stores[scope_name]
                 if event_type == "deleted":
-                    self._store.delete_by_source(str(file_path))
-                    summary = f"Removed chunks for {file_path}"
+                    store.delete_by_source(str(file_path))
+                    summary = f"[{scope_name}] removed chunks for {file_path}"
                 else:
-                    n = loop.run_until_complete(self.index_file(file_path))
-                    summary = f"Indexed {n} chunks from {file_path}"
+                    n = loop.run_until_complete(self.index_file_for_scope(file_path, scope_name))
+                    summary = f"[{scope_name}] indexed {n} chunks from {file_path}"
                 logger.info(summary)
                 if on_event is not None:
                     on_event(event_type, summary, file_path)
@@ -558,7 +600,11 @@ def _on_change(event_type: str, file_path: Path) -> None:
         fw_kwargs: dict[str, Any] = {}
         if debounce_ms is not None:
             fw_kwargs["debounce_ms"] = debounce_ms
-        watcher = FileWatcher(self._paths, _on_change, **fw_kwargs)
+        # Watch all scopes' paths, not just the default scope
+        all_paths: list[str] = list(self._paths)
+        for sc in self._extra_scopes:
+            all_paths.extend(sc.paths)
+        watcher = FileWatcher(all_paths, _on_change, **fw_kwargs)
         watcher.start()
         return watcher
 
diff --git a/tests/test_watcher_multi_scope.py b/tests/test_watcher_multi_scope.py
new file mode 100644
index 00000000..28c08741
--- /dev/null
+++ b/tests/test_watcher_multi_scope.py
@@ -0,0 +1,105 @@
+"""Watcher routes file events to the correct scope's store via path-prefix match."""
+
+from __future__ import annotations
+
+import os
+
+import pytest
+
+_needs_openai = pytest.mark.skipif(
+    not os.environ.get("OPENAI_API_KEY"),
+    reason="OPENAI_API_KEY not set",
+)
+
+
+@_needs_openai
+def test_resolve_scope_for_path_matches_longest_prefix(tmp_path):
+    """The path-to-scope router picks the longest matching prefix."""
+    from memsearch.core import MemSearch, Scope
+
+    proj = tmp_path / "proj"
+    glob = tmp_path / "glob"
+    proj.mkdir()
+    glob.mkdir()
+    nested = proj / "nested"
+    nested.mkdir()
+
+    m = MemSearch(
+        milvus_uri=str(tmp_path / "x.db"),
+        paths=[str(proj)],
+        collection="ms_proj",
+        extra_scopes=[Scope(name="global", collection="ms_global", paths=[str(glob)])],
+    )
+    try:
+        # File under proj/ resolves to "project"
+        assert m._resolve_scope_for_path(proj / "p.md") == "project"
+        # File under proj/nested/ also resolves to "project" (prefix match still works)
+        assert m._resolve_scope_for_path(nested / "deep.md") == "project"
+        # File under glob/ resolves to "global"
+        assert m._resolve_scope_for_path(glob / "g.md") == "global"
+        # File outside any scope's paths → returns None
+        outside = tmp_path / "outside.md"
+        assert m._resolve_scope_for_path(outside) is None
+    finally:
+        m.close()
+
+
+def test_resolve_scope_longest_prefix_wins(tmp_path):
+    """If two scopes' paths nest (e.g., one inside another), longest prefix wins."""
+    # NOTE: validate_scope_paths normally rejects overlap; we test the resolver
+    # directly here, bypassing the validator, because nested paths CAN occur
+    # programmatically (e.g., a path passed to FileWatcher that happens to be
+    # under two registered roots).
+    from memsearch.core import MemSearch, Scope
+
+    parent = tmp_path / "parent"
+    child = parent / "child"
+    child.mkdir(parents=True)
+
+    m = MemSearch.__new__(MemSearch)  # bypass __init__ to skip validate_scope_paths
+    m._default_scope_name = "outer"
+    m._paths = [str(parent)]
+    m._extra_scopes = [Scope(name="inner", collection="x", paths=[str(child)])]
+
+    file_in_child = child / "deep.md"
+    # Longest-prefix match: file is under both parent and child;
+    # child is the longer prefix so "inner" wins.
+    assert m._resolve_scope_for_path(file_in_child) == "inner"
+    file_in_parent_only = parent / "shallow.md"
+    assert m._resolve_scope_for_path(file_in_parent_only) == "outer"
+
+
+@_needs_openai
+@pytest.mark.asyncio
+async def test_watch_routes_modify_event_to_correct_scope(tmp_path):
+    """A modify event for a file under a scope's paths upserts into that scope's store."""
+    from memsearch.core import MemSearch, Scope
+
+    proj = tmp_path / "proj"
+    glob = tmp_path / "glob"
+    proj.mkdir()
+    glob.mkdir()
+    (proj / "p.md").write_text("# P\n\nProject content.\n")
+    (glob / "g.md").write_text("# G\n\nGlobal content.\n")
+
+    m = MemSearch(
+        milvus_uri=str(tmp_path / "x.db"),
+        paths=[str(proj)],
+        collection="ms_proj",
+        extra_scopes=[Scope(name="global", collection="ms_global", paths=[str(glob)])],
+    )
+    try:
+        # Simulate what _on_change does: route + index_file_for_scope
+        n_proj = await m.index_file_for_scope(proj / "p.md", scope_name="project")
+        n_glob = await m.index_file_for_scope(glob / "g.md", scope_name="global")
+        assert n_proj > 0
+        assert n_glob > 0
+        # Verify routing: project store has p.md, NOT g.md; global store has g.md, NOT p.md
+        proj_results = m._stores["project"].search([0.0] * m._embedder.dimension, top_k=10)
+        glob_results = m._stores["global"].search([0.0] * m._embedder.dimension, top_k=10)
+        assert any("p.md" in r["source"] for r in proj_results)
+        assert not any("g.md" in r["source"] for r in proj_results)
+        assert any("g.md" in r["source"] for r in glob_results)
+        assert not any("p.md" in r["source"] for r in glob_results)
+    finally:
+        m.close()

From 7fcc9b1eafe4a2578bb91ac6de4bf7b19d5d94db Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 18:02:58 +0800
Subject: [PATCH 13/16] feat(cli): --extra-scope and --only-scope flags on
 search

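Add the _parse_extra_scope helper ('name:collection[:quota]' -> Scope)
and two new Click options on the search command.  --extra-scope values
are parsed and passed to the MemSearch constructor as extra_scopes;
--only-scope is split on commas and passed to MemSearch.search() as
only_scope.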

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/memsearch/cli.py             | 37 ++++++++++++++++++++++++++++++--
 tests/test_cli_error_handling.py | 12 +++++++++++
 tests/test_cli_help.py           |  7 ++++++
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/src/memsearch/cli.py b/src/memsearch/cli.py
index 655e78a6..98ea7cc1 100644
--- a/src/memsearch/cli.py
+++ b/src/memsearch/cli.py
@@ -96,6 +96,23 @@ def _cfg_to_memsearch_kwargs(cfg: MemSearchConfig) -> dict:
     }
 
 
+def _parse_extra_scope(value: str):
+    """Parse 'name:collection[:quota]' into a Scope."""
+    from .core import Scope
+
+    parts = value.split(":")
+    if len(parts) < 2 or len(parts) > 3:
+        raise click.BadParameter(f"Invalid --extra-scope format: {value!r}. Expected 'name:collection[:quota]'.")
+    name, collection = parts[0], parts[1]
+    quota: int | None = None
+    if len(parts) == 3:
+        try:
+            quota = int(parts[2])
+        except ValueError:
+            raise click.BadParameter(f"Invalid quota in --extra-scope: {parts[2]!r}. Must be an integer.") from None
+    return Scope(name=name, collection=collection, quota=quota)
+
+
 def _normalize_compact_source(source: str | None) -> str | None:
     """Normalize compact --source paths to the absolute form used at index time.
 
@@ -197,6 +214,18 @@ def index(
 @_common_options
 @click.option("--reranker-model", default=None, help="Cross-encoder model for reranking (empty string disables).")
 @click.option("--json-output", "-j", is_flag=True, help="Output as JSON.")
+@click.option(
+    "--extra-scope",
+    "extra_scope",
+    multiple=True,
+    help="Add a search scope: name:collection[:quota]. Repeatable.",
+)
+@click.option(
+    "--only-scope",
+    "only_scope",
+    default=None,
+    help="Comma-separated scope names to restrict the search to.",
+)
 def search(
     query: str,
     top_k: int | None,
@@ -211,6 +240,8 @@ def search(
     milvus_token: str | None,
     reranker_model: str | None,
     json_output: bool,
+    extra_scope: tuple[str, ...],
+    only_scope: str | None,
 ) -> None:
     """Search indexed memory for QUERY."""
     from .core import MemSearch
@@ -228,10 +259,12 @@ def search(
             reranker_model=reranker_model,
         )
     )
+    extra_scopes = [_parse_extra_scope(v) for v in extra_scope]
+    only_scope_list = [s.strip() for s in only_scope.split(",") if s.strip()] if only_scope else None
     ms = None
     try:
-        ms = MemSearch(**_cfg_to_memsearch_kwargs(cfg))
-        results = _run(ms.search(query, top_k=top_k or 5, source_prefix=source_prefix))
+        ms = MemSearch(**_cfg_to_memsearch_kwargs(cfg), extra_scopes=extra_scopes)
+        results = _run(ms.search(query, top_k=top_k or 5, source_prefix=source_prefix, only_scope=only_scope_list))
         if json_output:
             click.echo(json.dumps(results, indent=2, ensure_ascii=False))
         else:
diff --git a/tests/test_cli_error_handling.py b/tests/test_cli_error_handling.py
index 273b8ce3..2a29c1a7 100644
--- a/tests/test_cli_error_handling.py
+++ b/tests/test_cli_error_handling.py
@@ -77,3 +77,15 @@ def fake_load(_path):
     assert result.exit_code == 1
     assert "Configuration error:" in result.stderr
     assert "DEFINITELY_NOT_SET_MEMSEARCH_API_KEY" in result.stderr
+
+
+def test_search_extra_scope_malformed_raises():
+    # Single token (no colon) is invalid
+    result = CliRunner().invoke(cli, ["search", "foo", "--extra-scope", "badformat"])
+    assert result.exit_code != 0
+    assert "extra-scope" in result.output.lower() or "format" in result.output.lower()
+
+
+def test_search_extra_scope_quota_not_int_raises():
+    result = CliRunner().invoke(cli, ["search", "foo", "--extra-scope", "g:c:notanint"])
+    assert result.exit_code != 0
diff --git a/tests/test_cli_help.py b/tests/test_cli_help.py
index bf2e9783..4281b5b9 100644
--- a/tests/test_cli_help.py
+++ b/tests/test_cli_help.py
@@ -43,3 +43,10 @@ def test_chunk_size_flag_appears_in_help(args: list[str]) -> None:
 
     assert result.exit_code == 0
     assert "--max-chunk-size" in result.output
+
+
+def test_search_help_mentions_extra_scope():
+    result = CliRunner().invoke(cli, ["search", "--help"])
+    assert result.exit_code == 0
+    assert "--extra-scope" in result.output
+    assert "--only-scope" in result.output

From 451e6d4fd18be6de455a1ea0c6e3931302219519 Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 18:06:05 +0800
Subject: [PATCH 14/16] feat(cli): include scope in search text output when
 multi-scope

---
 src/memsearch/cli.py   |  7 ++++++-
 tests/test_cli_help.py | 26 ++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/memsearch/cli.py b/src/memsearch/cli.py
index 98ea7cc1..a79d29e2 100644
--- a/src/memsearch/cli.py
+++ b/src/memsearch/cli.py
@@ -276,7 +276,12 @@ def search(
                 source = r.get("source", "?")
                 heading = r.get("heading", "")
                 content = r.get("content", "")
-                click.echo(f"\n--- Result {i} (score: {score:.4f}) ---")
+                scope = r.get("scope")
+                header = f"\n--- Result {i} (score: {score:.4f}"
+                if scope:
+                    header += f", scope: {scope}"
+                header += ") ---"
+                click.echo(header)
                 click.echo(f"Source: {source}")
                 if heading:
                     click.echo(f"Heading: {heading}")
diff --git a/tests/test_cli_help.py b/tests/test_cli_help.py
index 4281b5b9..6bb1f119 100644
--- a/tests/test_cli_help.py
+++ b/tests/test_cli_help.py
@@ -50,3 +50,29 @@ def test_search_help_mentions_extra_scope():
     assert result.exit_code == 0
     assert "--extra-scope" in result.output
     assert "--only-scope" in result.output
+
+
+def test_search_text_output_includes_scope_when_present(monkeypatch):
+    """When results carry a 'scope' field, the text output shows it."""
+    from memsearch import cli as cli_mod
+
+    fake_results = [
+        {"chunk_hash": "h1", "score": 0.9, "source": "/x.md", "heading": "H", "content": "hi", "scope": "global"},
+    ]
+
+    class FakeMS:
+        def __init__(self, *a, **kw):
+            pass
+
+        async def search(self, *a, **kw):
+            return fake_results
+
+        def close(self):
+            pass
+
+    # search() imports MemSearch from .core at call time, so patching memsearch.core is picked up.
+    monkeypatch.setattr("memsearch.core.MemSearch", FakeMS)
+    result = CliRunner().invoke(cli_mod.cli, ["search", "foo"])
+    # Without this, a crash after the header was printed would still pass the substring check.
+    assert result.exit_code == 0, result.output
+    assert "scope: global" in result.output

From a7306e98c76b08a38b969097ea7272a016a2fb24 Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 18:09:59 +0800
Subject: [PATCH 15/16] feat(cli): pass config-loaded scopes to MemSearch; CLI
 flags append

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/memsearch/cli.py             | 22 ++++++++++++++++++++--
 tests/test_cli_config_helpers.py | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/src/memsearch/cli.py b/src/memsearch/cli.py
index a79d29e2..4517b127 100644
--- a/src/memsearch/cli.py
+++ b/src/memsearch/cli.py
@@ -81,6 +81,19 @@ def _build_cli_overrides(**kwargs) -> dict:
 
 def _cfg_to_memsearch_kwargs(cfg: MemSearchConfig) -> dict:
     """Extract MemSearch constructor kwargs from a resolved config."""
+    from .core import Scope
+
+    extra_scopes = [
+        Scope(
+            name=sc.name,
+            collection=sc.collection,
+            paths=list(sc.paths),
+            quota=sc.quota,
+            uri=sc.uri or None,
+            token=sc.token or None,
+        )
+        for sc in cfg.scopes
+    ]
     return {
         "embedding_provider": cfg.embedding.provider,
         "embedding_model": cfg.embedding.model or None,
@@ -93,6 +106,9 @@ def _cfg_to_memsearch_kwargs(cfg: MemSearchConfig) -> dict:
         "max_chunk_size": cfg.chunking.max_chunk_size,
         "overlap_lines": cfg.chunking.overlap_lines,
         "reranker_model": cfg.reranker.model,
+        "default_scope_name": cfg.default_scope.name,
+        "default_scope_quota": cfg.default_scope.quota,
+        "extra_scopes": extra_scopes,
     }
 
 
@@ -259,11 +275,13 @@ def search(
             reranker_model=reranker_model,
         )
     )
-    extra_scopes = [_parse_extra_scope(v) for v in extra_scope]
     only_scope_list = [s.strip() for s in only_scope.split(",") if s.strip()] if only_scope else None
     ms = None
     try:
-        ms = MemSearch(**_cfg_to_memsearch_kwargs(cfg), extra_scopes=extra_scopes)
+        base_kwargs = _cfg_to_memsearch_kwargs(cfg)
+        cli_extra = [_parse_extra_scope(v) for v in extra_scope]
+        base_kwargs["extra_scopes"] = base_kwargs.get("extra_scopes", []) + cli_extra
+        ms = MemSearch(**base_kwargs)
         results = _run(ms.search(query, top_k=top_k or 5, source_prefix=source_prefix, only_scope=only_scope_list))
         if json_output:
             click.echo(json.dumps(results, indent=2, ensure_ascii=False))
diff --git a/tests/test_cli_config_helpers.py b/tests/test_cli_config_helpers.py
index 73485918..3732b3aa 100644
--- a/tests/test_cli_config_helpers.py
+++ b/tests/test_cli_config_helpers.py
@@ -84,4 +84,24 @@ def test_cfg_to_memsearch_kwargs_translates_resolved_config() -> None:
         "max_chunk_size": 1800,
         "overlap_lines": 4,
         "reranker_model": "",
+        "default_scope_name": "project",
+        "default_scope_quota": None,
+        "extra_scopes": [],
     }
+
+
+def test_cfg_to_memsearch_kwargs_includes_scopes():
+    from memsearch.cli import _cfg_to_memsearch_kwargs
+    from memsearch.config import DefaultScopeConfig, MemSearchConfig, ScopeConfig
+
+    cfg = MemSearchConfig(
+        default_scope=DefaultScopeConfig(name="myproj", quota=5),
+        scopes=[ScopeConfig(name="global", collection="ms_global", quota=3)],
+    )
+    kwargs = _cfg_to_memsearch_kwargs(cfg)
+    assert kwargs["default_scope_name"] == "myproj"
+    assert kwargs["default_scope_quota"] == 5
+    assert len(kwargs["extra_scopes"]) == 1
+    assert kwargs["extra_scopes"][0].name == "global"
+    assert kwargs["extra_scopes"][0].collection == "ms_global"
+    assert kwargs["extra_scopes"][0].quota == 3

From a76fbe17fe3c63136b36b6a2f5649220f64b1e1e Mon Sep 17 00:00:00 2001
From: 1TommyCheung <tomicheung@gmail.com>
Date: Sun, 3 May 2026 18:43:18 +0800
Subject: [PATCH 16/16] test(scenarios): multi-scope E2E validation harness
 using ONNX embeddings

Three scenario-driven workflows that exercise multi-scope routing end-to-end
without requiring any API key (uses the ONNX local embedding provider):

1. Solo dev (closes #337): project + global personal scopes, blended retrieval
   with quota enforcement and only_scope restriction.
2. Chat agents shared memory: a "registrar" indexes shared canon once; multiple
   agents (Alice, Bob) attach to it as a read-only scope (empty paths) while
   each writes to their own private scope. Verifies cross-agent privacy.
3. Individual isolation: two independent MemSearch instances on separate
   Milvus DBs cannot cross-leak. Single-scope behavior unchanged.

Run via: uv run python scripts/scenario_validation.py
---
 scripts/scenario_validation.py | 345 +++++++++++++++++++++++++++++++++
 1 file changed, 345 insertions(+)
 create mode 100644 scripts/scenario_validation.py

diff --git a/scripts/scenario_validation.py b/scripts/scenario_validation.py
new file mode 100644
index 00000000..60ffa55d
--- /dev/null
+++ b/scripts/scenario_validation.py
@@ -0,0 +1,345 @@
+"""Scenario-driven E2E validation of multi-scope memsearch.
+
+Runs three personas end-to-end with real ONNX embeddings (no API key).
+Output is a transcript suitable for pasting into the PR thread as evidence.
+
+Personas:
+  1. Solo dev (issue #337):  project + global personal, blended retrieval
+  2. Chat agents shared:     agents share canon; each agent has private scope
+  3. Individual:             per-user private memory invisible to others
+"""
+
+from __future__ import annotations
+
+import asyncio
+import shutil
+import tempfile
+from pathlib import Path
+
+from memsearch.core import MemSearch, Scope
+
+
+def _bar(label: str) -> None:
+    line = "=" * 78
+    print(f"\n{line}\n  {label}\n{line}")
+
+
+def _section(label: str) -> None:
+    print(f"\n--- {label} ---")
+
+
+def _show_results(label: str, results: list[dict]) -> None:
+    """Print one summary line (scope, score, file name, snippet) per result."""
+    print(f"\n  {label}: {len(results)} result(s)")
+    for i, r in enumerate(results, 1):
+        scope, score = r.get("scope") or "—", r.get("score", 0.0)
+        source = Path(r.get("source", "?")).name
+        snippet = r.get("content", "")[:80].replace("\n", " ")
+        print(f"    {i}. [{scope:>10}] score={score:.3f}  {source}  «{snippet}»")
+
+
+async def scenario_337_solo_dev(workdir: Path) -> None:
+    """#337: solo dev with project memory + global personal preferences.
+
+    Setup:
+      - project/lazarus/ → ms_project_lazarus  (deploy notes, fixes)
+      - personal/        → ms_personal         (coding preferences)
+
+    Verifies:
+      - Project-specific queries surface project hits
+      - Style queries surface personal hits; only_scope=["personal"] excludes project
+      - Per-scope quotas (project=3, personal=2) cap the blended results
+    """
+    _bar("SCENARIO 1: Solo dev (closes issue #337)")
+
+    proj_dir = workdir / "project_lazarus"
+    pers_dir = workdir / "personal"
+    proj_dir.mkdir()
+    pers_dir.mkdir()
+
+    (proj_dir / "deploy.md").write_text(
+        "# Lazarus Deployment\n\n"
+        "Deploy via scripts/deploy/bring_up_workspaces.sh. "
+        "The reproducibility-gate must use varied queries to bypass session cache.\n"
+    )
+    (proj_dir / "bugfix.md").write_text(
+        "# Reproducibility Gate Bug\n\n"
+        "Fixed session cache by adding cache-busting query suffixes.\n"
+    )
+    (pers_dir / "python_style.md").write_text(
+        "# My Python preferences\n\n"
+        "I prefer 4-space indentation. Always use type hints. Avoid implicit str→bytes coercion.\n"
+    )
+    (pers_dir / "git_habits.md").write_text(
+        "# Git habits\n\nSquash-merge feature branches. Conventional commits. Sign commits with GPG.\n"
+    )
+
+    mem = MemSearch(
+        embedding_provider="onnx",
+        milvus_uri=str(workdir / "337.db"),
+        paths=[str(proj_dir)],
+        collection="ms_project_lazarus",
+        default_scope_quota=3,
+        extra_scopes=[
+            Scope(name="personal", collection="ms_personal", paths=[str(pers_dir)], quota=2),
+        ],
+    )
+    try:
+        _section("Indexing")
+        n = await mem.index()
+        print(f"  Indexed {n} total chunks across scopes")
+        for sname, store in mem._stores.items():
+            count = len(store.indexed_sources())
+            print(f"    {sname:>10}: {count} unique source(s)")
+
+        _section("Query 1 — project-specific: 'how do I deploy lazarus'")
+        results = await mem.search("how do I deploy lazarus", top_k=4)
+        _show_results("Blended", results)
+        scopes_seen = {r["scope"] for r in results}
+        assert "project" in scopes_seen, "expected project scope in deploy query"
+
+        _section("Query 2 — cross-cutting: 'python coding style'")
+        results = await mem.search("python coding style", top_k=4)
+        _show_results("Blended", results)
+        scopes_seen = {r["scope"] for r in results}
+        assert "personal" in scopes_seen, "expected personal scope to surface for style query"
+
+        _section("Query 3 — restrict to personal only")
+        results = await mem.search("style", top_k=4, only_scope=["personal"])
+        _show_results("only_scope=['personal']", results)
+        assert all(r["scope"] == "personal" for r in results)
+
+        _section("Quota enforcement check")
+        results = await mem.search("deploy", top_k=10)
+        per_scope: dict[str, int] = {}
+        for r in results:
+            per_scope[r["scope"]] = per_scope.get(r["scope"], 0) + 1
+        print(f"  per-scope counts: {per_scope}")
+        print("  configured quotas: project=3, personal=2")
+        assert per_scope.get("project", 0) <= 3
+        assert per_scope.get("personal", 0) <= 2
+
+        print("\n  ✓ Scenario 1 PASSED — solo dev with project + personal scopes works end-to-end")
+    finally:
+        mem.close()
+
+
+async def scenario_chat_agents_shared(workdir: Path) -> None:
+    """Chat agents shared memory: agents read shared canon, write to private scopes.
+
+    Setup:
+      - canon/                  → ms_canon (read-only — populated once, shared by all)
+      - agent_alice_private/    → ms_alice_private (alice's private notes)
+      - agent_bob_private/      → ms_bob_private (bob's private notes)
+
+    Verifies:
+      - Both agents see the same canon facts
+      - Each agent sees their OWN private notes but NOT the other's
+      - Read-only canon is searched but never indexed against (its files live in
+        a separate dir owned by a "registrar" process, not by the agents)
+    """
+    _bar("SCENARIO 2: Chat agents — shared canon + per-agent private")
+
+    canon_dir = workdir / "canon"
+    alice_dir = workdir / "agent_alice_private"
+    bob_dir = workdir / "agent_bob_private"
+    canon_dir.mkdir()
+    alice_dir.mkdir()
+    bob_dir.mkdir()
+
+    # Canon facts (would be written by a "registrar" with access to canon_dir)
+    (canon_dir / "family_lore.md").write_text(
+        "# Family Lore\n\n"
+        "Cecil's name was changed from Clonk by Alice in 2024. "
+        "ZenCrabby is the canon owner. Tommy approves all canon changes.\n"
+    )
+    (canon_dir / "world.md").write_text(
+        "# World\n\nThe Temple of Tobe is the family's main meeting place. Founded 2023.\n"
+    )
+
+    # Alice's private notes (only Alice can see these)
+    (alice_dir / "alice_notes.md").write_text(
+        "# Alice's private observations\n\n"
+        "Tommy seemed grumpy about gateway latency today. Bob asked about temple history again.\n"
+    )
+
+    # Bob's private notes (only Bob can see these)
+    (bob_dir / "bob_notes.md").write_text(
+        "# Bob's private observations\n\n"
+        "Cecil mentioned wanting to revisit the renaming. Alice was checking deployment timing.\n"
+    )
+
+    # ----- Step 1: index canon ONCE via a "registrar" instance -----
+    registrar = MemSearch(
+        embedding_provider="onnx",
+        milvus_uri=str(workdir / "shared.db"),
+        paths=[str(canon_dir)],
+        collection="ms_canon",
+    )
+    try:
+        n = await registrar.index()
+        print(f"\n  Registrar indexed canon: {n} chunks")
+    finally:
+        registrar.close()
+
+    # ----- Step 2: Alice's MemSearch — canon is READ-ONLY (no paths), private is writable -----
+    alice = MemSearch(
+        embedding_provider="onnx",
+        milvus_uri=str(workdir / "shared.db"),  # same Milvus, different collections
+        paths=[str(alice_dir)],
+        collection="ms_alice_private",
+        default_scope_name="alice_private",
+        default_scope_quota=2,
+        extra_scopes=[
+            Scope(name="canon", collection="ms_canon", paths=[], quota=2),  # read-only
+        ],
+    )
+    bob = MemSearch(
+        embedding_provider="onnx",
+        milvus_uri=str(workdir / "shared.db"),
+        paths=[str(bob_dir)],
+        collection="ms_bob_private",
+        default_scope_name="bob_private",
+        default_scope_quota=2,
+        extra_scopes=[
+            Scope(name="canon", collection="ms_canon", paths=[], quota=2),  # read-only
+        ],
+    )
+
+    try:
+        _section("Indexing private scopes (canon NOT re-indexed by agents — read-only scope)")
+        await alice.index()
+        await bob.index()
+
+        # Verify canon was NOT indexed by alice/bob (read-only scope = no paths)
+        # Their "canon" scope's collection sources came from registrar only
+        alice_canon_count = len(alice._stores["canon"].indexed_sources())
+        bob_canon_count = len(bob._stores["canon"].indexed_sources())
+        print(f"  Alice's view of canon: {alice_canon_count} sources (registrar's work)")
+        print(f"  Bob's view of canon:   {bob_canon_count} sources (registrar's work)")
+        assert alice_canon_count == 2 and bob_canon_count == 2, "agents see canon via shared collection"
+
+        _section("Query 'temple of tobe' via Alice — should surface canon")
+        results = await alice.search("temple of tobe", top_k=4)
+        _show_results("Alice's blended results", results)
+        scopes_seen = {r["scope"] for r in results}
+        assert "canon" in scopes_seen, "Alice should see canon facts"
+        # Alice should NOT see Bob's notes (different collection, not in her config)
+        assert not any("bob_notes" in r["source"] for r in results), "Alice must not see Bob's private notes"
+
+        _section("Query 'gateway latency' via Alice — should surface Alice's private")
+        results = await alice.search("gateway latency observations", top_k=4)
+        _show_results("Alice's blended results", results)
+        # The hit should be Alice's private note
+        alice_private_hits = [r for r in results if r["scope"] == "alice_private"]
+        assert alice_private_hits, "Alice should see her own private observations"
+
+        _section("Query 'gateway latency' via Bob — should NOT surface Alice's private")
+        results = await bob.search("gateway latency observations", top_k=4)
+        _show_results("Bob's blended results", results)
+        # Bob's results MUST NOT contain anything from alice_dir
+        assert not any("alice_notes" in r["source"] for r in results), \
+            "PRIVACY VIOLATION: Bob saw Alice's private notes!"
+        print("  ✓ Privacy preserved: Bob cannot see Alice's private notes")
+
+        _section("Query 'cecil' via Bob — should surface canon + Bob's private")
+        results = await bob.search("cecil renaming", top_k=4)
+        _show_results("Bob's blended results", results)
+        scopes_seen = {r["scope"] for r in results}
+        # Should have BOTH canon and bob_private
+        print(f"  Scopes returned: {scopes_seen}")
+
+        print("\n  ✓ Scenario 2 PASSED — shared canon read by both agents; private scopes are isolated")
+    finally:
+        alice.close()
+        bob.close()
+
+
+async def scenario_individual_isolation(workdir: Path) -> None:
+    """Individual: per-user memory pools that must not leak across users.
+
+    Setup:
+      - user_alice/ → ms_user_alice (Alice's project work)
+      - user_bob/   → ms_user_bob (Bob's project work)
+
+    Each user runs their own MemSearch with their own collection. Verifies
+    that one user's queries cannot reach another user's collection unless
+    explicitly configured.
+    """
+    _bar("SCENARIO 3: Individual user isolation")
+
+    alice_dir = workdir / "user_alice"
+    bob_dir = workdir / "user_bob"
+    alice_dir.mkdir()
+    bob_dir.mkdir()
+
+    (alice_dir / "alice_secret.md").write_text(
+        "# Alice's secret project\n\nAPI key rotation schedule: every 90 days. Notify ops.\n"
+    )
+    (bob_dir / "bob_secret.md").write_text(
+        "# Bob's secret project\n\nDatabase migration plan: dry-run on staging first.\n"
+    )
+
+    alice = MemSearch(
+        embedding_provider="onnx",
+        milvus_uri=str(workdir / "indiv_alice.db"),
+        paths=[str(alice_dir)],
+        collection="ms_user_alice",
+    )
+    bob = MemSearch(
+        embedding_provider="onnx",
+        milvus_uri=str(workdir / "indiv_bob.db"),
+        paths=[str(bob_dir)],
+        collection="ms_user_bob",
+    )
+
+    try:
+        _section("Indexing per-user")
+        await alice.index()
+        await bob.index()
+
+        _section("Alice queries her own data")
+        results = await alice.search("API key rotation", top_k=3)
+        _show_results("Alice's results (single-scope, no scope tag)", results)
+        # Single-scope mode: no 'scope' field on results
+        assert results
+        assert "scope" not in results[0], "single-scope must not add scope tag"
+        assert any("alice_secret" in r["source"] for r in results)
+
+        _section("Alice queries Bob's data — should return nothing")
+        results = await alice.search("database migration plan", top_k=3)
+        _show_results("Alice's results", results)
+        # Alice's query against her own collection should NOT find bob's content
+        assert not any("bob_secret" in r["source"] for r in results), \
+            "PRIVACY VIOLATION: Alice's query reached Bob's collection!"
+        print("  ✓ Isolation preserved: separate Milvus DBs and collections cannot cross-leak")
+
+        _section("Bob queries his own data")
+        results = await bob.search("database migration plan", top_k=3)
+        _show_results("Bob's results", results)
+        assert any("bob_secret" in r["source"] for r in results)
+
+        print("\n  ✓ Scenario 3 PASSED — per-user isolation works (single-scope mode unchanged)")
+    finally:
+        alice.close()
+        bob.close()
+
+
+async def main() -> None:
+    workdir = Path(tempfile.mkdtemp(prefix="memsearch_scenario_"))
+    print(f"Workdir: {workdir}")
+    try:
+        for sub in ("scenario1", "scenario2", "scenario3"):
+            (workdir / sub).mkdir()
+        await scenario_337_solo_dev(workdir / "scenario1")
+        await scenario_chat_agents_shared(workdir / "scenario2")
+        await scenario_individual_isolation(workdir / "scenario3")
+        _bar("ALL SCENARIOS PASSED ✓")
+    finally:
+        shutil.rmtree(workdir, ignore_errors=True)
+
+
+if __name__ == "__main__":
+if __name__ == "__main__":
+    # main() creates its own temp workdir (one subdir per scenario) and removes it on exit.
+    asyncio.run(main())