diff --git a/.gitignore b/.gitignore
index 1619ba870..01f0097b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,4 +35,5 @@ coverage.xml
 venv/
 
 # ChromaDB local data
+*.sqlite3
 *.sqlite3-journal
diff --git a/mempalace/cli.py b/mempalace/cli.py
--- a/mempalace/cli.py
+++ b/mempalace/cli.py
@@ -92,8 +92,13 @@ def cmd_init(args):
         print(" No entities detected — proceeding with directory-based rooms.")
 
     # Pass 2: detect rooms from folder structure
     detect_rooms_local(project_dir=args.dir, yes=getattr(args, "yes", False))
-    MempalaceConfig().init()
+
+    # Record the resolved project root so the MCP server can auto-discover wings
+    directory = str(Path(args.dir).expanduser().resolve())
+    MempalaceConfig().init(root_dir=directory)
+    print(f"\n Root directory set: {directory}")
+    print(" Subdirectories will be auto-detected as wings on each startup.")
 
     # Pass 3: protect git repos from accidentally committing per-project files
     _ensure_mempalace_files_gitignored(args.dir)
diff --git a/mempalace/config.py b/mempalace/config.py
index a8cbee31c..f092d3e30 100644
--- a/mempalace/config.py
+++ b/mempalace/config.py
@@ -171,6 +171,15 @@ def palace_path(self):
             return env_val
         return self._file_config.get("palace_path", DEFAULT_PALACE_PATH)
 
+    @property
+    def root_dir(self):
+        """Root directory specified during init.
+        Subdirectories become wings automatically."""
+        env_val = os.environ.get("MEMPALACE_ROOT_DIR")
+        if env_val:
+            return env_val
+        return self._file_config.get("root_dir", None)
+
     @property
     def collection_name(self):
         """ChromaDB collection name."""
@@ -218,7 +227,36 @@ def set_hook_setting(self, key: str, value: bool):
         except OSError:
             pass
 
-    def init(self):
+    def _save(self):
+        """Persist current config to disk."""
+        self._config_dir.mkdir(parents=True, exist_ok=True)
+        with open(self._config_file, "w", encoding="utf-8") as f:
+            json.dump(self._file_config, f, indent=2, ensure_ascii=False)
+
+    @property
+    def config_dir(self):
+        """Public access to the config directory path."""
+        return self._config_dir
+
+    def load_wing_config(self):
+        """Load wing_config.json and return as dict."""
+        wing_config_path = self._config_dir / "wing_config.json"
+        if wing_config_path.exists():
+            try:
+                with open(wing_config_path, encoding="utf-8") as f:
+                    return json.load(f)
+            except (json.JSONDecodeError, OSError):
+                pass
+        return {}
+
+    def save_wing_config(self, wing_config):
+        """Save wing_config.json."""
+        self._config_dir.mkdir(parents=True, exist_ok=True)
+        wing_config_path = self._config_dir / "wing_config.json"
+        with open(wing_config_path, "w", encoding="utf-8") as f:
+            json.dump(wing_config, f, indent=2, ensure_ascii=False)
+
+    def init(self, root_dir=None):
         """Create config directory and write default config.json if it doesn't exist."""
         self._config_dir.mkdir(parents=True, exist_ok=True)
         # Restrict directory permissions to owner only (Unix)
@@ -233,13 +271,18 @@ def init(self):
                 "topic_wings": DEFAULT_TOPIC_WINGS,
                 "hall_keywords": DEFAULT_HALL_KEYWORDS,
             }
-            with open(self._config_file, "w") as f:
-                json.dump(default_config, f, indent=2)
+            if root_dir:
+                default_config["root_dir"] = str(root_dir)
+            with open(self._config_file, "w", encoding="utf-8") as f:
+                json.dump(default_config, f, indent=2, ensure_ascii=False)
             # Restrict config file to owner read/write only
             try:
                 self._config_file.chmod(0o600)
             except (OSError, NotImplementedError):
                 pass
+        elif root_dir:
+            self._file_config["root_dir"] = str(root_dir)
+            self._save()
         return self._config_file
 
     def save_people_map(self, people_map):
diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py
--- a/mempalace/mcp_server.py
+++ b/mempalace/mcp_server.py
@@ -46,6 +46,7 @@
 import json  # noqa: E402
 import logging  # noqa: E402
 import hashlib  # noqa: E402
+import re  # noqa: E402
 import time  # noqa: E402
 from datetime import datetime  # noqa: E402
 from pathlib import Path  # noqa: E402
@@ -240,6 +241,140 @@ def _no_palace():
     }
 
 
+# ==================== AUTO-DISCOVER WINGS ====================
+
+IGNORE_DIRS = {
+    "node_modules",
+    ".git",
+    ".cursor",
+    "__pycache__",
+    ".venv",
+    "venv",
+    ".env",
+    "dist",
+    "build",
+    "target",
+    ".next",
+    ".nuxt",
+    ".mempalace",
+    ".idea",
+    ".vs",
+    ".vscode",
+}
+
+# Cache for discovered wings — avoids repeated filesystem scans
+_discovered_wings_cache = None
+
+
+def _folder_to_wing(folder_name: str) -> str:
+    """Normalize a folder name into a valid wing name.
+
+    Preserves Unicode characters (CJK, etc.), hyphens, and underscores.
+    Leading/trailing hyphens and underscores are stripped.
+    'My-Project' -> 'wing_my-project'
+    'my_project' -> 'wing_my_project'
+    'プロジェクトA' -> 'wing_プロジェクトa'
+    """
+    slug = folder_name.lower()
+    # Keep word characters (Unicode-aware), hyphens, digits
+    slug = re.sub(r"[^\w\-]+", "_", slug)
+    # Trim leading/trailing separators so '--project--' -> 'project', not '--project--'
+    slug = slug.strip("_-")
+    if not slug:
+        slug = "unnamed"
+    return f"wing_{slug}"
+
+
+def _sync_wings_from_root(force=False):
+    """Scan subdirectories under root_dir and register new ones as wings.
+
+    Called once on server startup. Results are cached so subsequent
+    calls (e.g. from tool_status) are free unless force=True.
+
+    New folders become wings automatically. Deleted folders are left alone
+    (memories are preserved). Filesystem errors are logged instead of raised
+    so that an unreadable root_dir cannot prevent the server from starting.
+    """
+    global _discovered_wings_cache
+
+    if _discovered_wings_cache is not None and not force:
+        return _discovered_wings_cache
+
+    root_dir = _config.root_dir
+    if not root_dir:
+        _discovered_wings_cache = []
+        return []
+
+    root_path = Path(root_dir).expanduser().resolve()
+    if not root_path.is_dir():
+        logger.warning(f"root_dir not found: {root_dir}")
+        _discovered_wings_cache = []
+        return []
+
+    # Known wings from wing_config.json (single source of truth).
+    # Tolerate a hand-edited file whose top level or "wings" is not an object.
+    wing_config = _config.load_wing_config()
+    if not isinstance(wing_config, dict):
+        wing_config = {}
+    wings = wing_config.get("wings")
+    if not isinstance(wings, dict):
+        wings = {}
+    known_wings = set(wings)
+
+    # Scan subdirectories
+    new_wings = []
+    try:
+        for child in sorted(root_path.iterdir()):
+            if not child.is_dir():
+                continue
+            if child.name.startswith("."):
+                continue
+            if child.name.lower() in IGNORE_DIRS:
+                continue
+
+            wing_name = _folder_to_wing(child.name)
+            if wing_name in known_wings:
+                continue
+            # Two folders can normalize to the same wing name (e.g. "My Project"
+            # and "my_project"); the first one in sort order wins.
+            known_wings.add(wing_name)
+            new_wings.append(
+                {
+                    "name": wing_name,
+                    "path": str(child),
+                    "folder": child.name,
+                }
+            )
+    except OSError as exc:
+        logger.warning(f"Could not scan root_dir {root_dir}: {exc}")
+        _discovered_wings_cache = []
+        return []
+
+    # Register new wings in wing_config.json
+    if new_wings:
+        for w in new_wings:
+            wings[w["name"]] = {
+                "type": "project",
+                "path": w["path"],
+                "keywords": [w["folder"].lower()],
+                "auto_discovered": True,
+            }
+        wing_config["wings"] = wings
+        if "default_wing" not in wing_config:
+            wing_config["default_wing"] = "wing_general"
+
+        try:
+            _config.save_wing_config(wing_config)
+        except OSError as exc:
+            logger.warning(f"Could not save wing_config.json: {exc}")
+        else:
+            names = ", ".join(w["folder"] for w in new_wings)
+            logger.info(f"Auto-discovered {len(new_wings)} new wing(s): {names}")
+
+    _discovered_wings_cache = new_wings
+    return new_wings
+
+
 # ==================== HELPERS ====================
 
 
@@ -294,6 +429,8 @@ def _sanitize_optional_name(value: str = None, field_name: str = "name") -> str:
 
 
 def tool_status():
+    # Ensure wings were auto-discovered (cached after the first call, so normally no I/O)
+    _sync_wings_from_root(force=False)
     # Use create=True only when a palace DB already exists on disk -- this
     # bootstraps the ChromaDB collection on a valid-but-empty palace without
     # accidentally creating a palace in a non-existent directory (#830).
@@ -1686,6 +1823,7 @@ def _restore_stdout():
 def main():
     _restore_stdout()
     logger.info("MemPalace MCP Server starting...")
+    _sync_wings_from_root()
     while True:
         try:
             line = sys.stdin.readline()
diff --git a/mempalace/split_mega_files.py b/mempalace/split_mega_files.py
index f57becaf1..25cc93d8d 100644
--- a/mempalace/split_mega_files.py
+++ b/mempalace/split_mega_files.py
@@ -184,7 +184,7 @@ def split_file(filepath, output_dir, dry_run=False):
     path = Path(filepath)
     max_size = 500 * 1024 * 1024  # 500 MB safety limit
     if path.stat().st_size > max_size:
-        print(f" SKIP: {path.name} exceeds {max_size // (1024*1024)} MB limit")
+        print(f" SKIP: {path.name} exceeds {max_size // (1024 * 1024)} MB limit")
         return []
 
     lines = path.read_text(errors="replace").splitlines(keepends=True)
@@ -273,7 +273,7 @@ def main():
     max_scan_size = 500 * 1024 * 1024  # 500 MB
     for f in files:
         if f.stat().st_size > max_scan_size:
-            print(f" SKIP: {f.name} exceeds {max_scan_size // (1024*1024)} MB limit")
+            print(f" SKIP: {f.name} exceeds {max_scan_size // (1024 * 1024)} MB limit")
             continue
         lines = f.read_text(errors="replace").splitlines(keepends=True)
         boundaries = find_session_boundaries(lines)
diff --git a/tests/test_auto_discover.py b/tests/test_auto_discover.py
new file mode 100644
--- /dev/null
+++ b/tests/test_auto_discover.py
@@ -0,0 +1,161 @@
+"""Tests for auto-discover wings from root_dir."""
+
+import os
+import tempfile
+import shutil
+import uuid
+
+from mempalace.config import MempalaceConfig
+from mempalace.mcp_server import _folder_to_wing, _sync_wings_from_root, _config
+import mempalace.mcp_server as mcp_mod
+
+
+class TestFolderToWing:
+    def test_basic(self):
+        assert _folder_to_wing("MyProject") == "wing_myproject"
+
+    def test_hyphens_preserved(self):
+        assert _folder_to_wing("My-Project") == "wing_my-project"
+
+    def test_underscores_preserved(self):
+        assert _folder_to_wing("my_project") == "wing_my_project"
+
+    def test_no_collision_hyphen_vs_underscore(self):
+        """Folders 'My-Project' and 'my_project' must produce different wing names."""
+        assert _folder_to_wing("My-Project") != _folder_to_wing("my_project")
+
+    def test_special_chars(self):
+        assert _folder_to_wing("Project (v2)!") == "wing_project_v2"
+
+    def test_leading_trailing_cleanup(self):
+        assert _folder_to_wing("--project--") == "wing_project"
+
+    def test_unicode_cjk_folder(self):
+        """CJK folder names are preserved, not stripped."""
+        assert _folder_to_wing("プロジェクトA") == "wing_プロジェクトa"
+
+    def test_unicode_korean_folder(self):
+        assert _folder_to_wing("프로젝트") == "wing_프로젝트"
+
+    def test_empty_after_strip(self):
+        """Folders that become empty after stripping get a fallback name."""
+        assert _folder_to_wing("!!!") == "wing_unnamed"
+
+
+class TestSyncWingsFromRoot:
+    def setup_method(self):
+        """Reset cache before each test."""
+        mcp_mod._discovered_wings_cache = None
+
+    def test_no_root_dir(self, monkeypatch):
+        """Returns empty when root_dir is not set."""
+        # monkeypatch restores both the env var and the config entry afterwards
+        monkeypatch.delenv("MEMPALACE_ROOT_DIR", raising=False)
+        monkeypatch.delitem(_config._file_config, "root_dir", raising=False)
+        assert _sync_wings_from_root(force=True) == []
+
+    def test_discovers_new_folders(self):
+        tmpdir = tempfile.mkdtemp()
+        try:
+            os.makedirs(os.path.join(tmpdir, "ProjectA"))
+            os.makedirs(os.path.join(tmpdir, "ProjectB"))
+            os.makedirs(os.path.join(tmpdir, ".git"))
+            os.makedirs(os.path.join(tmpdir, "node_modules"))
+
+            _config._file_config["root_dir"] = tmpdir
+            result = _sync_wings_from_root(force=True)
+
+            names = [w["folder"] for w in result]
+            assert "ProjectA" in names
+            assert "ProjectB" in names
+            assert ".git" not in names
+            assert "node_modules" not in names
+        finally:
+            _config._file_config.pop("root_dir", None)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+    def test_cache_prevents_rescan(self):
+        tmpdir = tempfile.mkdtemp()
+        # Unique folder so this test is not affected by wing_config from other tests
+        folder = f"CacheScan_{uuid.uuid4().hex[:8]}"
+        try:
+            os.makedirs(os.path.join(tmpdir, folder))
+            _config._file_config["root_dir"] = tmpdir
+
+            result1 = _sync_wings_from_root(force=True)
+            assert len(result1) > 0
+
+            # Second call must return the cached list (same object; no filesystem rescan)
+            result2 = _sync_wings_from_root(force=False)
+            assert result2 is result1  # same object = cached
+        finally:
+            _config._file_config.pop("root_dir", None)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+    def test_colliding_folder_names_registered_once(self):
+        """Folders that normalize to the same wing name yield a single new wing."""
+        tmpdir = tempfile.mkdtemp()
+        uid = uuid.uuid4().hex[:8]
+        try:
+            os.makedirs(os.path.join(tmpdir, f"Dup {uid}"))
+            os.makedirs(os.path.join(tmpdir, f"dup_{uid}"))
+            _config._file_config["root_dir"] = tmpdir
+
+            result = _sync_wings_from_root(force=True)
+            matches = [w for w in result if w["name"] == f"wing_dup_{uid}"]
+            assert len(matches) == 1
+        finally:
+            _config._file_config.pop("root_dir", None)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+    def test_no_chromadb_dependency(self):
+        """Wing discovery works without ChromaDB collection access."""
+        tmpdir = tempfile.mkdtemp()
+        try:
+            os.makedirs(os.path.join(tmpdir, "NewProject"))
+            _config._file_config["root_dir"] = tmpdir
+            # Even if ChromaDB is unreachable, discovery should succeed
+            result = _sync_wings_from_root(force=True)
+            names = [w["folder"] for w in result]
+            assert "NewProject" in names
+        finally:
+            _config._file_config.pop("root_dir", None)
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+
+class TestConfigRootDir:
+    def test_root_dir_property(self):
+        tmpdir = tempfile.mkdtemp()
+        try:
+            config = MempalaceConfig(config_dir=tmpdir)
+            assert config.root_dir is None
+
+            config._file_config["root_dir"] = "/some/path"
+            assert config.root_dir == "/some/path"
+        finally:
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+    def test_init_saves_root_dir(self):
+        tmpdir = tempfile.mkdtemp()
+        try:
+            config = MempalaceConfig(config_dir=tmpdir)
+            config.init(root_dir="/my/projects")
+
+            # Reload and verify
+            config2 = MempalaceConfig(config_dir=tmpdir)
+            assert config2.root_dir == "/my/projects"
+        finally:
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+    def test_wing_config_roundtrip(self):
+        tmpdir = tempfile.mkdtemp()
+        try:
+            config = MempalaceConfig(config_dir=tmpdir)
+            config.init()
+
+            wc = {"wings": {"wing_test": {"type": "project"}}}
+            config.save_wing_config(wc)
+            loaded = config.load_wing_config()
+            assert loaded["wings"]["wing_test"]["type"] == "project"
+        finally:
+            shutil.rmtree(tmpdir, ignore_errors=True)
diff --git a/tests/test_closets.py b/tests/test_closets.py
index 976086dce..081f6a0ca 100644
--- a/tests/test_closets.py
+++ b/tests/test_closets.py
@@ -825,9 +825,9 @@ def worker(i):
         assert not errors, f"worker raised: {errors}"
 
         tunnels = list_tunnels()
-        assert len(tunnels) == 5, (
-            f"expected 5 concurrent tunnels, got {len(tunnels)} — " "write race dropped some"
-        )
+        assert (
+            len(tunnels) == 5
+        ), f"expected 5 concurrent tunnels, got {len(tunnels)} — write race dropped some"
 
     def test_created_at_is_timezone_aware(self):
         """Regression: created_at must be tz-aware UTC, not naive."""
diff --git a/tests/test_normalize.py b/tests/test_normalize.py
index c175450bb..b8c5a79a8 100644
--- a/tests/test_normalize.py
+++ b/tests/test_normalize.py
@@ -1173,11 +1173,7 @@ class TestStripNoiseRemovesSystemChrome:
 
     def test_strips_line_anchored_system_reminder_block(self):
         text = (
-            "> User:\n"
-            "\n"
-            "Auto-save reminder...\n"
-            "\n"
-            "> Real message."
+            "> User:\n\nAuto-save reminder...\n\n> Real message."
         )
         out = strip_noise(text)
         assert "system-reminder" not in out
@@ -1187,7 +1183,7 @@ def test_strips_line_anchored_system_reminder_block(self):
     def test_strips_system_reminder_with_blockquote_prefix(self):
         # _messages_to_transcript prefixes lines with "> ", so the line
         # anchor must also accept that shape.
-        text = "> User:\n" "> Injected noise\n" "> Real message."
+        text = "> User:\n> Injected noise\n> Real message."
         out = strip_noise(text)
         assert "Injected noise" not in out
         assert "Real message." in out