Skip to content

Commit 3bac365

Browse files
authored
Merge pull request #911 from MemPalace/refactor/entity-detector-i18n
refactor(entity_detector): make multi-language extensible via i18n JSON
2 parents 56b6a63 + c722c91 commit 3bac365

7 files changed

Lines changed: 651 additions & 426 deletions

File tree

mempalace/cli.py

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,12 +73,25 @@ def cmd_init(args):
7373
from .entity_detector import scan_for_detection, detect_entities, confirm_entities
7474
from .room_detector_local import detect_rooms_local
7575

76+
cfg = MempalaceConfig()
77+
78+
# Resolve entity-detection languages: --lang overrides config.
79+
lang_arg = getattr(args, "lang", None)
80+
if lang_arg:
81+
languages = [s.strip() for s in lang_arg.split(",") if s.strip()] or ["en"]
82+
cfg.set_entity_languages(languages)
83+
else:
84+
languages = cfg.entity_languages
85+
languages_tuple = tuple(languages)
86+
7687
# Pass 1: auto-detect people and projects from file content
7788
print(f"\n Scanning for entities in: {args.dir}")
89+
if languages_tuple != ("en",):
90+
print(f" Languages: {', '.join(languages_tuple)}")
7891
files = scan_for_detection(args.dir)
7992
if files:
8093
print(f" Reading {len(files)} files...")
81-
detected = detect_entities(files)
94+
detected = detect_entities(files, languages=languages_tuple)
8295
total = len(detected["people"]) + len(detected["projects"]) + len(detected["uncertain"])
8396
if total > 0:
8497
confirmed = confirm_entities(detected, yes=getattr(args, "yes", False))
@@ -93,7 +106,7 @@ def cmd_init(args):
93106

94107
# Pass 2: detect rooms from folder structure
95108
detect_rooms_local(project_dir=args.dir, yes=getattr(args, "yes", False))
96-
MempalaceConfig().init()
109+
cfg.init()
97110

98111
# Pass 3: protect git repos from accidentally committing per-project files
99112
_ensure_mempalace_files_gitignored(args.dir)
@@ -478,6 +491,16 @@ def main():
478491
action="store_true",
479492
help="Auto-accept all detected entities (non-interactive)",
480493
)
494+
p_init.add_argument(
495+
"--lang",
496+
default=None,
497+
help=(
498+
"Comma-separated language codes for entity detection "
499+
"(e.g. 'en' or 'en,pt-br'). Defaults to value from config "
500+
"(MEMPALACE_ENTITY_LANGUAGES env var or config.json), or 'en'. "
501+
"When given, the value is also persisted to config.json."
502+
),
503+
)
481504

482505
# mine
483506
p_mine = sub.add_parser("mine", help="Mine files into the palace")

mempalace/config.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,42 @@ def hall_keywords(self):
197197
"""Mapping of hall names to keyword lists."""
198198
return self._file_config.get("hall_keywords", DEFAULT_HALL_KEYWORDS)
199199

200+
@property
def entity_languages(self):
    """Languages whose entity-detection patterns should be applied.

    Resolution order:

    1. Env var ``MEMPALACE_ENTITY_LANGUAGES`` (or, when that is unset or
       empty, ``MEMPAL_ENTITY_LANGUAGES``), parsed as a comma-separated
       list.
    2. The ``entity_languages`` field in ``config.json``, when it is a
       list that contains at least one usable entry.
    3. ``["en"]``.

    Entries from either source are stripped of surrounding whitespace and
    blank entries are dropped, so callers never receive empty or padded
    language codes.

    Returns:
        A new, non-empty list of language-code strings.
    """
    env_val = os.environ.get("MEMPALACE_ENTITY_LANGUAGES") or os.environ.get(
        "MEMPAL_ENTITY_LANGUAGES"
    )
    if env_val:
        # A set-but-blank value (e.g. " , ") still wins over config.json and
        # resolves to the default, matching the previous behaviour.
        return [s.strip() for s in env_val.split(",") if s.strip()] or ["en"]

    cfg = self._file_config.get("entity_languages")
    if isinstance(cfg, list):
        # Apply the same normalization the env var and the setter use;
        # config.json values are otherwise returned exactly as stored.
        cleaned = [code for code in (str(s).strip() for s in cfg) if code]
        if cleaned:
            return cleaned
    return ["en"]
217+
218+
def set_entity_languages(self, languages):
    """Persist the entity-detection language list to ``config.json``.

    The normalized list is always stored in the in-memory config. Writing
    it to disk is best-effort: if the config directory or file cannot be
    created or written, a warning is logged and the call still returns.

    Args:
        languages: Iterable of language-code strings. A single string is
            also accepted and is split on commas (e.g. ``"en,pt-br"``)
            instead of being iterated character by character.

    Returns:
        The normalized list (entries stripped, blanks dropped), or
        ``["en"]`` when no usable entry was supplied.
    """
    import logging  # local import: only needed on the failure path

    if isinstance(languages, str):
        languages = languages.split(",")
    normalized = [s.strip() for s in languages if s and s.strip()]
    if not normalized:
        normalized = ["en"]
    self._file_config["entity_languages"] = normalized

    try:
        # Directory creation belongs to the same best-effort step as the
        # write itself: an unwritable location must not abort the caller.
        self._config_dir.mkdir(parents=True, exist_ok=True)
        with open(self._config_file, "w", encoding="utf-8") as f:
            json.dump(self._file_config, f, indent=2, ensure_ascii=False)
    except OSError as exc:
        logging.getLogger(__name__).warning(
            "Could not persist entity_languages to %s: %s", self._config_file, exc
        )

    try:
        # Restrict the file to owner read/write; failure here is ignored.
        self._config_file.chmod(0o600)
    except (OSError, NotImplementedError):
        pass
    return normalized
235+
200236
@property
201237
def hook_silent_save(self):
202238
"""Whether the stop hook saves directly (True) or blocks for MCP calls (False)."""

0 commit comments

Comments
 (0)