From f27b003673d3e4e953de7be8f335162662ea8170 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Fri, 8 May 2026 23:05:10 +0200
Subject: [PATCH 01/21] feat(model): /model picker, live Ollama discovery,
 persistent choice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds an inline-keyboard model picker so users can switch LLM backends
from Telegram without SSH or env edits, and makes the choice survive
reboots.

User-facing additions
- New /model command. Without args it shows an inline keyboard with
  every preset (gemini, glm, ollama). With an argument (`/model glm`)
  it falls through to the existing /use logic, so the old behaviour
  still works.
- Tapping the 🦙 ollama row queries the configured Ollama server live
  (`/api/tags` + `/api/show`), filters by capabilities containing
  `tools`, and only lists tool-capable models. If none of the
  installed models advertise tool support, falls back to all of them
  with a warning. Includes `◂ Back` button and a graceful unreachable
  state.
- Selecting a model immediately switches the active LiteLLM model and
  acknowledges with the new model name on the E-Ink face.

Persistence
- `LiteLLMConnector.set_model()` now writes the choice to
  `data/active_model.json` (gitignored). On startup the connector
  restores that selection before falling back to `DEFAULT_LITE_PRESET`,
  so reboots and `systemctl restart` no longer reset the user's pick.

Reliability
- Increase the application's HTTP timeouts via `Application.builder()`
  (`read=60`, `write=60`, `connect=30`, `pool=30`). The Pi Zero 2W's
  WiFi can otherwise time out polling Telegram while a long Ollama
  reply is streaming, surfacing as `httpx.ReadError` / `Timed out`.

Config
- Add `OLLAMA_MODEL` and `OLLAMA_API_BASE` env-driven defaults plus an
  `ollama` entry in `LLM_PRESETS`. Default API base is the placeholder
  `http://ollama-server:11434`; set `OLLAMA_API_BASE` in `.env` to
  point at your actual Ollama host.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .gitignore                   |   1 +
 src/bot/handlers.py          | 153 ++++++++++++++++++++++++++++++++++-
 src/config.py                |   6 ++
 src/llm/litellm_connector.py |  50 ++++++++++--
 src/main.py                  |  20 ++++-
 5 files changed, 218 insertions(+), 12 deletions(-)

diff --git a/.gitignore b/.gitignore
index 31d0648..d64c2f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,6 +102,7 @@ coverage.xml
 # ===================
 rate_limits.json
 data/cron_jobs.json
+data/active_model.json
 # data/custom_faces.json
 
 # ===================
diff --git a/src/bot/handlers.py b/src/bot/handlers.py
index c733a48..5e36fc1 100644
--- a/src/bot/handlers.py
+++ b/src/bot/handlers.py
@@ -8,7 +8,7 @@
 import tempfile
 from pathlib import Path
 
-from telegram import Update
+from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
 from telegram.constants import ChatAction
 from telegram.ext import ContextTypes
 
@@ -29,7 +29,7 @@
 from memory.summarize import optimize_history
 from cron.scheduler import add_cron_job, list_cron_jobs, remove_cron_job
 from skills.loader import get_eligible_skills
-from config import LLM_PRESETS, OPENAI_API_KEY
+from config import LLM_PRESETS, OPENAI_API_KEY, OLLAMA_API_BASE
 from llm.prompts import build_system_context, build_vault_context
 
 log = logging.getLogger(__name__)
@@ -886,11 +886,158 @@ async def cmd_use(update: Update, context: ContextTypes.DEFAULT_TYPE):
     if "gemini" in model_key: emoji = "♊️"
     
     await update.message.reply_text(f"{emoji} Switched to *{model_key.upper()}*!\nModel: {preset['model']}", parse_mode="Markdown")
-    
+
     # Visual update
     show_face(mood="happy", text=f"Model: {model_key.upper()}")
 
 
+# --- /model command: inline-button picker with live Ollama discovery ---
+
+_MODEL_EMOJI = {"gemini": "♊️", "glm": "🇨🇳", "ollama": "🦙"}
+
+
+def _ollama_list_with_capabilities(timeout: float = 4.0) -> list[dict]:
+    """Fetch installed Ollama models + capabilities. Returns [{name, supports_tools}]."""
+    import requests
+    base = (OLLAMA_API_BASE or "").rstrip("/")
+    if not base:
+        return []
+    try:
+        r = requests.get(f"{base}/api/tags", timeout=timeout)
+        r.raise_for_status()
+        names = [m.get("name") for m in r.json().get("models", []) if m.get("name")]
+    except Exception as e:
+        log.warning(f"Ollama /api/tags failed: {e}")
+        return []
+
+    out = []
+    for name in names:
+        supports = False
+        try:
+            sr = requests.post(f"{base}/api/show", json={"model": name}, timeout=timeout)
+            if sr.ok:
+                caps = sr.json().get("capabilities") or []
+                supports = "tools" in caps
+        except Exception:
+            pass
+        out.append({"name": name, "supports_tools": supports})
+    return out
+
+
+def _top_model_markup(current: str) -> InlineKeyboardMarkup:
+    rows = []
+    for key in LLM_PRESETS.keys():
+        emoji = _MODEL_EMOJI.get(key, "🔹")
+        active = LLM_PRESETS[key]["model"] == current or (
+            key == "ollama" and isinstance(current, str) and current.startswith("ollama_chat/")
+        )
+        marker = " ✅" if active else ""
+        suffix = " ▸" if key == "ollama" else ""
+        rows.append([InlineKeyboardButton(f"{emoji} {key}{marker}{suffix}", callback_data=f"model:{key}")])
+    return InlineKeyboardMarkup(rows)
+
+
+async def cmd_model(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Show inline buttons to switch LLM model. With argument acts like /use."""
+    if not is_allowed(update.effective_user.id, update.effective_chat.id):
+        return
+
+    if context.args:
+        return await cmd_use(update, context)
+
+    router = get_router()
+    current = router.litellm.model
+    text = f"🦄 *Current:* `{current}`\n\nPick a model:"
+    await update.message.reply_text(text, parse_mode="Markdown", reply_markup=_top_model_markup(current))
+
+
+async def cb_model(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Callback for /model inline buttons (presets + Ollama submenu)."""
+    import asyncio
+    query = update.callback_query
+    if not is_allowed(query.from_user.id, query.message.chat_id):
+        await query.answer("Not allowed", show_alert=True)
+        return
+    await query.answer()
+
+    data = query.data or ""
+    router = get_router()
+
+    # Specific Ollama model switch: omd:<name>
+    if data.startswith("omd:"):
+        model_name = data.split(":", 1)[1]
+        full = f"ollama_chat/{model_name}"
+        router.litellm.set_model(full, OLLAMA_API_BASE)
+        router.force_lite = True
+        await query.edit_message_text(
+            f"🦙 Switched to *Ollama / {model_name}*\n`{full}`",
+            parse_mode="Markdown"
+        )
+        show_face(mood="happy", text=f"Ollama: {model_name[:20]}")
+        return
+
+    key = data.split(":", 1)[-1]
+
+    # Back to top menu
+    if key == "back":
+        await query.edit_message_text(
+            f"🦄 *Current:* `{router.litellm.model}`\n\nPick a model:",
+            parse_mode="Markdown",
+            reply_markup=_top_model_markup(router.litellm.model)
+        )
+        return
+
+    # Ollama: fetch and show submenu (only tool-capable models)
+    if key == "ollama":
+        await query.edit_message_text("🦙 Fetching models from Ollama server…")
+        models = await asyncio.to_thread(_ollama_list_with_capabilities)
+
+        if not models:
+            await query.edit_message_text(
+                f"❌ Could not reach Ollama at `{OLLAMA_API_BASE}`",
+                parse_mode="Markdown",
+                reply_markup=InlineKeyboardMarkup([[InlineKeyboardButton("◂ Back", callback_data="model:back")]])
+            )
+            return
+
+        tool_models = [m for m in models if m["supports_tools"]]
+        show_models = tool_models if tool_models else models
+        note = "" if tool_models else "\n⚠️ _No tool-capable models found, showing all_"
+
+        rows = []
+        for m in show_models:
+            name = m["name"]
+            cb = f"omd:{name}"
+            if len(cb.encode("utf-8")) > 60:
+                continue  # Telegram callback_data limit (64 bytes)
+            tag = "🔧" if m["supports_tools"] else "🔸"
+            rows.append([InlineKeyboardButton(f"{tag} {name}", callback_data=cb)])
+        rows.append([InlineKeyboardButton("◂ Back", callback_data="model:back")])
+
+        await query.edit_message_text(
+            f"🦙 *Ollama models* ({len(show_models)}){note}\n\n🔧 = supports tools",
+            parse_mode="Markdown",
+            reply_markup=InlineKeyboardMarkup(rows)
+        )
+        return
+
+    # Static presets (gemini, glm)
+    if key not in LLM_PRESETS:
+        await query.edit_message_text("❌ Unknown model.")
+        return
+
+    preset = LLM_PRESETS[key]
+    router.litellm.set_model(preset["model"], preset["api_base"])
+    router.force_lite = True
+
+    emoji = _MODEL_EMOJI.get(key, "🔹")
+    await query.edit_message_text(
+        f"{emoji} Switched to *{key.upper()}*\n`{preset['model']}`",
+        parse_mode="Markdown"
+    )
+    show_face(mood="happy", text=f"Model: {key.upper()}")
+
+
 async def cmd_memory(update: Update, context: ContextTypes.DEFAULT_TYPE):
     """Handle /memory command — show database stats."""
     user = update.effective_user
diff --git a/src/config.py b/src/config.py
index affb81f..fe49b39 100644
--- a/src/config.py
+++ b/src/config.py
@@ -34,6 +34,8 @@ def _env_flag(name: str, default: bool = False) -> bool:
 GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini/gemini-1.5-flash")
 GEMINI_API_BASE = os.environ.get("GEMINI_API_BASE", "")  # Optional override for Z.ai/OpenAI
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
+OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:14b")
+OLLAMA_API_BASE = os.environ.get("OLLAMA_API_BASE", "http://ollama-server:11434")
 BOT_LANGUAGE = os.environ.get("BOT_LANGUAGE", "en")  # Default response language
 GROUP_CHAT_ID = int(os.environ.get("GROUP_CHAT_ID", "0"))  # Optional group for heartbeat
 ENABLE_LITELLM_TOOLS = _env_flag("ENABLE_LITELLM_TOOLS", True)
@@ -54,6 +56,10 @@ def _env_flag(name: str, default: bool = False) -> bool:
     "glm": {
         "model": "anthropic/glm-5.1",
         "api_base": "https://api.z.ai/api/anthropic"
+    },
+    "ollama": {
+        "model": f"ollama_chat/{OLLAMA_MODEL}",
+        "api_base": OLLAMA_API_BASE
     }
 }
 
diff --git a/src/llm/litellm_connector.py b/src/llm/litellm_connector.py
index e5f8e65..9fc9011 100644
--- a/src/llm/litellm_connector.py
+++ b/src/llm/litellm_connector.py
@@ -1245,25 +1245,63 @@ def _build_tool_footer(actions: list[str]) -> str:
 # CONNECTOR
 # ============================================================
 
+_ACTIVE_MODEL_FILE = None  # Set lazily to avoid circular import
+
+
+def _active_model_path() -> Path:
+    global _ACTIVE_MODEL_FILE
+    if _ACTIVE_MODEL_FILE is None:
+        from config import DATA_DIR
+        _ACTIVE_MODEL_FILE = DATA_DIR / "active_model.json"
+    return _ACTIVE_MODEL_FILE
+
+
+def _load_active_model() -> Optional[dict]:
+    try:
+        p = _active_model_path()
+        if p.exists():
+            return json.loads(p.read_text())
+    except Exception as e:
+        log.warning(f"Could not load active_model.json: {e}")
+    return None
+
+
+def _save_active_model(model: str, api_base: Optional[str]) -> None:
+    try:
+        p = _active_model_path()
+        p.parent.mkdir(parents=True, exist_ok=True)
+        p.write_text(json.dumps({"model": model, "api_base": api_base}, indent=2))
+    except Exception as e:
+        log.warning(f"Could not save active_model.json: {e}")
+
+
 class LiteLLMConnector(LLMConnector):
     """LiteLLM connector with tools."""
-    
+
     name = "litellm"
-    
+
     def __init__(self, model: str = None, api_base: str = None):
         from config import DEFAULT_LITE_PRESET, LLM_PRESETS, GEMINI_API_BASE
         if model is not None:
             self.model = model
             self.api_base = api_base
         else:
-            preset = LLM_PRESETS.get(DEFAULT_LITE_PRESET, LLM_PRESETS["glm"])
-            self.model = preset["model"]
-            self.api_base = preset.get("api_base") or GEMINI_API_BASE or None
+            # Persisted choice from /model (survives restart) takes priority
+            saved = _load_active_model()
+            if saved and saved.get("model"):
+                self.model = saved["model"]
+                self.api_base = saved.get("api_base")
+                log.info(f"[LiteLLM] Restored active model: {self.model}")
+            else:
+                preset = LLM_PRESETS.get(DEFAULT_LITE_PRESET, LLM_PRESETS["glm"])
+                self.model = preset["model"]
+                self.api_base = preset.get("api_base") or GEMINI_API_BASE or None
 
     def set_model(self, model: str, api_base: str = None):
-        """Dynamically switch model and api_base."""
+        """Dynamically switch model and api_base. Persists across restarts."""
         self.model = model
         self.api_base = api_base
+        _save_active_model(model, api_base)
     
     def is_available(self) -> bool:
         return LITELLM_AVAILABLE
diff --git a/src/main.py b/src/main.py
index f8f9bda..17148bc 100644
--- a/src/main.py
+++ b/src/main.py
@@ -23,7 +23,7 @@
 SRC_DIR = Path(__file__).parent.resolve()
 sys.path.insert(0, str(SRC_DIR))
 
-from telegram.ext import Application, CommandHandler, MessageHandler, filters
+from telegram.ext import Application, CommandHandler, MessageHandler, CallbackQueryHandler, filters
 
 from config import BOT_TOKEN, HEARTBEAT_INTERVAL, HEARTBEAT_FIRST_RUN, LEVEL_UP_DISPLAY_DELAY
 from db.memory import init_db
@@ -31,7 +31,7 @@
 from bot.handlers import (
     cmd_start, cmd_clear, cmd_context, cmd_status, cmd_xp, cmd_pro, cmd_use,
     cmd_remember, cmd_recall, cmd_vault, cmd_cron, cmd_jobs, cmd_memory, cmd_health,
-    handle_message, handle_voice
+    cmd_model, cb_model, handle_message, handle_voice
 )
 from bot.heartbeat import send_heartbeat
 from hooks.runner import run_hook, HookEvent, discover_and_load_hooks
@@ -244,7 +244,19 @@ async def post_init(application: Application):
             application.job_queue.run_once(chill_mode, 60)
     
     # Build application
-    app = Application.builder().token(BOT_TOKEN).post_init(post_init).build()
+    # Generous HTTP timeouts — Pi Zero 2W's WiFi can otherwise time out polling
+    # Telegram while a long Ollama reply is streaming, surfacing as
+    # `httpx.ReadError` / `Timed out`.
+    app = (
+        Application.builder()
+        .token(BOT_TOKEN)
+        .post_init(post_init)
+        .read_timeout(60)
+        .write_timeout(60)
+        .connect_timeout(30)
+        .pool_timeout(30)
+        .build()
+    )
     
     # Register handlers
     app.add_handler(CommandHandler("start", cmd_start))
@@ -257,6 +269,8 @@ async def post_init(application: Application):
     app.add_handler(CommandHandler("mode", cmd_pro))
     app.add_handler(CommandHandler("use", cmd_use))
     app.add_handler(CommandHandler("switch", cmd_use))
+    app.add_handler(CommandHandler("model", cmd_model))
+    app.add_handler(CallbackQueryHandler(cb_model, pattern=r"^(model|omd):"))
     app.add_handler(CommandHandler("remember", cmd_remember))
     app.add_handler(CommandHandler("recall", cmd_recall))
     app.add_handler(CommandHandler("vault", cmd_vault))

From 40ae47fa4e97b94c4ffb349c9f3c654aca1762c4 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 00:10:54 +0200
Subject: [PATCH 02/21] docs(changelog): /model picker + live Ollama discovery
 + persistence

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 601e2b0..60bbcbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,16 @@
 
 All notable changes to the OpenClawGotchi project will be documented in this file.
 
+## [Unreleased] - 2026-05-09
+
+### Added
+- **`/model` Telegram command**: inline-keyboard model picker. Without args it opens buttons for every preset (gemini, glm, ollama). With an argument (`/model glm`) it falls through to the existing `/use` flow. `/use` and `/switch` remain as text aliases.
+- **Live Ollama discovery**: tapping `🦙 ollama ▸` queries the configured Ollama server (`/api/tags` + `/api/show`), filters by `capabilities.tools`, and only lists tool-capable models. Falls back to all installed models with a warning when none advertise tools. Includes `◂ Back` button and a graceful "could not reach server" state. New env vars: `OLLAMA_MODEL` (default `qwen2.5:14b`) and `OLLAMA_API_BASE` (placeholder default `http://ollama-server:11434`).
+- **Persistent model choice**: `/model` and `/use` now write the selection to `data/active_model.json` (gitignored). On startup `LiteLLMConnector` restores it before falling back to `DEFAULT_LITE_PRESET`. Survives `systemctl restart` and reboots.
+
+### Changed
+- **HTTP timeouts** raised via `Application.builder()` (`read=60`, `write=60`, `connect=30`, `pool=30`). Pi Zero 2W's WiFi can otherwise time out polling Telegram while a long Ollama reply is streaming, surfacing as `httpx.ReadError` / `Timed out`.
+
 ## [Unreleased] - 2026-04-29
 
 ### Added

From ff4d67d3c2be66124836f474dcbdc0032d1e6e25 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Fri, 8 May 2026 23:06:03 +0200
Subject: [PATCH 03/21] fix(prompts): respect BOT_LANGUAGE in system prompt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`BOT_LANGUAGE` was defined in `config.py` and exposed via `.env`, but
nothing actually injected it into the system prompt — heartbeat
reflections and the SAY: speech bubble would happily drift into
Japanese/Chinese on Qwen-family models because no language was pinned.

Add `_language_directive()` and append it from `build_system_context()`
so the directive is part of every system prompt path (Telegram replies,
heartbeat reflections, SAY: bubble, autonomous output). Codes are
mapped to readable names for the LLM (`de` → "German (Deutsch)" etc.)
for the common languages; unknown codes pass through verbatim.

Default behaviour stays English (`BOT_LANGUAGE=en`). Users can mirror
another language by writing in it — the directive explicitly allows
that override, but blocks drifting into a third language.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/llm/prompts.py | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/llm/prompts.py b/src/llm/prompts.py
index 06d59f7..cc9152c 100644
--- a/src/llm/prompts.py
+++ b/src/llm/prompts.py
@@ -7,7 +7,30 @@
 
 from pathlib import Path
 
-from config import PROJECT_DIR, WORKSPACE_DIR, CUSTOM_FACES_PATH
+from config import PROJECT_DIR, WORKSPACE_DIR, CUSTOM_FACES_PATH, BOT_LANGUAGE
+
+
+_LANG_NAMES = {
+    "de": "German (Deutsch)", "en": "English", "ru": "Russian (Русский)",
+    "es": "Spanish (Español)", "fr": "French (Français)", "it": "Italian (Italiano)",
+    "pt": "Portuguese", "nl": "Dutch", "pl": "Polish", "tr": "Turkish",
+    "ja": "Japanese", "zh": "Chinese", "ko": "Korean",
+}
+
+
+def _language_directive() -> str:
+    """Build a strong language instruction from BOT_LANGUAGE."""
+    code = (BOT_LANGUAGE or "").strip().lower()
+    if not code:
+        return ""
+    name = _LANG_NAMES.get(code, code)
+    return (
+        f"\n---\n## Language\n"
+        f"ALWAYS respond in **{name}** by default — including the SAY: speech bubble "
+        f"and any autonomous output (heartbeat, reflections, status). "
+        f"Only switch language if the user clearly writes to you in another language; "
+        f"in that case, mirror their language. Never use a third language."
+    )
 from hardware.system import get_stats_string
 import json
 
@@ -180,7 +203,12 @@ def build_system_context(user_message: str = "") -> str:
     ALWAYS includes skills (if available).
     """
     parts = [load_bot_instructions()]
-    
+
+    # Pin response language (BOT_LANGUAGE) — applies to heartbeat, SAY: bubble, replies
+    lang = _language_directive()
+    if lang:
+        parts.append(lang)
+
     # Add custom faces list if any
     custom_faces = _load_custom_faces_list()
     if custom_faces:

From f6d1cba9850cd270cdd3281f6142e973814eed55 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 00:11:33 +0200
Subject: [PATCH 04/21] fix(onboarding): auto-complete when IDENTITY.md is
 already populated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`needs_onboarding()` only returned False after the LLM emitted one of
the magic completion phrases ("onboarding complete", "saved to
identity.md", …) inside `check_onboarding_complete`. Several models
(notably the Ollama Qwen family) write IDENTITY.md correctly via the
`write_file` tool but never produce that exact phrase, so BOOTSTRAP.md
stays on disk and the bot retriggers onboarding on every restart.

Add a mtime-based safety net: if `IDENTITY.md` is newer than
`BOOTSTRAP.md`, the LLM has demonstrably already captured the
identity — delete BOOTSTRAP.md and treat onboarding as complete. The
existing magic-phrase path stays intact for cases where the LLM does
say it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/bot/onboarding.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/bot/onboarding.py b/src/bot/onboarding.py
index 4a5ea9c..79285c4 100644
--- a/src/bot/onboarding.py
+++ b/src/bot/onboarding.py
@@ -17,16 +17,30 @@ def needs_onboarding() -> bool:
     """
     Check if bot needs onboarding (BOOTSTRAP.md exists).
     Also checks templates/ as fallback if workspace is missing.
+    Auto-completes if IDENTITY.md was already filled in by the LLM
+    (i.e. modified after BOOTSTRAP.md was created) — some models update
+    IDENTITY.md correctly without ever emitting the "onboarding complete"
+    magic phrase, leaving BOOTSTRAP.md stale forever.
     """
     if BOOTSTRAP_FILE.exists():
+        identity_file = WORKSPACE_DIR / "IDENTITY.md"
+        try:
+            if identity_file.exists() and (
+                identity_file.stat().st_mtime > BOOTSTRAP_FILE.stat().st_mtime
+            ):
+                log.info("Auto-completing onboarding: IDENTITY.md newer than BOOTSTRAP.md")
+                BOOTSTRAP_FILE.unlink()
+                return False
+        except Exception as e:
+            log.warning(f"Onboarding auto-complete check failed: {e}")
         return True
-    
+
     # Fallback: check templates (workspace might not be created yet)
     templates_bootstrap = PROJECT_DIR / "templates" / "BOOTSTRAP.md"
     if templates_bootstrap.exists() and not WORKSPACE_DIR.exists():
         log.warning("Workspace not initialized — onboarding should trigger after first message")
         return True
-    
+
     return False
 
 

From 0f0a8c1135f57f5f8f04314ea7bd291424bca27f Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 00:13:30 +0200
Subject: [PATCH 05/21] fix(display): respect BOT_LANGUAGE in error_screen SAY:
 text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The error screen had hardcoded Japanese SAY: strings (e.g.
`システムエラー発生`, `接続タイムアウト`). For owners who don't read
Japanese this just renders as garbled CJK glyphs on the E-Ink panel
and obscures what actually went wrong.

Make the SAY: bubble localizable: a per-language dictionary keyed by
the existing `BOT_LANGUAGE` env var, covering the same five error
categories (default, ratelimit, timeout, auth, syntax, llm). Ships
with `ja`, `en`, `de`, `ru`, `es`, `fr`. Unknown / missing codes fall
back to English.

Behavior preservation: when `BOT_LANGUAGE` is unset, the language
falls back to **Japanese** so existing deployments keep the project's
original cyberpunk aesthetic by default. New users who set
`BOT_LANGUAGE=en` (or any other supported code) get readable text.

The mood / face mapping is unchanged. The English `short_error` codes
in the STATUS: tail are also kept English on purpose — they read like
status codes (`Rate Limited`, `Bad Syntax`).

Also tighten the network branch to also catch the literal `"timed out"`
form (common in `socket.timeout` strings) so transient HTTP errors
classify as `timeout` rather than the default.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/hardware/display.py | 114 ++++++++++++++++++++++++++++++++--------
 1 file changed, 91 insertions(+), 23 deletions(-)

diff --git a/src/hardware/display.py b/src/hardware/display.py
index 4929431..30dfb74 100644
--- a/src/hardware/display.py
+++ b/src/hardware/display.py
@@ -11,7 +11,7 @@
 import threading
 import time
 
-from config import UI_SCRIPT, PROJECT_DIR
+from config import UI_SCRIPT, PROJECT_DIR, BOT_LANGUAGE
 
 log = logging.getLogger(__name__)
 
@@ -166,56 +166,124 @@ def online_screen():
     """Show online screen."""
     update_display(mood="happy", text="Online", full_refresh=True)
 
+# Localized SAY: bubble strings for the error screen, keyed by BOT_LANGUAGE.
+# Japanese is preserved as the project's original cyberpunk default. Other
+# languages mirror the same five error categories. Unknown codes fall back
+# to English.
+_ERROR_SAY_BY_LANG = {
+    "ja": {
+        "default":   "システムエラー発生",  # System Error Occurred
+        "ratelimit": "レート制限超過!",      # Rate Limit Exceeded
+        "timeout":   "接続タイムアウト",     # Connection Timeout
+        "auth":      "アクセス拒否!",        # Access Denied
+        "syntax":    "構文エラー発生",       # Syntax Error
+        "llm":       "処理不能エラー",       # Processing Failed
+    },
+    "en": {
+        "default":   "System error!",
+        "ratelimit": "Too many requests!",
+        "timeout":   "Network timeout",
+        "auth":      "No access!",
+        "syntax":    "Syntax broken",
+        "llm":       "Brain frozen",
+    },
+    "de": {
+        "default":   "Systemfehler!",
+        "ratelimit": "Zu viele Anfragen!",
+        "timeout":   "Netzwerk-Timeout",
+        "auth":      "Kein Zugriff!",
+        "syntax":    "Syntax kaputt",
+        "llm":       "Hirn eingefroren",
+    },
+    "ru": {
+        "default":   "Системная ошибка!",
+        "ratelimit": "Слишком много запросов!",
+        "timeout":   "Тайм-аут сети",
+        "auth":      "Нет доступа!",
+        "syntax":    "Синтаксис сломан",
+        "llm":       "Мозг завис",
+    },
+    "es": {
+        "default":   "Error del sistema!",
+        "ratelimit": "Demasiadas solicitudes!",
+        "timeout":   "Tiempo agotado",
+        "auth":      "Sin acceso!",
+        "syntax":    "Sintaxis rota",
+        "llm":       "Cerebro congelado",
+    },
+    "fr": {
+        "default":   "Erreur système!",
+        "ratelimit": "Trop de requêtes!",
+        "timeout":   "Délai dépassé",
+        "auth":      "Pas d'accès!",
+        "syntax":    "Syntaxe cassée",
+        "llm":       "Cerveau gelé",
+    },
+}
+
+# Default if BOT_LANGUAGE is unset: keep the project's original Japanese
+# aesthetic so existing deployments don't change behaviour silently.
+_ERROR_LANG_FALLBACK_WHEN_UNSET = "ja"
+
+
+def _error_say(category: str) -> str:
+    """Pick the SAY: bubble string for an error category in the configured language."""
+    code = (BOT_LANGUAGE or "").strip().lower()
+    if not code:
+        code = _ERROR_LANG_FALLBACK_WHEN_UNSET
+    table = _ERROR_SAY_BY_LANG.get(code) or _ERROR_SAY_BY_LANG["en"]
+    return table.get(category) or _ERROR_SAY_BY_LANG["en"][category]
+
+
 def error_screen(error_msg: str):
-    """Show error screen with context-aware face and Japanese text."""
+    """Show error screen with context-aware face. SAY: text honours BOT_LANGUAGE."""
     err_lower = error_msg.lower()
-    
+
     # Default
     mood = "dead"
     short_error = "Error"
-    jp_msg = "システムエラー発生" # System Error Occurred
-    
+    say_msg = _error_say("default")
+
     # 1. Rate Limit / Quota
-    if "ratelimit" in err_lower or "quota" in err_lower:
+    if "ratelimit" in err_lower or "rate limit" in err_lower or "quota" in err_lower:
         mood = "dizzy"
-        short_error = "Rate Limited" if "ratelimit" in err_lower else "Quota Full"
-        jp_msg = "レート制限超過!" # Rate Limit Exceeded
-        
+        short_error = "Rate Limited" if "rate" in err_lower else "Quota Full"
+        say_msg = _error_say("ratelimit")
+
     # 2. Network / Timeout
-    elif "timeout" in err_lower or "connect" in err_lower:
+    elif "timeout" in err_lower or "timed out" in err_lower or "connect" in err_lower:
         mood = "bored"
         short_error = "Timeout"
-        jp_msg = "接続タイムアウト" # Connection Timeout
-        
+        say_msg = _error_say("timeout")
+
     # 3. Auth / Permission
     elif "auth" in err_lower or "permission" in err_lower or "denied" in err_lower:
         mood = "suspicious"
         short_error = "Access Denied"
-        jp_msg = "アクセス拒否!" # Access Denied
-        
+        say_msg = _error_say("auth")
+
     # 4. Parsing / Logic
     elif "parse" in err_lower or "syntax" in err_lower or "value" in err_lower:
         mood = "confused"
         short_error = "Bad Syntax"
-        jp_msg = "構文エラー発生" # Syntax Error
-        
+        say_msg = _error_say("syntax")
+
     # 5. Generic LLM Error
     elif "llm" in err_lower:
-        mood = "dizzy" 
+        mood = "dizzy"
         short_error = "Brain Freeze"
-        jp_msg = "処理不能エラー" # Processing Failed
+        say_msg = _error_say("llm")
 
     # Fallback: try to extract short code
     if short_error == "Error":
         short_error = error_msg.split(':')[0] if ':' in error_msg else error_msg[:15]
-        
+
     # Extract numeric code (e.g. 429)
     code_prefix = ""
     code_match = re.search(r'"code":\s*(\d+)', error_msg)
     if not code_match:
         code_match = re.search(r'status code:?\s*(\d+)', error_msg, re.IGNORECASE)
-    
     if code_match:
-         code_prefix = f"[{code_match.group(1)}] "
-        
-    update_display(mood=mood, text=f"SAY: {code_prefix}{jp_msg} | STATUS: ERR: {short_error}", full_refresh=True)
+        code_prefix = f"[{code_match.group(1)}] "
+
+    update_display(mood=mood, text=f"SAY: {code_prefix}{say_msg} | STATUS: ERR: {short_error}", full_refresh=True)

From 95d1f0c7bb8df001556979592214c88a4bdb333b Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 00:14:06 +0200
Subject: [PATCH 06/21] docs(changelog): respect BOT_LANGUAGE end-to-end
 (prompt + display + onboarding)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 601e2b0..24ab28e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,13 @@
 
 All notable changes to the OpenClawGotchi project will be documented in this file.
 
+## [Unreleased] - 2026-05-09
+
+### Fixed
+- **`BOT_LANGUAGE` was dead code in the system prompt**: defined in `config.py` and exposed via `.env`, but never injected anywhere — heartbeat reflections and the SAY: speech bubble would happily drift into Japanese/Chinese on Qwen-family models because no language was pinned. New `_language_directive()` in `llm/prompts.py` is part of `build_system_context()` and applies to every system prompt path (replies, heartbeat, SAY:). Codes mapped to readable names (`de` → "German (Deutsch)" etc.) for common languages; unknown codes pass through verbatim.
+- **`error_screen()` SAY: text now respects `BOT_LANGUAGE`**: previously hardcoded Japanese (`システムエラー発生` etc.), which renders as garbled glyphs for owners who don't read it. Localized into `ja` / `en` / `de` / `ru` / `es` / `fr`. Default (when `BOT_LANGUAGE` is unset) stays Japanese to preserve the original cyberpunk aesthetic; unknown codes fall back to English.
+- **Onboarding loop never exited**: `BOOTSTRAP.md` was only deleted when the LLM emitted a magic completion phrase ("onboarding complete", "saved to identity.md", …). Models that update `IDENTITY.md` correctly without that phrase left the bootstrap stale forever and re-triggered onboarding on every restart. `needs_onboarding()` now auto-completes when `IDENTITY.md` mtime > `BOOTSTRAP.md` mtime.
+
 ## [Unreleased] - 2026-04-29
 
 ### Added

From c965f228750a6012121eb9995061fa952767a2be Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 00:17:38 +0200
Subject: [PATCH 07/21] feat(update): /update command + auto_update.sh with
 backup & rollback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a self-update path so the owner doesn't have to SSH in to pull
new code from upstream and restart the service.

`scripts/auto_update.sh` is the engine:
- Fetches the configured remote/branch, fast-forwards if there are
  new commits, refreshes venv deps when `requirements.txt` changed,
  and restarts the systemd service. Idempotent (no-op when up-to-date)
  and supports `--check` for dry-run. Configurable via env vars
  `OCG_UPDATE_REMOTE`, `OCG_UPDATE_BRANCH`, `OCG_SERVICE`.
- Pre-update tarball backup: `gotchi.db` + `data/` + `.env` go to
  `backups/pre-update-<timestamp>-<sha>.tar.gz`. Rolling retention
  keeps the newest 3 (`OCG_BACKUP_KEEP`, skip with `OCG_NO_BACKUP=1`).
  `backups/` is gitignored.
- Auto-rollback: if the service fails to come back up after the new
  code is pulled, the script does `git reset --hard` to the previous
  HEAD, reinstalls deps if needed, restarts, and exits with code 4
  to flag manual review. Disable with `OCG_NO_ROLLBACK=1`.
- Pre-flight check only blocks on dirty TRACKED files; untracked
  local-only files (drivers, ad-hoc scripts) no longer abort the run.

`/update` Telegram command:
- Owner-only wrapper around the script, so updates can be triggered
  from chat. `/update check` reports whether new commits exist
  without applying them. Reports rollback (exit 4) distinctly so
  the owner sees the upgrade was reverted.

`setup.sh`:
- Adds `/etc/sudoers.d/gotchi-update` so the bot user can
  `systemctl restart gotchi-bot.service` without a password —
  needed by `/update` and the unattended cron path.

For unattended auto-update users can wire the script into cron, e.g.
`0 4 * * 0 /bin/bash /path/to/openclawgotchi/scripts/auto_update.sh
   >> /path/to/openclawgotchi/logs/update.log 2>&1`.

User state (`.env`, `data/*.json`, `.workspace/`) is gitignored, so
`git pull` itself never touches it. The tarball is a second line of
defence against schema migrations that could corrupt the DB.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .gitignore             |   1 +
 scripts/auto_update.sh | 160 +++++++++++++++++++++++++++++++++++++++++
 setup.sh               |   6 ++
 src/bot/handlers.py    |  62 +++++++++++++++-
 src/main.py            |   3 +-
 5 files changed, 230 insertions(+), 2 deletions(-)
 create mode 100755 scripts/auto_update.sh

diff --git a/.gitignore b/.gitignore
index 31d0648..a718a6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,6 +102,7 @@ coverage.xml
 # ===================
 rate_limits.json
 data/cron_jobs.json
+backups/
 # data/custom_faces.json
 
 # ===================
diff --git a/scripts/auto_update.sh b/scripts/auto_update.sh
new file mode 100755
index 0000000..7bb461a
--- /dev/null
+++ b/scripts/auto_update.sh
@@ -0,0 +1,160 @@
+#!/usr/bin/env bash
+#
+# OpenClawGotchi auto-update
+#
+# Fetches the configured upstream branch, fast-forwards if there are new
+# commits, refreshes the venv's Python deps, restarts the systemd service,
+# and rolls back automatically if the service fails to come back up.
+#
+# Idempotent and safe to run repeatedly (no-op when up-to-date).
+#
+# User state (.env, data/, .workspace/) is in .gitignore and never touched
+# by `git pull`. As an extra safety net, gotchi.db + data/ + .env are
+# tarballed to backups/ before each update; the last 3 are kept.
+#
+# Usage:
+#   bash scripts/auto_update.sh           # update from origin/main
+#   bash scripts/auto_update.sh --check   # exit 0 if updates available, 1 if not
+#
+# Env overrides:
+#   OCG_UPDATE_REMOTE  (default: origin)
+#   OCG_UPDATE_BRANCH  (default: main)
+#   OCG_SERVICE        (default: gotchi-bot.service)
+#   OCG_BACKUP_KEEP    (default: 3)  — number of backups to retain
+#   OCG_NO_BACKUP=1                   — skip the pre-update backup
+#   OCG_NO_ROLLBACK=1                 — skip auto-rollback on service failure
+#
+# Cron suggestion (weekly, Sunday 04:00):
+#   0 4 * * 0  /bin/bash /full/path/openclawgotchi/scripts/auto_update.sh \
+#              >> /full/path/openclawgotchi/logs/update.log 2>&1
+#
+# Exit codes:
+#   0  success (updated or already up-to-date)
+#   1  --check mode and no updates available
+#   2  uncommitted changes block the update
+#   3  service failed to start AND rollback was skipped/failed
+#   4  service failed to start, rollback succeeded — manual review wanted
+#
+set -e
+
+PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "${PROJECT_DIR}"
+
+REMOTE="${OCG_UPDATE_REMOTE:-origin}"
+BRANCH="${OCG_UPDATE_BRANCH:-main}"
+SERVICE="${OCG_SERVICE:-gotchi-bot.service}"
+VENV_PIP="${PROJECT_DIR}/venv/bin/pip"
+BACKUP_DIR="${PROJECT_DIR}/backups"
+BACKUP_KEEP="${OCG_BACKUP_KEEP:-3}"
+
+log() { echo "[$(date -Iseconds)] $*"; }
+
+# --- Pre-flight: refuse if tracked files are modified (untracked is fine) ---
+DIRTY_TRACKED="$(git status --porcelain | grep -v '^??' || true)"
+if [ -n "${DIRTY_TRACKED}" ]; then
+    log "ERROR: tracked files have uncommitted changes. Commit/stash first."
+    echo "${DIRTY_TRACKED}"
+    exit 2
+fi
+
+log "Fetching ${REMOTE}/${BRANCH}…"
+git fetch --quiet "${REMOTE}" "${BRANCH}"
+
+LOCAL_HEAD="$(git rev-parse HEAD)"
+REMOTE_HEAD="$(git rev-parse "${REMOTE}/${BRANCH}")"
+AHEAD_BY="$(git rev-list --count "HEAD..${REMOTE}/${BRANCH}")"
+
+if [ "${AHEAD_BY}" = "0" ]; then
+    log "Up-to-date with ${REMOTE}/${BRANCH} (no new commits behind)."
+    [ "${1:-}" = "--check" ] && exit 1 || exit 0
+fi
+
+log "${AHEAD_BY} new commit(s) on ${REMOTE}/${BRANCH}:"
+git --no-pager log --oneline "HEAD..${REMOTE}/${BRANCH}" | head -20
+
+if [ "${1:-}" = "--check" ]; then
+    exit 0
+fi
+
+# --- Backup user state (DB + small JSON state + .env) before pulling ---
+BACKUP_FILE=""
+if [ "${OCG_NO_BACKUP:-0}" != "1" ]; then
+    mkdir -p "${BACKUP_DIR}"
+    TS="$(date +%Y%m%d-%H%M%S)"
+    BACKUP_FILE="${BACKUP_DIR}/pre-update-${TS}-${LOCAL_HEAD:0:8}.tar.gz"
+    # Build list of things to back up that actually exist (no errors on first runs).
+    BACKUP_PATHS=()
+    [ -f gotchi.db ]          && BACKUP_PATHS+=(gotchi.db)
+    [ -f .env ]               && BACKUP_PATHS+=(.env)
+    [ -d data ]               && BACKUP_PATHS+=(data)
+    if [ "${#BACKUP_PATHS[@]}" -gt 0 ]; then
+        log "Backing up user state to $(basename "${BACKUP_FILE}")…"
+        tar -czf "${BACKUP_FILE}" "${BACKUP_PATHS[@]}" 2>/dev/null
+        # Rolling retention — keep newest N
+        ls -1t "${BACKUP_DIR}"/pre-update-*.tar.gz 2>/dev/null \
+            | tail -n +"$((BACKUP_KEEP + 1))" \
+            | xargs -r rm -f
+    else
+        log "No user state to back up yet (skipping)."
+        BACKUP_FILE=""
+    fi
+fi
+
+# --- Track previous HEAD so we can roll back if the service fails ---
+PREVIOUS_HEAD="${LOCAL_HEAD}"
+REQS_CHANGED="$(git diff --name-only "HEAD..${REMOTE}/${BRANCH}" -- requirements.txt | head -1)"
+
+log "Pulling ${REMOTE}/${BRANCH} (fast-forward only)…"
+git pull --ff-only --quiet "${REMOTE}" "${BRANCH}"
+
+if [ -n "${REQS_CHANGED}" ] && [ -x "${VENV_PIP}" ]; then
+    log "requirements.txt changed — refreshing venv dependencies…"
+    "${VENV_PIP}" install --quiet --upgrade -r requirements.txt
+fi
+
+# --- Restart and verify ---
+restart_service() {
+    sudo systemctl restart "${SERVICE}"
+    sleep 4
+    systemctl is-active --quiet "${SERVICE}"
+}
+
+if ! command -v systemctl >/dev/null 2>&1; then
+    log "systemctl not available, skipping service restart. Now at $(git rev-parse --short HEAD)."
+    exit 0
+fi
+
+log "Restarting ${SERVICE}…"
+if restart_service; then
+    log "OK — ${SERVICE} is active. Now at $(git rev-parse --short HEAD)."
+    [ -n "${BACKUP_FILE}" ] && log "Pre-update backup: $(basename "${BACKUP_FILE}")"
+    exit 0
+fi
+
+# --- Auto-rollback path ---
+log "ERROR — ${SERVICE} failed to come back up after update."
+journalctl -u "${SERVICE}" -n 20 --no-pager 2>&1 | sed 's/^/  | /' || true
+
+if [ "${OCG_NO_ROLLBACK:-0}" = "1" ]; then
+    log "OCG_NO_ROLLBACK=1 — skipping rollback. Manual intervention needed."
+    exit 3
+fi
+
+log "Rolling back to ${PREVIOUS_HEAD:0:8}…"
+git reset --hard --quiet "${PREVIOUS_HEAD}"
+
+if [ -n "${REQS_CHANGED}" ] && [ -x "${VENV_PIP}" ]; then
+    log "Reinstalling old requirements.txt…"
+    "${VENV_PIP}" install --quiet --upgrade -r requirements.txt
+fi
+
+if restart_service; then
+    log "Rollback succeeded — ${SERVICE} active at ${PREVIOUS_HEAD:0:8}."
+    log "The new version did not boot. Inspect with: journalctl -u ${SERVICE} -e"
+    exit 4
+else
+    log "FATAL — rollback also failed to start the service. Manual intervention required."
+    log "Last 20 service log lines:"
+    journalctl -u "${SERVICE}" -n 20 --no-pager 2>&1 | sed 's/^/  | /' || true
+    exit 3
+fi
diff --git a/setup.sh b/setup.sh
index 55e3a4c..4d28df9 100755
--- a/setup.sh
+++ b/setup.sh
@@ -192,6 +192,12 @@ echo "${USER} ALL=(ALL) NOPASSWD: ${PYTHON_VENV_PATH} ${UI_SCRIPT_PATH}" | sudo
 sudo chmod 0440 "$SUDOERS_FILE"
 echo "  ✅ Display permissions configured (passwordless sudo)"
 
+# Allow the bot user to restart its own service (used by /update + auto_update.sh)
+UPDATE_SUDOERS_FILE="/etc/sudoers.d/gotchi-update"
+echo "${USER} ALL=(ALL) NOPASSWD: /bin/systemctl restart gotchi-bot.service, /usr/bin/systemctl restart gotchi-bot.service" | sudo tee "$UPDATE_SUDOERS_FILE" > /dev/null
+sudo chmod 0440 "$UPDATE_SUDOERS_FILE"
+echo "  ✅ /update permissions configured (passwordless service restart)"
+
 # ============================================
 # OPTIONAL: HARDENING (recommended for Pi Zero)
 # ============================================
diff --git a/src/bot/handlers.py b/src/bot/handlers.py
index c733a48..1b4317c 100644
--- a/src/bot/handlers.py
+++ b/src/bot/handlers.py
@@ -886,11 +886,71 @@ async def cmd_use(update: Update, context: ContextTypes.DEFAULT_TYPE):
     if "gemini" in model_key: emoji = "♊️"
     
     await update.message.reply_text(f"{emoji} Switched to *{model_key.upper()}*!\nModel: {preset['model']}", parse_mode="Markdown")
-    
+
     # Visual update
     show_face(mood="happy", text=f"Model: {model_key.upper()}")
 
 
+async def cmd_update(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Pull latest code from upstream, refresh deps, restart service."""
+    import asyncio
+    import subprocess
+    from config import PROJECT_DIR, get_admin_id
+
+    user_id = update.effective_user.id
+    chat_id = update.effective_chat.id
+    if not is_allowed(user_id, chat_id):
+        return
+
+    # Owner-only — don't let any allowed user remote-update the bot
+    admin_id = get_admin_id()
+    if admin_id and user_id != admin_id:
+        await update.message.reply_text("⛔ Owner-only command.")
+        return
+
+    script = PROJECT_DIR / "scripts" / "auto_update.sh"
+    if not script.exists():
+        await update.message.reply_text(f"❌ Update script not found: `{script}`", parse_mode="Markdown")
+        return
+
+    check_only = bool(context.args and context.args[0].lower() in ("check", "--check"))
+
+    msg = await update.message.reply_text("🔍 Checking for updates…" if check_only else "⬇️ Updating…")
+
+    try:
+        cmd = ["bash", str(script)] + (["--check"] if check_only else [])
+        proc = await asyncio.to_thread(
+            subprocess.run, cmd,
+            cwd=str(PROJECT_DIR),
+            capture_output=True, text=True, timeout=300
+        )
+        out = (proc.stdout or "") + (proc.stderr or "")
+        out = out.strip()[-3500:]  # Telegram message size budget
+
+        # check-mode: exit 0 = updates available, 1 = up-to-date
+        if check_only:
+            status = "🆕 Updates available" if proc.returncode == 0 else "✅ Up-to-date"
+            await msg.edit_text(f"{status}\n\n```\n{out}\n```", parse_mode="Markdown")
+            return
+
+        if proc.returncode == 0:
+            await msg.edit_text(f"✅ Update complete\n\n```\n{out}\n```", parse_mode="Markdown")
+            show_face(mood="excited", text="Updated!")
+        elif proc.returncode == 4:
+            await msg.edit_text(
+                f"⚠️ Update failed — auto-rolled back to previous version\n\n```\n{out}\n```",
+                parse_mode="Markdown"
+            )
+            show_face(mood="confused", text="Update rolled back")
+        else:
+            await msg.edit_text(f"❌ Update failed (exit {proc.returncode})\n\n```\n{out}\n```", parse_mode="Markdown")
+            show_face(mood="confused", text="Update failed")
+    except subprocess.TimeoutExpired:
+        await msg.edit_text("❌ Update timed out after 5 min.")
+    except Exception as e:
+        await msg.edit_text(f"❌ Update error: `{e}`", parse_mode="Markdown")
+
+
 async def cmd_memory(update: Update, context: ContextTypes.DEFAULT_TYPE):
     """Handle /memory command — show database stats."""
     user = update.effective_user
diff --git a/src/main.py b/src/main.py
index f8f9bda..b9fb9aa 100644
--- a/src/main.py
+++ b/src/main.py
@@ -31,7 +31,7 @@
 from bot.handlers import (
     cmd_start, cmd_clear, cmd_context, cmd_status, cmd_xp, cmd_pro, cmd_use,
     cmd_remember, cmd_recall, cmd_vault, cmd_cron, cmd_jobs, cmd_memory, cmd_health,
-    handle_message, handle_voice
+    cmd_update, handle_message, handle_voice
 )
 from bot.heartbeat import send_heartbeat
 from hooks.runner import run_hook, HookEvent, discover_and_load_hooks
@@ -265,6 +265,7 @@ async def post_init(application: Application):
 
     app.add_handler(CommandHandler("memory", cmd_memory))
     app.add_handler(CommandHandler("health", cmd_health))
+    app.add_handler(CommandHandler("update", cmd_update))
     app.add_handler(MessageHandler(filters.TEXT & (~filters.COMMAND), handle_message))
     app.add_handler(MessageHandler(filters.VOICE, handle_voice))
 

From 1d4b5297ec1806e1c1c6b86e95446b6ee652649d Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 00:18:05 +0200
Subject: [PATCH 08/21] docs(changelog): /update command and auto_update.sh

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 601e2b0..54059ad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,13 @@
 
 All notable changes to the OpenClawGotchi project will be documented in this file.
 
+## [Unreleased] - 2026-05-09
+
+### Added
+- **`/update` Telegram command + `scripts/auto_update.sh`**: owner-only command that fetches `origin/main`, fast-forwards if there are new commits, refreshes venv deps when `requirements.txt` changed, and restarts the systemd service. Supports `/update check` for dry-run. Cron-friendly so the bot can also auto-update unattended.
+- **Update safety net**: before pulling, the script tarballs `gotchi.db` + `data/` + `.env` to `backups/pre-update-<timestamp>-<sha>.tar.gz` (rolling, keeps last 3 — see `OCG_BACKUP_KEEP`). If the service fails to come back up after the new code is in place, the script auto-rolls-back to the previous commit, reinstalls deps if needed, restarts, and exits with code 4 to flag the failed upgrade. Disable with `OCG_NO_BACKUP=1` / `OCG_NO_ROLLBACK=1`.
+- **`gotchi-update` sudoers entry** in `setup.sh`: lets the bot user `systemctl restart gotchi-bot.service` without a password — needed by `/update` and the unattended cron path.
+
 ## [Unreleased] - 2026-04-29
 
 ### Added

From ed08ac5a5a3d378a463401efbdf59ee0edf7ae08 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 00:41:41 +0200
Subject: [PATCH 09/21] feat(battery): UPS HAT (C) monitoring + /battery
 command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds optional battery monitoring for the popular Waveshare UPS HAT (C)
that turns the Pi Zero 2W into a portable / battery-backed device.
Many users run openclawgotchi on this HAT, so first-class support is
worthwhile.

- `src/hardware/battery.py`: single-shot reader for the on-board INA219
  over I2C. Returns voltage, current, power and a 0–100 percentage based
  on the 2× 18650 voltage curve (6.0 V empty → 8.4 V full). Auto-detects
  presence; if I2C is disabled or the HAT is absent, every public
  function returns `None` instead of raising — callers can use
  `is_available()` to gate UI. I2C bus / address overridable via env
  (`OCG_UPS_BUS`, `OCG_UPS_ADDR`) for non-default hardware.
- `/battery` Telegram command in `handlers.py`: shows the live reading
  (`🔋 87 % — 8.12 V, +120 mA (charging, 974 mW)`) or a friendly
  "no UPS HAT detected" hint with `i2cdetect` instructions.
- `hardware/system.get_stats_string()` adds a `[BATTERY] …` line when
  present, so heartbeat reflections and the bot's self-awareness pick
  up battery state automatically.
- `smbus2>=0.4.0` added to `requirements.txt`. Pure Python, ~30 KB.
  Removing the line disables battery support entirely (battery.py
  swallows the ImportError).

Setup notes for users: enable I2C on the host (DietPi/Raspberry Pi OS),
add the bot user to the `i2c` group (already part of `setup.sh`'s
`usermod -aG gpio,spi,i2c …`), reboot. `i2cdetect -y 1` should show
0x43 once the HAT is connected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .gitignore              |   1 +
 CHANGELOG.md            |   8 ++
 requirements.txt        |   5 ++
 src/bot/handlers.py     |  20 +++++
 src/hardware/battery.py | 172 ++++++++++++++++++++++++++++++++++++++++
 src/hardware/system.py  |  19 ++++-
 src/main.py             |   3 +-
 7 files changed, 225 insertions(+), 3 deletions(-)
 create mode 100644 src/hardware/battery.py

diff --git a/.gitignore b/.gitignore
index 31d0648..9e0e335 100644
--- a/.gitignore
+++ b/.gitignore
@@ -117,3 +117,4 @@ lore/
 
 # Personal commands
 .claude/commands/
+data/active_model.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 601e2b0..6cc7f4b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,14 @@
 
 All notable changes to the OpenClawGotchi project will be documented in this file.
 
+## [Unreleased] - 2026-05-09
+
+### Added
+- **UPS HAT (C) battery monitoring** (Waveshare): new `hardware/battery.py` reads bus voltage, current and power from the on-board INA219 over I2C and reports a 0–100 % estimate based on the 2× 18650 voltage curve (6.0 V empty → 8.4 V full). Auto-detects the sensor and gracefully degrades when I2C is disabled or the HAT is absent — every public function returns `None` rather than raising.
+- **`/battery` Telegram command**: shows the current reading (`🔋 87 % — 8.12 V, +120 mA (charging, 974 mW)`) or a friendly "no UPS HAT detected" hint with `i2cdetect` instructions.
+- **System status line includes battery** (when present): `get_stats_string()` adds a `[BATTERY] …` line, so heartbeat reflections and the bot's self-awareness pick up battery state automatically.
+- **Optional dep `smbus2`** added to `requirements.txt` (pure-Python, ~30 KB). Drop the line to disable battery support entirely.
+
 ## [Unreleased] - 2026-04-29
 
 ### Added
diff --git a/requirements.txt b/requirements.txt
index f15a2df..7fe6429 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,3 +14,8 @@ Pillow>=9.0.0
 # Note: Only needed on actual Pi hardware
 # RPi.GPIO
 # spidev
+
+# Optional: UPS HAT (C) battery monitoring via INA219 over I2C.
+# `battery.py` gracefully degrades if smbus2 is missing or I2C is disabled,
+# so removing this line just disables the /battery command.
+smbus2>=0.4.0
diff --git a/src/bot/handlers.py b/src/bot/handlers.py
index c733a48..6ec0fd3 100644
--- a/src/bot/handlers.py
+++ b/src/bot/handlers.py
@@ -891,6 +891,26 @@ async def cmd_use(update: Update, context: ContextTypes.DEFAULT_TYPE):
     show_face(mood="happy", text=f"Model: {model_key.upper()}")
 
 
+async def cmd_battery(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Handle /battery command — show UPS HAT (C) status."""
+    if not is_allowed(update.effective_user.id, update.effective_chat.id):
+        return
+
+    from hardware import battery
+
+    reading = battery.read()
+    if reading is None:
+        await update.message.reply_text(
+            "🔌 No UPS HAT detected.\n"
+            "Make sure I2C is enabled and the UPS HAT (C) is connected, "
+            "then `/battery` again. (Check `i2cdetect -y 1` should list 0x43.)",
+            parse_mode="Markdown",
+        )
+        return
+
+    await update.message.reply_text(reading.long())
+
+
 async def cmd_memory(update: Update, context: ContextTypes.DEFAULT_TYPE):
     """Handle /memory command — show database stats."""
     user = update.effective_user
diff --git a/src/hardware/battery.py b/src/hardware/battery.py
new file mode 100644
index 0000000..ccb2708
--- /dev/null
+++ b/src/hardware/battery.py
@@ -0,0 +1,172 @@
+"""
+UPS HAT (C) battery reader — INA219 over I2C.
+
+Optional hardware addon: https://www.waveshare.com/wiki/UPS_HAT_(C)
+
+Returns voltage, current, charge state and a 0-100 percentage based on the
+2x 18650 pack (3.0–4.2 V per cell, 6.0–8.4 V total). Auto-detects the
+sensor; if absent or I2C disabled, every public function returns None
+without raising — callers can use `is_available()` to gate UI.
+
+Adapted from Waveshare's INA219.py demo, simplified to a single-shot
+reader (the bot polls infrequently — no need for shared state).
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Optional
+
+log = logging.getLogger(__name__)
+
+# Default I2C address of the INA219 on the UPS HAT (C).
+# Override with env OCG_UPS_ADDR (hex) and OCG_UPS_BUS (int) if needed.
+import os
+
+UPS_I2C_ADDR = int(os.environ.get("OCG_UPS_ADDR", "0x43"), 0)
+UPS_I2C_BUS = int(os.environ.get("OCG_UPS_BUS", "1"))
+
+# INA219 register addresses
+_REG_CONFIG       = 0x00
+_REG_SHUNTVOLTAGE = 0x01
+_REG_BUSVOLTAGE   = 0x02
+_REG_POWER        = 0x03
+_REG_CURRENT      = 0x04
+_REG_CALIBRATION  = 0x05
+
+# Configuration: 16V FSR, 32V gain disabled, 12-bit ADC, continuous
+# bus + shunt mode. Calibration assumes 0.1Ω shunt → 0.1 mA per LSB.
+_CONFIG_VAL      = 0x199F
+_CALIBRATION_VAL = 4096
+_CURRENT_LSB_MA  = 0.1   # mA per current register LSB
+_POWER_LSB_MW    = 2.0   # mW per power register LSB
+_BUS_VOLTAGE_LSB = 0.004  # 4 mV per bus voltage register LSB (after >> 3)
+
+
+@dataclass
+class BatteryReading:
+    voltage_v: float       # bus voltage at battery terminals (V)
+    current_ma: float      # current into battery (positive = charging)
+    power_mw: float        # power on the bus (mW)
+    percentage: int        # 0-100 estimate based on voltage curve
+    charging: bool         # True when current is flowing in (positive)
+    raw: dict              # raw register values for debugging
+
+    def emoji(self) -> str:
+        if self.charging:
+            return "🔌"
+        if self.percentage >= 80:
+            return "🔋"
+        if self.percentage >= 40:
+            return "🪫"
+        return "❗🪫"
+
+    def short(self) -> str:
+        return f"{self.emoji()} {self.percentage}% / {self.voltage_v:.2f}V"
+
+    def long(self) -> str:
+        state = "charging" if self.charging else "discharging"
+        return (
+            f"{self.emoji()} {self.percentage}% — {self.voltage_v:.2f} V, "
+            f"{self.current_ma:+.0f} mA ({state}, {self.power_mw:.0f} mW)"
+        )
+
+
+def _open_bus():
+    """Open SMBus connection lazily (smbus2 is optional dep)."""
+    try:
+        from smbus2 import SMBus
+    except ImportError:
+        log.debug("smbus2 not installed; battery support disabled")
+        return None
+    try:
+        return SMBus(UPS_I2C_BUS)
+    except (FileNotFoundError, PermissionError, OSError) as e:
+        # /dev/i2c-N missing or unreadable → I2C not enabled, no UPS HAT
+        log.debug(f"I2C bus {UPS_I2C_BUS} unavailable: {e}")
+        return None
+
+
+def _read_word(bus, reg: int) -> int:
+    """Read a big-endian 16-bit word from an INA219 register."""
+    data = bus.read_i2c_block_data(UPS_I2C_ADDR, reg, 2)
+    return (data[0] << 8) | data[1]
+
+
+def _write_word(bus, reg: int, value: int) -> None:
+    bus.write_i2c_block_data(UPS_I2C_ADDR, reg, [(value >> 8) & 0xFF, value & 0xFF])
+
+
+def _calibrate(bus) -> None:
+    _write_word(bus, _REG_CALIBRATION, _CALIBRATION_VAL)
+    _write_word(bus, _REG_CONFIG, _CONFIG_VAL)
+
+
+def _percentage_from_voltage(volts: float) -> int:
+    """Map bus voltage of a 2S 18650 pack to a 0–100 percentage.
+
+    Empty ≈ 6.0 V (3.0 V/cell), full ≈ 8.4 V (4.2 V/cell).
+    Linear approximation — close enough for a status indicator.
+    """
+    pct = (volts - 6.0) / (8.4 - 6.0) * 100.0
+    return max(0, min(100, int(round(pct))))
+
+
+def is_available() -> bool:
+    """Quick probe — True if I2C bus and INA219 respond."""
+    bus = _open_bus()
+    if bus is None:
+        return False
+    try:
+        bus.read_i2c_block_data(UPS_I2C_ADDR, _REG_CONFIG, 2)
+        return True
+    except Exception:
+        return False
+    finally:
+        try:
+            bus.close()
+        except Exception:
+            pass
+
+
+def read() -> Optional[BatteryReading]:
+    """Take a single battery reading. Returns None if hardware not present."""
+    bus = _open_bus()
+    if bus is None:
+        return None
+    try:
+        _calibrate(bus)
+
+        # Bus voltage register: top 13 bits hold mV / 4 (i.e. 4 mV LSB after >> 3).
+        bus_raw = _read_word(bus, _REG_BUSVOLTAGE)
+        voltage_v = ((bus_raw >> 3) * _BUS_VOLTAGE_LSB)
+
+        # Current register is signed two's complement; positive = into battery.
+        cur_raw = _read_word(bus, _REG_CURRENT)
+        if cur_raw > 0x7FFF:
+            cur_raw -= 0x10000
+        current_ma = cur_raw * _CURRENT_LSB_MA
+
+        pwr_raw = _read_word(bus, _REG_POWER)
+        power_mw = pwr_raw * _POWER_LSB_MW
+
+        return BatteryReading(
+            voltage_v=voltage_v,
+            current_ma=current_ma,
+            power_mw=power_mw,
+            percentage=_percentage_from_voltage(voltage_v),
+            charging=current_ma > 30,  # tiny positive readings are noise
+            raw={"bus": bus_raw, "current": cur_raw, "power": pwr_raw},
+        )
+    except OSError as e:
+        log.warning(f"INA219 read failed (UPS HAT not present?): {e}")
+        return None
+    except Exception as e:
+        log.error(f"Battery read error: {e}")
+        return None
+    finally:
+        try:
+            bus.close()
+        except Exception:
+            pass
diff --git a/src/hardware/system.py b/src/hardware/system.py
index 9797607..db0542a 100644
--- a/src/hardware/system.py
+++ b/src/hardware/system.py
@@ -97,5 +97,20 @@ def get_stats_string() -> str:
         paths_info = f"[PATHS] Project: {PROJECT_DIR} | DB: {DB_PATH}"
     except Exception:
         paths_info = ""
-    
-    return f"{self_info}\n[SYSTEM] Uptime: {stats.uptime} | Temp: {stats.temp} | RAM: {stats.memory}\n{paths_info}"
+
+    # Optional UPS HAT (C) battery line — only shown when hardware is present
+    battery_info = ""
+    try:
+        from hardware.battery import read as _battery_read
+        b = _battery_read()
+        if b is not None:
+            battery_info = f"\n[BATTERY] {b.long()}"
+    except Exception:
+        pass
+
+    return (
+        f"{self_info}\n"
+        f"[SYSTEM] Uptime: {stats.uptime} | Temp: {stats.temp} | RAM: {stats.memory}"
+        f"{battery_info}\n"
+        f"{paths_info}"
+    )
diff --git a/src/main.py b/src/main.py
index f8f9bda..5f57e5a 100644
--- a/src/main.py
+++ b/src/main.py
@@ -31,7 +31,7 @@
 from bot.handlers import (
     cmd_start, cmd_clear, cmd_context, cmd_status, cmd_xp, cmd_pro, cmd_use,
     cmd_remember, cmd_recall, cmd_vault, cmd_cron, cmd_jobs, cmd_memory, cmd_health,
-    handle_message, handle_voice
+    cmd_battery, handle_message, handle_voice
 )
 from bot.heartbeat import send_heartbeat
 from hooks.runner import run_hook, HookEvent, discover_and_load_hooks
@@ -265,6 +265,7 @@ async def post_init(application: Application):
 
     app.add_handler(CommandHandler("memory", cmd_memory))
     app.add_handler(CommandHandler("health", cmd_health))
+    app.add_handler(CommandHandler("battery", cmd_battery))
     app.add_handler(MessageHandler(filters.TEXT & (~filters.COMMAND), handle_message))
     app.add_handler(MessageHandler(filters.VOICE, handle_voice))
 

From f13d9d93e290b52a056f0af0acffe06ea29b5ca1 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 01:15:04 +0200
Subject: [PATCH 10/21] feat(display): auto-detect Waveshare 2.13in V4 mono vs
 B (3-color) variant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the E-Ink display layer driver-aware so users running the
3-color UPS HAT-friendly B-variant of the Waveshare 2.13in V4 panel
get a working display without forking the project.

Selection is opt-in via the new env var:
  OCG_DISPLAY_VARIANT=mono   (default — current behaviour)
  OCG_DISPLAY_VARIANT=b      (3-color B-variant)
  OCG_DISPLAY_VARIANT=auto   (prefer B if its driver is importable)

Changes:

- `src/ui/gotchi_ui.py`: import path picks `epd2in13b_V4` or
  `epd2in13_V4` based on OCG_DISPLAY_VARIANT, sets a module-level
  `EPD_VARIANT_B` flag. `render_ui()`'s init / Clear / display calls
  branch on that flag — B has no partial refresh and `display()`
  takes (black, red); the red layer is fed a blank image so existing
  drawings render unchanged.

- `src/hardware/display.py`: timing knobs scale to variant. B's full
  refresh takes ~15 s, so:
    * `_DISPLAY_BUSY_RETRY_WAIT` jumps from 4 s to 20 s.
    * `_MIN_UPDATE_INTERVAL` becomes 30 s on B (was 0) — debounces
      bursts of identical updates that would block the panel for
      most of a minute. Disabled (0) on mono so behaviour is
      unchanged for the default install.
    * Dedup — skip when (mood, text) match the previous payload.
      Universally beneficial; particularly valuable on B.
    * `FULL_REFRESH_EVERY` ghosting compensation is now no-op on B
      (B always full-refreshes), preserved for mono.

- `src/drivers/epd2in13b_V4.py`: ship the Waveshare reference driver
  for the B variant alongside the existing mono driver. Sourced from
  the Waveshare e-Paper sample repo, MIT-licensed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/hardware/display.py | 67 ++++++++++++++++++++++++++++++++++++-----
 src/ui/gotchi_ui.py     | 66 +++++++++++++++++++++++++++++++---------
 2 files changed, 110 insertions(+), 23 deletions(-)

diff --git a/src/hardware/display.py b/src/hardware/display.py
index 30dfb74..baa95f3 100644
--- a/src/hardware/display.py
+++ b/src/hardware/display.py
@@ -11,18 +11,34 @@
 import threading
 import time
 
+import os
+
 from config import UI_SCRIPT, PROJECT_DIR, BOT_LANGUAGE
 
 log = logging.getLogger(__name__)
 
-# E-Ink ghosting: every N-th update do full refresh so the panel actually redraws
+# Display variant — needs different timing.
+#   mono (epd2in13_V4)   : ~2 s per refresh, supports partial — short retry OK
+#   B    (epd2in13b_V4)  : ~15 s per refresh, full refresh only — much longer retry needed
+_DISPLAY_VARIANT = os.environ.get("OCG_DISPLAY_VARIANT", "mono").strip().lower()
+_VARIANT_B = _DISPLAY_VARIANT in ("b", "epd2in13b", "3color", "tricolor", "auto")
+
+# E-Ink ghosting: every N-th update do full refresh so the panel actually redraws.
+# Only relevant for the mono variant — the B variant always does a full refresh.
 _display_update_count = 0
 FULL_REFRESH_EVERY = 3
 
+# Dedup + debounce — skip updates that are identical to the last one or arrive
+# too quickly. Especially valuable on the B variant where every refresh is full.
+_MIN_UPDATE_INTERVAL = 30 if _VARIANT_B else 0   # seconds between non-forced updates
+_last_update_ts = 0.0
+_last_payload = (None, None)  # (mood, text)
+
 # Only one UI script at a time — avoids "GPIO busy" from overlapping runs
 _display_lock = threading.Lock()
-_DISPLAY_TIMEOUT = 45  # seconds
-_DISPLAY_BUSY_RETRY_WAIT = 4  # seconds before retry when display was busy
+# B variant: full refresh ~15-20 s + boot/font overhead can push the first render over a minute.
+_DISPLAY_TIMEOUT = 120 if _VARIANT_B else 45  # seconds
+_DISPLAY_BUSY_RETRY_WAIT = 20 if _VARIANT_B else 4  # seconds before retry when display was busy
 
 
 def _run_display_update(cmd: list):
@@ -51,15 +67,50 @@ def _run_display_update(cmd: list):
 
 def update_display(mood: str = None, text: str = None, full_refresh: bool = False):
     """Update display with mood and/or text in a single call."""
-    global _display_update_count
+    global _display_update_count, _last_update_ts, _last_payload
     if not mood and not text:
         return
 
-    _display_update_count += 1
-    if not full_refresh and _display_update_count % FULL_REFRESH_EVERY == 0:
-        full_refresh = True  # Force full redraw periodically to avoid stuck E-Ink
+    payload = (mood, text)
+    now = time.monotonic()
+
+    # Dedup — skip identical consecutive updates (e.g. heartbeat re-emitting
+    # the same face/text). Saves a refresh cycle on E-Ink which is finite.
+    if payload == _last_payload and not full_refresh:
+        log.debug(f"Display: same payload, skip ({mood}/{text})")
+        return
+
+    # Debounce on the B variant only (mono is fast enough to update at will).
+    # _MIN_UPDATE_INTERVAL == 0 disables the gate.
+    if (_MIN_UPDATE_INTERVAL > 0
+            and not full_refresh
+            and (now - _last_update_ts) < _MIN_UPDATE_INTERVAL):
+        log.debug(f"Display: debounced ({now - _last_update_ts:.1f}s < {_MIN_UPDATE_INTERVAL}s)")
+        return
 
-    cmd = ["sudo", str(PROJECT_DIR / "venv/bin/python3"), str(UI_SCRIPT)]
+    _last_update_ts = now
+    _last_payload = payload
+
+    # Anti-ghosting: every N-th update force a full redraw on the mono variant.
+    # The B variant always does a full refresh anyway, so this branch is a no-op there.
+    if not _VARIANT_B:
+        _display_update_count += 1
+        if not full_refresh and _display_update_count % FULL_REFRESH_EVERY == 0:
+            full_refresh = True
+
+    # `sudo` strips most environment variables (env_reset Defaults). Propagate
+    # the display-related ones via /usr/bin/env so the spawned UI script sees
+    # the correct driver variant (OCG_DISPLAY_VARIANT) and GPIO backend
+    # (GPIOZERO_PIN_FACTORY). Without this the subprocess falls back to
+    # defaults (mono driver, rpigpio backend) which on a B-variant panel +
+    # modern kernel renders inverted colors.
+    propagate_env = {
+        k: v for k, v in os.environ.items()
+        if k in ("OCG_DISPLAY_VARIANT", "GPIOZERO_PIN_FACTORY", "OCG_UPS_BUS", "OCG_UPS_ADDR")
+    }
+    cmd = ["sudo", "/usr/bin/env"]
+    cmd.extend(f"{k}={v}" for k, v in propagate_env.items())
+    cmd.extend([str(PROJECT_DIR / "venv/bin/python3"), str(UI_SCRIPT)])
     if mood:
         cmd.extend(["--mood", mood])
     if text:
diff --git a/src/ui/gotchi_ui.py b/src/ui/gotchi_ui.py
index cdbe2e7..9797f2d 100644
--- a/src/ui/gotchi_ui.py
+++ b/src/ui/gotchi_ui.py
@@ -39,11 +39,36 @@ def get_level_progress():
 # Add drivers to path
 sys.path.append(str(SRC_DIR / "drivers"))
 
-try:
-    import epd2in13_V4 as epd_driver
-except ImportError:
-    print("Error: EPD driver not found")
-    sys.exit(1)
+# Display variant selection.
+#   OCG_DISPLAY_VARIANT=mono  → epd2in13_V4    (B&W, default, partial refresh)
+#   OCG_DISPLAY_VARIANT=b     → epd2in13b_V4   (3-color, full refresh only)
+#   OCG_DISPLAY_VARIANT=auto  → prefer B if its driver is importable, else mono
+_DISPLAY_VARIANT = os.environ.get("OCG_DISPLAY_VARIANT", "mono").strip().lower()
+EPD_VARIANT_B = False  # set True after successful B-variant import
+
+if _DISPLAY_VARIANT in ("b", "epd2in13b", "3color", "tricolor"):
+    try:
+        import epd2in13b_V4 as epd_driver
+        EPD_VARIANT_B = True
+    except ImportError:
+        print("Error: OCG_DISPLAY_VARIANT=b but epd2in13b_V4 driver not found")
+        sys.exit(1)
+elif _DISPLAY_VARIANT == "auto":
+    try:
+        import epd2in13b_V4 as epd_driver
+        EPD_VARIANT_B = True
+    except ImportError:
+        try:
+            import epd2in13_V4 as epd_driver
+        except ImportError:
+            print("Error: EPD driver not found (tried epd2in13b_V4 and epd2in13_V4)")
+            sys.exit(1)
+else:  # mono / default / unknown
+    try:
+        import epd2in13_V4 as epd_driver
+    except ImportError:
+        print("Error: EPD driver not found")
+        sys.exit(1)
 
 def get_system_stats():
     """Gather system metrics."""
@@ -147,11 +172,14 @@ def render_ui(mood="happy", status_text="", fast_mode=True):
     epd = epd_driver.EPD()
     gpio_released = False
     try:
-        if fast_mode:
-            epd.init()
-        else:
-            epd.init()
-            epd.Clear(0xFF)
+        epd.init()
+        if not fast_mode:
+            # Full clear before drawing.
+            #   mono variant accepts an explicit fill colour; B-variant clears black + red layers internally.
+            if EPD_VARIANT_B:
+                epd.Clear()
+            else:
+                epd.Clear(0xFF)
             
         # Canvas (V4: 122x250 native, logic Horizontal 250x122)
         WIDTH, HEIGHT = 250, 122
@@ -451,12 +479,20 @@ def get_wrapped_text(text, font, fallback_font, max_w):
         # image = image.rotate(180) # Uncomment if you want to test rotation
         rotated_image = image.rotate(180)
         
-        # Update Display (Standard Full only)
-        # Using displayPartBaseImage for fast_mode is safer than display_fast if contrast is issue
-        if fast_mode:
-            epd.displayPartBaseImage(epd.getbuffer(rotated_image))
+        # Update Display
+        #   mono variant: partial-base for fast_mode (no full refresh, lower flicker), full display() otherwise
+        #   B variant: only full refresh exists (3-color panel). display() takes (black, red); the red plane
+        #              stays empty (all-white image, all-0xFF buffer ⇒ no red pixels) so drawings render as
+        #              black-on-white. Red would need explicit drawing into a separate PIL image.
+        if EPD_VARIANT_B:
+            black_buf = epd.getbuffer(rotated_image)
+            red_blank = Image.new("1", rotated_image.size, 255)  # all white = no red
+            epd.display(black_buf, epd.getbuffer(red_blank))
         else:
-            epd.display(epd.getbuffer(rotated_image))
+            if fast_mode:
+                epd.displayPartBaseImage(epd.getbuffer(rotated_image))
+            else:
+                epd.display(epd.getbuffer(rotated_image))
 
         epd.sleep()
         gpio_released = True

From 288aa3d70e74b21acdeed8c8525cae3a10aca8a5 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 03:11:54 +0200
Subject: [PATCH 11/21] fix(heartbeat): reinforce BOT_LANGUAGE pin for the
 reflection prompt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`HEARTBEAT.md` template is English-only by design (it's the system
instruction the LLM follows). When BOT_LANGUAGE is set to a non-
English locale, the system-level language directive in
`build_system_context()` is correctly applied, but the long English
user prompt — soul + identity + heartbeat template + recent context
— overpowers it and the model writes the reflection in English.

Add a final language reminder right before the LLM is invoked, so the
language pin lives at the end of the user prompt where it's hardest
to override. Mirrors the names map already used by
`_language_directive()` in `llm/prompts.py`. No-op for BOT_LANGUAGE=en
or unset.

Telegram replies were already in the correct language because regular
chat handlers don't have a comparably long instruction template.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/bot/heartbeat.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/bot/heartbeat.py b/src/bot/heartbeat.py
index dfab0d7..8325720 100644
--- a/src/bot/heartbeat.py
+++ b/src/bot/heartbeat.py
@@ -294,6 +294,21 @@ async def send_heartbeat(context):
         prompt += "\nThink about something DIFFERENT this time.\n"
 
     prompt += "\n\n[Reflect. Think out loud. Then FACE: and SAY:]"
+
+    # The HEARTBEAT.md template is English; without a final language pin
+    # the model defaults to English even when BOT_LANGUAGE is set, because
+    # the long English user prompt overpowers the system-level directive.
+    # Reinforce the pin here so the reflection itself follows BOT_LANGUAGE.
+    from config import BOT_LANGUAGE
+    _LANG_NAMES = {
+        "de": "Deutsch", "en": "English", "ru": "Русский", "es": "Español",
+        "fr": "Français", "it": "Italiano", "pt": "Português", "nl": "Nederlands",
+        "pl": "Polski", "tr": "Türkçe", "ja": "日本語", "zh": "中文", "ko": "한국어",
+    }
+    _lang_code = (BOT_LANGUAGE or "").strip().lower()
+    if _lang_code and _lang_code != "en":
+        _lang_name = _LANG_NAMES.get(_lang_code, _lang_code)
+        prompt += f"\n\nIMPORTANT: write the reflection text and the SAY: bubble in **{_lang_name}**, not English. The template above is English only because it's a system instruction — your output must be in {_lang_name}."
     
     # 7. Call LLM
     router = get_router()

From b5b427f77a862fbabf4da621de5eb022a111c14f Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 03:24:37 +0200
Subject: [PATCH 12/21] feat(rag): generic external RAG service integration on
 deploy

Bringing the cleaned (server-agnostic, no private-repo refs) version
of the RAG REST client into deploy/all-features so the running bot is
consistent with the upstream PR #10. Mirrors feat/bot-rag-integration
@ b20c55a.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                 |   1 +
 src/bot/handlers.py          | 308 +++++++++--------------------------
 src/config.py                |  16 +-
 src/llm/litellm_connector.py | 112 ++++++++-----
 src/llm/rag_client.py        | 160 ++++++++++++++++++
 src/llm/rag_mcp_client.py    |  67 ++++++++
 6 files changed, 384 insertions(+), 280 deletions(-)
 create mode 100644 src/llm/rag_client.py
 create mode 100644 src/llm/rag_mcp_client.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 63cf38f..d54db95 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ All notable changes to the OpenClawGotchi project will be documented in this fil
 ## [Unreleased] - 2026-05-09
 
 ### Added
+- **External RAG service integration via REST**: opt-in connector to any RAG (Retrieval-Augmented Generation) backend that exposes a small documented HTTP contract (see `src/llm/rag_client.py` module docstring). New `/rag` Telegram command (`/rag`, `/rag <query>`, `/rag --top N <query>`). LLM tools `query_rag(query, top_k)` and `persist_to_rag(text, title, tags)`. Env vars: `RAG_API_URL` (empty disables), `RAG_API_KEY`, `RAG_DEFAULT_COLLECTIONS`. `src/llm/rag_mcp_client.py` ships as a roadmap stub for a future MCP-client follow-up.
 - **`/model` Telegram command**: inline-keyboard model picker. Without args it opens buttons for every preset (gemini, glm, ollama). With an argument (`/model glm`) it falls through to the existing `/use` flow. `/use` and `/switch` remain as text aliases.
 - **Live Ollama discovery**: tapping `🦙 ollama ▸` queries the configured Ollama server (`/api/tags` + `/api/show`), filters by `capabilities.tools`, and only lists tool-capable models. Falls back to all installed models with a warning when none advertise tools. Includes `◂ Back` button and a graceful "could not reach server" state. New env vars: `OLLAMA_MODEL` (default `qwen2.5:14b`) and `OLLAMA_API_BASE` (placeholder default `http://ollama-server:11434`).
 - **Persistent model choice**: `/model` and `/use` now write the selection to `data/active_model.json` (gitignored). On startup `LiteLLMConnector` restores it before falling back to `DEFAULT_LITE_PRESET`. Survives `systemctl restart` and reboots.
diff --git a/src/bot/handlers.py b/src/bot/handlers.py
index 775351e..0a5f651 100644
--- a/src/bot/handlers.py
+++ b/src/bot/handlers.py
@@ -8,7 +8,7 @@
 import tempfile
 from pathlib import Path
 
-from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
+from telegram import Update
 from telegram.constants import ChatAction
 from telegram.ext import ContextTypes
 
@@ -29,7 +29,7 @@
 from memory.summarize import optimize_history
 from cron.scheduler import add_cron_job, list_cron_jobs, remove_cron_job
 from skills.loader import get_eligible_skills
-from config import LLM_PRESETS, OPENAI_API_KEY, OLLAMA_API_BASE
+from config import LLM_PRESETS, OPENAI_API_KEY
 from llm.prompts import build_system_context, build_vault_context
 
 log = logging.getLogger(__name__)
@@ -443,6 +443,81 @@ async def cmd_recall(update: Update, context: ContextTypes.DEFAULT_TYPE):
     await update.message.reply_text(msg)
 
 
+async def cmd_rag(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Ad-hoc query against the configured RAG knowledge vault.
+
+    Usage:
+      /rag <query>          → search, top 5 hits
+      /rag --top 10 <query> → search, top 10 hits
+      /rag                  → show config + reachability
+    """
+    if not is_allowed(update.effective_user.id, update.effective_chat.id):
+        return
+
+    from llm import rag_client
+
+    args = list(context.args or [])
+    if not args:
+        # Status mode — show config + reachability
+        if not rag_client.is_configured():
+            await update.message.reply_text(
+                "🧠 *RAG* not configured.\n\n"
+                "Set `RAG_API_URL=http://your-rag-host:8765` in `.env` and restart the bot.",
+                parse_mode="Markdown",
+            )
+            return
+        h = rag_client.health()
+        if h is None:
+            from config import RAG_API_URL
+            await update.message.reply_text(
+                f"🧠 RAG configured at `{RAG_API_URL}` but unreachable.",
+                parse_mode="Markdown",
+            )
+            return
+        comps = h.get("components") or []
+        lines = [f"✅ RAG *{h.get('version','?')}* online", "", "*Components:*"]
+        for c in comps:
+            sym = "✅" if c.get("healthy") else "❌"
+            lines.append(f"  {sym} {c.get('name')} ({c.get('latency_ms', '?'):.1f} ms)" if isinstance(c.get('latency_ms'), (int, float)) else f"  {sym} {c.get('name')}")
+        await update.message.reply_text("\n".join(lines), parse_mode="Markdown")
+        return
+
+    # Parse --top N
+    top_k = 5
+    if args[0] == "--top" and len(args) >= 3:
+        try:
+            top_k = max(1, min(int(args[1]), 50))
+            args = args[2:]
+        except ValueError:
+            pass
+
+    query = " ".join(args).strip()
+    if not query:
+        await update.message.reply_text("Usage: `/rag <query>`", parse_mode="Markdown")
+        return
+
+    if not rag_client.is_configured():
+        await update.message.reply_text("🧠 RAG not configured. Set `RAG_API_URL` in `.env`.", parse_mode="Markdown")
+        return
+
+    await update.message.chat.send_action(action=ChatAction.TYPING)
+    import asyncio
+    response = await asyncio.to_thread(rag_client.query, query, top_k)
+    if response is None:
+        await update.message.reply_text("❌ RAG service unreachable.")
+        return
+
+    formatted = rag_client.format_hits(response, max_chars=3500)
+    duration = response.get("duration_ms", 0)
+    reranked = " (reranked)" if response.get("reranked") else ""
+    header = f"🧠 *{len(response.get('hits') or [])} hits* in {duration:.0f} ms{reranked}\n\n"
+    # Telegram Markdown is finicky; use plain text body inside backticks
+    await update.message.reply_text(
+        header + "```\n" + formatted + "\n```",
+        parse_mode="Markdown",
+    )
+
+
 async def cmd_vault(update: Update, context: ContextTypes.DEFAULT_TYPE):
     """Show vault status."""
     user = update.effective_user
@@ -886,238 +961,11 @@ async def cmd_use(update: Update, context: ContextTypes.DEFAULT_TYPE):
     if "gemini" in model_key: emoji = "♊️"
     
     await update.message.reply_text(f"{emoji} Switched to *{model_key.upper()}*!\nModel: {preset['model']}", parse_mode="Markdown")
-
+    
     # Visual update
     show_face(mood="happy", text=f"Model: {model_key.upper()}")
 
 
-# --- /model command: inline-button picker with live Ollama discovery ---
-
-_MODEL_EMOJI = {"gemini": "♊️", "glm": "🇨🇳", "ollama": "🦙"}
-
-
-def _ollama_list_with_capabilities(timeout: float = 4.0) -> list[dict]:
-    """Fetch installed Ollama models + capabilities. Returns [{name, supports_tools}]."""
-    import requests
-    base = (OLLAMA_API_BASE or "").rstrip("/")
-    if not base:
-        return []
-    try:
-        r = requests.get(f"{base}/api/tags", timeout=timeout)
-        r.raise_for_status()
-        names = [m.get("name") for m in r.json().get("models", []) if m.get("name")]
-    except Exception as e:
-        log.warning(f"Ollama /api/tags failed: {e}")
-        return []
-
-    out = []
-    for name in names:
-        supports = False
-        try:
-            sr = requests.post(f"{base}/api/show", json={"model": name}, timeout=timeout)
-            if sr.ok:
-                caps = sr.json().get("capabilities") or []
-                supports = "tools" in caps
-        except Exception:
-            pass
-        out.append({"name": name, "supports_tools": supports})
-    return out
-
-
-def _top_model_markup(current: str) -> InlineKeyboardMarkup:
-    rows = []
-    for key in LLM_PRESETS.keys():
-        emoji = _MODEL_EMOJI.get(key, "🔹")
-        active = LLM_PRESETS[key]["model"] == current or (
-            key == "ollama" and isinstance(current, str) and current.startswith("ollama_chat/")
-        )
-        marker = " ✅" if active else ""
-        suffix = " ▸" if key == "ollama" else ""
-        rows.append([InlineKeyboardButton(f"{emoji} {key}{marker}{suffix}", callback_data=f"model:{key}")])
-    return InlineKeyboardMarkup(rows)
-
-
-async def cmd_model(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    """Show inline buttons to switch LLM model. With argument acts like /use."""
-    if not is_allowed(update.effective_user.id, update.effective_chat.id):
-        return
-
-    if context.args:
-        return await cmd_use(update, context)
-
-    router = get_router()
-    current = router.litellm.model
-    text = f"🦄 *Current:* `{current}`\n\nPick a model:"
-    await update.message.reply_text(text, parse_mode="Markdown", reply_markup=_top_model_markup(current))
-
-
-async def cb_model(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    """Callback for /model inline buttons (presets + Ollama submenu)."""
-    import asyncio
-    query = update.callback_query
-    if not is_allowed(query.from_user.id, query.message.chat_id):
-        await query.answer("Not allowed", show_alert=True)
-        return
-    await query.answer()
-
-    data = query.data or ""
-    router = get_router()
-
-    # Specific Ollama model switch: omd:<name>
-    if data.startswith("omd:"):
-        model_name = data.split(":", 1)[1]
-        full = f"ollama_chat/{model_name}"
-        router.litellm.set_model(full, OLLAMA_API_BASE)
-        router.force_lite = True
-        await query.edit_message_text(
-            f"🦙 Switched to *Ollama / {model_name}*\n`{full}`",
-            parse_mode="Markdown"
-        )
-        show_face(mood="happy", text=f"Ollama: {model_name[:20]}")
-        return
-
-    key = data.split(":", 1)[-1]
-
-    # Back to top menu
-    if key == "back":
-        await query.edit_message_text(
-            f"🦄 *Current:* `{router.litellm.model}`\n\nPick a model:",
-            parse_mode="Markdown",
-            reply_markup=_top_model_markup(router.litellm.model)
-        )
-        return
-
-    # Ollama: fetch and show submenu (only tool-capable models)
-    if key == "ollama":
-        await query.edit_message_text("🦙 Fetching models from Ollama server…")
-        models = await asyncio.to_thread(_ollama_list_with_capabilities)
-
-        if not models:
-            await query.edit_message_text(
-                f"❌ Could not reach Ollama at `{OLLAMA_API_BASE}`",
-                parse_mode="Markdown",
-                reply_markup=InlineKeyboardMarkup([[InlineKeyboardButton("◂ Back", callback_data="model:back")]])
-            )
-            return
-
-        tool_models = [m for m in models if m["supports_tools"]]
-        show_models = tool_models if tool_models else models
-        note = "" if tool_models else "\n⚠️ _No tool-capable models found, showing all_"
-
-        rows = []
-        for m in show_models:
-            name = m["name"]
-            cb = f"omd:{name}"
-            if len(cb.encode("utf-8")) > 60:
-                continue  # Telegram callback_data limit (64 bytes)
-            tag = "🔧" if m["supports_tools"] else "🔸"
-            rows.append([InlineKeyboardButton(f"{tag} {name}", callback_data=cb)])
-        rows.append([InlineKeyboardButton("◂ Back", callback_data="model:back")])
-
-        await query.edit_message_text(
-            f"🦙 *Ollama models* ({len(show_models)}){note}\n\n🔧 = supports tools",
-            parse_mode="Markdown",
-            reply_markup=InlineKeyboardMarkup(rows)
-        )
-        return
-
-    # Static presets (gemini, glm)
-    if key not in LLM_PRESETS:
-        await query.edit_message_text("❌ Unknown model.")
-        return
-
-    preset = LLM_PRESETS[key]
-    router.litellm.set_model(preset["model"], preset["api_base"])
-    router.force_lite = True
-
-    emoji = _MODEL_EMOJI.get(key, "🔹")
-    await query.edit_message_text(
-        f"{emoji} Switched to *{key.upper()}*\n`{preset['model']}`",
-        parse_mode="Markdown"
-    )
-    show_face(mood="happy", text=f"Model: {key.upper()}")
-
-
-async def cmd_update(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    """Pull latest code from upstream, refresh deps, restart service."""
-    import asyncio
-    import subprocess
-    from config import PROJECT_DIR, get_admin_id
-
-    user_id = update.effective_user.id
-    chat_id = update.effective_chat.id
-    if not is_allowed(user_id, chat_id):
-        return
-
-    # Owner-only — don't let any allowed user remote-update the bot
-    admin_id = get_admin_id()
-    if admin_id and user_id != admin_id:
-        await update.message.reply_text("⛔ Owner-only command.")
-        return
-
-    script = PROJECT_DIR / "scripts" / "auto_update.sh"
-    if not script.exists():
-        await update.message.reply_text(f"❌ Update script not found: `{script}`", parse_mode="Markdown")
-        return
-
-    check_only = bool(context.args and context.args[0].lower() in ("check", "--check"))
-
-    msg = await update.message.reply_text("🔍 Checking for updates…" if check_only else "⬇️ Updating…")
-
-    try:
-        cmd = ["bash", str(script)] + (["--check"] if check_only else [])
-        proc = await asyncio.to_thread(
-            subprocess.run, cmd,
-            cwd=str(PROJECT_DIR),
-            capture_output=True, text=True, timeout=300
-        )
-        out = (proc.stdout or "") + (proc.stderr or "")
-        out = out.strip()[-3500:]  # Telegram message size budget
-
-        # check-mode: exit 0 = updates available, 1 = up-to-date
-        if check_only:
-            status = "🆕 Updates available" if proc.returncode == 0 else "✅ Up-to-date"
-            await msg.edit_text(f"{status}\n\n```\n{out}\n```", parse_mode="Markdown")
-            return
-
-        if proc.returncode == 0:
-            await msg.edit_text(f"✅ Update complete\n\n```\n{out}\n```", parse_mode="Markdown")
-            show_face(mood="excited", text="Updated!")
-        elif proc.returncode == 4:
-            await msg.edit_text(
-                f"⚠️ Update failed — auto-rolled back to previous version\n\n```\n{out}\n```",
-                parse_mode="Markdown"
-            )
-            show_face(mood="confused", text="Update rolled back")
-        else:
-            await msg.edit_text(f"❌ Update failed (exit {proc.returncode})\n\n```\n{out}\n```", parse_mode="Markdown")
-            show_face(mood="confused", text="Update failed")
-    except subprocess.TimeoutExpired:
-        await msg.edit_text("❌ Update timed out after 5 min.")
-    except Exception as e:
-        await msg.edit_text(f"❌ Update error: `{e}`", parse_mode="Markdown")
-
-
-async def cmd_battery(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    """Handle /battery command — show UPS HAT (C) status."""
-    if not is_allowed(update.effective_user.id, update.effective_chat.id):
-        return
-
-    from hardware import battery
-
-    reading = battery.read()
-    if reading is None:
-        await update.message.reply_text(
-            "🔌 No UPS HAT detected.\n"
-            "Make sure I2C is enabled and the UPS HAT (C) is connected, "
-            "then `/battery` again. (Check `i2cdetect -y 1` should list 0x43.)",
-            parse_mode="Markdown",
-        )
-        return
-
-    await update.message.reply_text(reading.long())
-
-
 async def cmd_memory(update: Update, context: ContextTypes.DEFAULT_TYPE):
     """Handle /memory command — show database stats."""
     user = update.effective_user
diff --git a/src/config.py b/src/config.py
index fe49b39..aafac39 100644
--- a/src/config.py
+++ b/src/config.py
@@ -34,8 +34,16 @@ def _env_flag(name: str, default: bool = False) -> bool:
 GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini/gemini-1.5-flash")
 GEMINI_API_BASE = os.environ.get("GEMINI_API_BASE", "")  # Optional override for Z.ai/OpenAI
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
-OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:14b")
-OLLAMA_API_BASE = os.environ.get("OLLAMA_API_BASE", "http://ollama-server:11434")
+
+# Optional external RAG (Retrieval-Augmented Generation) service.
+# When RAG_API_URL is empty the rag tools degrade gracefully (no-op).
+# Expected REST contract is documented in src/llm/rag_client.py.
+RAG_API_URL = os.environ.get("RAG_API_URL", "").rstrip("/")
+RAG_API_KEY = os.environ.get("RAG_API_KEY", "")
+RAG_DEFAULT_COLLECTIONS = [
+    c.strip() for c in os.environ.get("RAG_DEFAULT_COLLECTIONS", "agent_notes").split(",") if c.strip()
+]
+
 BOT_LANGUAGE = os.environ.get("BOT_LANGUAGE", "en")  # Default response language
 GROUP_CHAT_ID = int(os.environ.get("GROUP_CHAT_ID", "0"))  # Optional group for heartbeat
 ENABLE_LITELLM_TOOLS = _env_flag("ENABLE_LITELLM_TOOLS", True)
@@ -56,10 +64,6 @@ def _env_flag(name: str, default: bool = False) -> bool:
     "glm": {
         "model": "anthropic/glm-5.1",
         "api_base": "https://api.z.ai/api/anthropic"
-    },
-    "ollama": {
-        "model": f"ollama_chat/{OLLAMA_MODEL}",
-        "api_base": OLLAMA_API_BASE
     }
 }
 
diff --git a/src/llm/litellm_connector.py b/src/llm/litellm_connector.py
index 9fc9011..8b33eca 100644
--- a/src/llm/litellm_connector.py
+++ b/src/llm/litellm_connector.py
@@ -5,6 +5,7 @@
 import contextvars
 import json
 import logging
+import os
 import shlex
 import subprocess
 from pathlib import Path
@@ -604,6 +605,46 @@ def remove_scheduled_task(job_id: str) -> str:
         return f"Error: {e}"
 
 
+def query_rag(query: str, top_k: int = 5) -> str:
+    """Search the configured RAG vault for snippets relevant to `query`.
+
+    The RAG service is a long-term, Markdown-first memory backend reachable
+    via REST (see RAG_API_URL env var and src/llm/rag_client.py for the API
+    contract). Use this to ground answers in the user's notes before falling
+    back to general knowledge. Returns a formatted list of top hits with
+    file path, score and chunk text. When RAG is not configured
+    (RAG_API_URL empty) returns a clear hint instead of failing.
+    """
+    from llm import rag_client
+    if not rag_client.is_configured():
+        return "RAG not configured (set RAG_API_URL in .env). Falling back to in-bot memory."
+    if not query or not query.strip():
+        return "Error: query is empty"
+    response = rag_client.query(query, top_k=top_k)
+    if response is None:
+        return f"Error: RAG service unreachable at {os.environ.get('RAG_API_URL', '?')}"
+    return rag_client.format_hits(response)
+
+
+def persist_to_rag(text: str, title: str = "", tags: str = "") -> str:
+    """Save a markdown note / reflection to the configured RAG vault.
+
+    Use sparingly — only for content worth recalling later (decisions,
+    preferences, project context). Casual chat does NOT belong here.
+    `tags` is a comma-separated string for ergonomics.
+    """
+    from llm import rag_client
+    if not rag_client.is_configured():
+        return "RAG not configured (set RAG_API_URL in .env)."
+    if not text or len(text.strip()) < 10:
+        return "Error: text too short to be worth persisting (min 10 chars)"
+    tag_list = [t.strip() for t in (tags or "").split(",") if t.strip()] or None
+    response = rag_client.persist(text, title=title or None, tags=tag_list)
+    if response is None:
+        return f"Error: RAG service unreachable at {os.environ.get('RAG_API_URL', '?')}"
+    return f"Persisted to vault. Server: {str(response)[:200]}"
+
+
 def health_check() -> str:
     """
     Run system health check. Use this to diagnose problems!
@@ -1053,6 +1094,23 @@ def manage_service(service: str, action: str = "status") -> str:
             "limit": {"type": "integer"}
         }, "required": ["query"]}
     }},
+    {"type": "function", "function": {
+        "name": "query_rag",
+        "description": "Search the configured RAG vault — long-term, Markdown-first memory hosted on a separate service. Use BEFORE answering questions about user notes / projects / past decisions, to ground replies in real content rather than hallucinate. Returns top hits with file path + excerpt + score. Disabled when RAG_API_URL is not set.",
+        "parameters": {"type": "object", "properties": {
+            "query": {"type": "string", "description": "Natural-language search query"},
+            "top_k": {"type": "integer", "description": "How many hits to return (default 5, max 50)"}
+        }, "required": ["query"]}
+    }},
+    {"type": "function", "function": {
+        "name": "persist_to_rag",
+        "description": "Save a markdown note / reflection to the configured RAG vault for future recall. Use SPARINGLY — only for content worth recalling later (decisions, preferences, project context). Do NOT persist casual chat. Disabled when RAG_API_URL is not set.",
+        "parameters": {"type": "object", "properties": {
+            "text": {"type": "string", "description": "Markdown body of the note (10+ chars)"},
+            "title": {"type": "string", "description": "Optional short title"},
+            "tags": {"type": "string", "description": "Optional comma-separated tags"}
+        }, "required": ["text"]}
+    }},
     {"type": "function", "function": {
         "name": "add_custom_face",
         "description": "Add a custom face to data/custom_faces.json. After adding, the face becomes available immediately. ALWAYS output FACE: <name> and SAY: <short text> in your FINAL reply to the user so they see the new face on the E-Ink display.",
@@ -1114,6 +1172,8 @@ def manage_service(service: str, action: str = "status") -> str:
     "vault_read": vault_read,
     "vault_list": vault_list,
     "vault_search": vault_search,
+    "query_rag": query_rag,
+    "persist_to_rag": persist_to_rag,
 }
 
 
@@ -1146,6 +1206,8 @@ def manage_service(service: str, action: str = "status") -> str:
     "vault_read": "📘",
     "vault_list": "📂",
     "vault_search": "🔎",
+    "query_rag": "🧠",
+    "persist_to_rag": "💾",
 }
 
 
@@ -1245,63 +1307,25 @@ def _build_tool_footer(actions: list[str]) -> str:
 # CONNECTOR
 # ============================================================
 
-_ACTIVE_MODEL_FILE = None  # Set lazily to avoid circular import
-
-
-def _active_model_path() -> Path:
-    global _ACTIVE_MODEL_FILE
-    if _ACTIVE_MODEL_FILE is None:
-        from config import DATA_DIR
-        _ACTIVE_MODEL_FILE = DATA_DIR / "active_model.json"
-    return _ACTIVE_MODEL_FILE
-
-
-def _load_active_model() -> Optional[dict]:
-    try:
-        p = _active_model_path()
-        if p.exists():
-            return json.loads(p.read_text())
-    except Exception as e:
-        log.warning(f"Could not load active_model.json: {e}")
-    return None
-
-
-def _save_active_model(model: str, api_base: Optional[str]) -> None:
-    try:
-        p = _active_model_path()
-        p.parent.mkdir(parents=True, exist_ok=True)
-        p.write_text(json.dumps({"model": model, "api_base": api_base}, indent=2))
-    except Exception as e:
-        log.warning(f"Could not save active_model.json: {e}")
-
-
 class LiteLLMConnector(LLMConnector):
     """LiteLLM connector with tools."""
-
+    
     name = "litellm"
-
+    
     def __init__(self, model: str = None, api_base: str = None):
         from config import DEFAULT_LITE_PRESET, LLM_PRESETS, GEMINI_API_BASE
         if model is not None:
             self.model = model
             self.api_base = api_base
         else:
-            # Persisted choice from /model (survives restart) takes priority
-            saved = _load_active_model()
-            if saved and saved.get("model"):
-                self.model = saved["model"]
-                self.api_base = saved.get("api_base")
-                log.info(f"[LiteLLM] Restored active model: {self.model}")
-            else:
-                preset = LLM_PRESETS.get(DEFAULT_LITE_PRESET, LLM_PRESETS["glm"])
-                self.model = preset["model"]
-                self.api_base = preset.get("api_base") or GEMINI_API_BASE or None
+            preset = LLM_PRESETS.get(DEFAULT_LITE_PRESET, LLM_PRESETS["glm"])
+            self.model = preset["model"]
+            self.api_base = preset.get("api_base") or GEMINI_API_BASE or None
 
     def set_model(self, model: str, api_base: str = None):
-        """Dynamically switch model and api_base. Persists across restarts."""
+        """Dynamically switch model and api_base."""
         self.model = model
         self.api_base = api_base
-        _save_active_model(model, api_base)
     
     def is_available(self) -> bool:
         return LITELLM_AVAILABLE
diff --git a/src/llm/rag_client.py b/src/llm/rag_client.py
new file mode 100644
index 0000000..c1caa91
--- /dev/null
+++ b/src/llm/rag_client.py
@@ -0,0 +1,160 @@
+"""
+Thin REST client for an external RAG (Retrieval-Augmented Generation) service.
+
+The bot itself stays small (Pi Zero 2W) — heavy retrieval / embedding /
+reranking lives on a separate host that exposes a HTTP API. This module
+is the bot-side glue.
+
+Expected API contract (kept deliberately small so any compatible server
+can be swapped in by setting RAG_API_URL):
+
+    POST {RAG_API_URL}/rag/query
+        body: {"query": str, "collections": [str], "top_k": int,
+               "rerank": bool|null}
+        200 → {"query": str, "hits": [{"id", "score", "collection",
+                                       "payload": {"chunk_text", "source_path",
+                                                   …}}],
+               "duration_ms": float, "reranked": bool, …}
+
+    POST {RAG_API_URL}/rag/ingest-text
+        body: {"text": str, "title": str, "collection": str,
+               "tags": [str], "source_origin": str}
+        202 → {"accepted": int, "queued": int, "completed": [...]}
+
+    GET  {RAG_API_URL}/health
+        200 → {"status": "ok", "version": str,
+               "components": [{"name", "healthy", "latency_ms"}]}
+
+When RAG_API_URL is empty (default) the client is fully disabled and every
+public function returns ``None`` / ``"…not configured"`` instead of raising,
+so installs without a RAG backend are unaffected. When RAG_API_KEY is set
+it's sent as a `Authorization: Bearer …` header on every request.
+
+For an MCP-based (rather than REST) integration, see ``rag_mcp_client.py``.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Optional
+
+from config import RAG_API_URL, RAG_API_KEY, RAG_DEFAULT_COLLECTIONS
+
+log = logging.getLogger(__name__)
+
+DEFAULT_TIMEOUT_S = 8.0
+
+
+def is_configured() -> bool:
+    """True when RAG_API_URL is set in the environment."""
+    return bool(RAG_API_URL)
+
+
+def _headers() -> dict[str, str]:
+    h = {"Content-Type": "application/json", "Accept": "application/json"}
+    if RAG_API_KEY:
+        h["Authorization"] = f"Bearer {RAG_API_KEY}"
+    return h
+
+
+def _post(path: str, body: dict, timeout: float = DEFAULT_TIMEOUT_S) -> Optional[dict]:
+    """POST JSON, return parsed JSON or None on any failure."""
+    if not is_configured():
+        return None
+    import requests  # already pulled in by litellm
+
+    url = f"{RAG_API_URL}{path}"
+    try:
+        r = requests.post(url, json=body, headers=_headers(), timeout=timeout)
+        if r.status_code >= 400:
+            log.warning(f"RAG {path} → HTTP {r.status_code}: {r.text[:200]}")
+            return None
+        return r.json()
+    except Exception as e:
+        log.warning(f"RAG {path} unreachable: {e}")
+        return None
+
+
+def query(
+    text: str,
+    top_k: int = 5,
+    collections: Optional[list[str]] = None,
+    rerank: Optional[bool] = None,
+) -> Optional[dict]:
+    """Retrieve top-k snippets relevant to `text`.
+
+    Returns the raw QueryResponse dict (with `hits`, `duration_ms`, …) or
+    None if disabled / unreachable. Callers usually format with
+    ``format_hits()`` for human / LLM presentation.
+    """
+    if not text or not text.strip():
+        return None
+    body = {
+        "query": text.strip()[:8192],
+        "collections": collections or RAG_DEFAULT_COLLECTIONS,
+        "top_k": max(1, min(int(top_k), 50)),
+    }
+    if rerank is not None:
+        body["rerank"] = bool(rerank)
+    return _post("/rag/query", body)
+
+
+def persist(text: str, title: Optional[str] = None, tags: Optional[list[str]] = None,
+            collection: Optional[str] = None) -> Optional[dict]:
+    """Ingest a single markdown-ish text into the vault. Best-effort persistence.
+
+    Used for agent reflections, captured notes, etc. Returns the ingest
+    response dict or None on failure.
+    """
+    if not text or not text.strip():
+        return None
+    body: dict[str, Any] = {"text": text.strip()[:50000]}
+    if title:
+        body["title"] = title.strip()[:200]
+    if tags:
+        body["tags"] = [t.strip() for t in tags if t and t.strip()][:20]
+    if collection:
+        body["collection"] = collection
+    elif RAG_DEFAULT_COLLECTIONS:
+        body["collection"] = RAG_DEFAULT_COLLECTIONS[0]
+    return _post("/rag/ingest-text", body, timeout=20.0)
+
+
+def format_hits(response: dict, max_chars: int = 2000) -> str:
+    """Render a QueryResponse for human / LLM consumption.
+
+    Pulls ``payload.chunk_text`` (or ``payload.text``) plus source path
+    and score. Truncated to ``max_chars`` so it fits the bot's Telegram +
+    LLM message budget.
+    """
+    if not response or not response.get("hits"):
+        return "(no relevant snippets)"
+
+    parts: list[str] = []
+    for i, hit in enumerate(response["hits"], start=1):
+        payload = hit.get("payload") or {}
+        chunk = (payload.get("chunk_text") or payload.get("text") or "").strip()
+        source = payload.get("source_path") or payload.get("origin_file_name") or "?"
+        score = hit.get("score") or hit.get("rerank_score") or hit.get("rrf_score") or 0.0
+        # Compact source: just the file name, no full path
+        src_short = str(source).rsplit("/", 1)[-1]
+        parts.append(f"#{i} [{src_short}] (score={score:.3f})\n{chunk[:600]}")
+
+    out = "\n\n".join(parts)
+    if len(out) > max_chars:
+        out = out[: max_chars - 1] + "…"
+    return out
+
+
+def health() -> Optional[dict]:
+    """Probe ``/health`` — returns parsed body or None when unreachable."""
+    if not is_configured():
+        return None
+    import requests
+    try:
+        r = requests.get(f"{RAG_API_URL}/health", headers=_headers(), timeout=4.0)
+        if r.ok:
+            return r.json()
+    except Exception as e:
+        log.debug(f"RAG /health unreachable: {e}")
+    return None
diff --git a/src/llm/rag_mcp_client.py b/src/llm/rag_mcp_client.py
new file mode 100644
index 0000000..68950df
--- /dev/null
+++ b/src/llm/rag_mcp_client.py
@@ -0,0 +1,67 @@
+"""
+Skeleton for future MCP-client integration with an external RAG service.
+
+Today the bot talks to a RAG service via plain REST (`rag_client.py`) which
+is small, sync and dependency-light — fits the Pi Zero 2W's RAM budget.
+A future Option B would talk to the RAG service's MCP-SSE endpoint instead,
+which gives access to whatever tools that server exposes (`rag_search`,
+`rag_persist`, `rag_status`, …) dynamically rather than the curated REST
+surface this module wraps.
+
+Why it isn't wired in yet:
+
+  - the official `mcp` Python package pulls in `httpx[http2]`, `pydantic`,
+    `anyio`, etc. — non-trivial RAM hit on a 512 MB device
+  - SSE keeps a long-lived connection open per client, which doesn't play
+    nicely with the bot's "spawn-and-die" subprocess pattern for display
+    updates and other side jobs
+  - REST gets us 90 % of the value (search + persist) at 10 % of the cost
+
+When this gets activated:
+  1. add `mcp[cli]>=1.x` to requirements.txt
+  2. flesh out `RagMcpClient` below: `__aenter__`, `list_tools`,
+     `call_tool(name, args)`, retry/reconnect on SSE drops
+  3. extend `litellm_connector.TOOL_MAP` with a generic `rag_mcp_tool`
+     dispatcher OR auto-register every advertised MCP tool at startup
+  4. honour the same env vars as `rag_client.py` (`RAG_API_URL` /
+     `RAG_API_KEY`) but route to the MCP-SSE port (typically 8766)
+
+Until then this module is intentionally a placeholder so `import` doesn't
+break and the architectural shape is visible in the source.
+"""
+
+from __future__ import annotations
+
+import logging
+
+log = logging.getLogger(__name__)
+
+# Conventional SSE port for RAG-style MCP servers. Kept here so any future
+# activator only has to flip a flag rather than hunt constants. Override
+# via env or constructor when the actual server uses something else.
+DEFAULT_MCP_SSE_PORT = 8766
+
+
+def is_enabled() -> bool:
+    """Always False today — flips on in the future Option B PR."""
+    return False
+
+
+class RagMcpClient:
+    """Placeholder. Construct + call methods are stubs that raise."""
+
+    def __init__(self, base_url: str, api_key: str | None = None):
+        self.base_url = base_url
+        self.api_key = api_key
+
+    async def __aenter__(self):
+        raise NotImplementedError("RagMcpClient is a roadmap stub — see module docstring")
+
+    async def __aexit__(self, *exc):
+        return False
+
+    async def list_tools(self) -> list[dict]:
+        raise NotImplementedError("RagMcpClient is a roadmap stub")
+
+    async def call_tool(self, name: str, args: dict) -> dict:
+        raise NotImplementedError("RagMcpClient is a roadmap stub")

From 6a39e35c384965d4beb3117810d1770b37b2c7c8 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 03:32:30 +0200
Subject: [PATCH 13/21] feat(display): low-battery red accent on B variant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Demonstrates the new B-variant red layer with a real use case:
when a UPS HAT (C) is connected and reports < 20 % charge, the
" | 🔋NN%/X.XXV" suffix in the header stats line renders RED on
the panel instead of black. Healthy batteries / mono panels look
identical to before.

Implementation:
- render_ui() builds a parallel red_image (only on B variant) that
  stays all-white unless an accent is drawn into it.
- A best-effort battery probe (gracefully no-ops when no UPS HAT
  or smbus2 is missing) supplies the suffix.
- Below 20 %: prefix renders black, battery suffix renders into the
  red layer only — the panel composites black + red → suffix shows
  as red text inline.
- Otherwise: single black draw call, red layer stays blank.

Red is treated as an accent, never a background — so the user's
"don't paint the whole display red" rule is honoured.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ui/gotchi_ui.py | 56 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 9 deletions(-)

diff --git a/src/ui/gotchi_ui.py b/src/ui/gotchi_ui.py
index 9797f2d..d17897a 100644
--- a/src/ui/gotchi_ui.py
+++ b/src/ui/gotchi_ui.py
@@ -185,6 +185,23 @@ def render_ui(mood="happy", status_text="", fast_mode=True):
         WIDTH, HEIGHT = 250, 122
         image = Image.new('1', (WIDTH, HEIGHT), 255)
         draw = ImageDraw.Draw(image)
+
+        # B variant: build a parallel "red" image. All-white = no red pixels;
+        # we only paint into it for warning accents (e.g. low battery).
+        red_image = Image.new('1', (WIDTH, HEIGHT), 255) if EPD_VARIANT_B else None
+        red_draw = ImageDraw.Draw(red_image) if red_image is not None else None
+
+        # Best-effort battery probe — returns None when no UPS HAT or I2C off.
+        battery_text = ""
+        battery_low = False
+        try:
+            from hardware import battery as _battery
+            _b = _battery.read()
+            if _b is not None:
+                battery_text = _b.short()        # "🔋 87% / 8.12V"
+                battery_low = _b.percentage < 20  # red accent on B variant only
+        except Exception:
+            pass
         
         # --- FONTS ---
         try:
@@ -289,11 +306,29 @@ def draw_text_with_fallback(draw, xy, text, font, fallback_font, fill=0):
         draw.text((2, 1), display_name, font=font_ui, fill=0)
         
         # Right: Stats (Formatted clearly)
-        # e.g. T:45C | Free:120M | 14:00
+        # e.g. T:45C | Free:120M | 14:00 [| 🔋87%]
+        # The battery suffix renders as RED text on the B variant when low,
+        # otherwise black like the rest. Red is an accent only — never the
+        # background — so a healthy battery looks identical to the legacy
+        # display.
         txt_stats = f"T:{stats['temp']}°C | Free:{stats['mem_avail']}MB | {now}"
-        bbox = draw.textbbox((0, 0), txt_stats, font=font_ui)
+        battery_suffix = f" | {battery_text}" if battery_text else ""
+        full_stats = txt_stats + battery_suffix
+        bbox = draw.textbbox((0, 0), full_stats, font=font_ui)
         w = bbox[2] - bbox[0]
-        draw.text((WIDTH - w - 2, 1), txt_stats, font=font_ui, fill=0)
+        stats_x = WIDTH - w - 2
+
+        if battery_suffix and red_draw is not None and battery_low:
+            # Split: prefix in black layer, battery suffix in red layer only.
+            # Result on B panel: prefix renders black, suffix renders red.
+            bbox_pre = draw.textbbox((0, 0), txt_stats, font=font_ui)
+            pre_w = bbox_pre[2] - bbox_pre[0]
+            draw.text((stats_x, 1), txt_stats, font=font_ui, fill=0)
+            red_draw.text((stats_x + pre_w, 1), battery_suffix, font=font_ui, fill=0)
+        else:
+            # Mono variant, healthy battery, or no battery present:
+            # one black draw call, no red layer touched.
+            draw.text((stats_x, 1), full_stats, font=font_ui, fill=0)
         
         # Line
         draw.line((0, HEADER_H, WIDTH, HEADER_H), fill=0)
@@ -478,16 +513,19 @@ def get_wrapped_text(text, font, fallback_font, max_w):
         # Rotate 180 degrees if needed
         # image = image.rotate(180) # Uncomment if you want to test rotation
         rotated_image = image.rotate(180)
-        
+        rotated_red = red_image.rotate(180) if red_image is not None else None
+
         # Update Display
         #   mono variant: partial-base for fast_mode (no full refresh, lower flicker), full display() otherwise
-        #   B variant: only full refresh exists (3-color panel). display() takes (black, red); the red plane
-        #              stays empty (all-white image, all-0xFF buffer ⇒ no red pixels) so drawings render as
-        #              black-on-white. Red would need explicit drawing into a separate PIL image.
+        #   B variant: full refresh only (3-color panel). display() takes (black, red); when no warning
+        #              accent was drawn the red layer stays all-white (all-0xFF buffer ⇒ no red pixels)
+        #              and the panel renders pure black-on-white. Red is reserved for warning accents
+        #              (e.g. low-battery suffix), never used as background.
         if EPD_VARIANT_B:
             black_buf = epd.getbuffer(rotated_image)
-            red_blank = Image.new("1", rotated_image.size, 255)  # all white = no red
-            epd.display(black_buf, epd.getbuffer(red_blank))
+            red_buf = epd.getbuffer(rotated_red) if rotated_red is not None else \
+                      epd.getbuffer(Image.new("1", rotated_image.size, 255))
+            epd.display(black_buf, red_buf)
         else:
             if fast_mode:
                 epd.displayPartBaseImage(epd.getbuffer(rotated_image))

From 4315a39ece6e20e97e05590a6503f3e131650d83 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 03:33:44 +0200
Subject: [PATCH 14/21] docs(display-skill): document variant + red-as-accent
 rule
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The display skill currently tells the LLM "Colors: Black & white only".
That's correct for the mono panel but misleading once the B-variant
support lands — the bot needs to understand:

  - which physical panel is in play (selected via OCG_DISPLAY_VARIANT)
  - that there is no `RED:` directive it can emit
  - that red rendering is system-initiated (today: low battery <20 %)
  - the standing rule: red is an accent, never a background

This avoids the failure mode where the LLM, having been told "you can
control colors", asks for or describes red usage that doesn't exist —
or worse, instructs the bot to flood the screen red.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 gotchi-skills/display/SKILL.md | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/gotchi-skills/display/SKILL.md b/gotchi-skills/display/SKILL.md
index 3dc120b..3b3c289 100644
--- a/gotchi-skills/display/SKILL.md
+++ b/gotchi-skills/display/SKILL.md
@@ -74,9 +74,19 @@ Custom faces from `data/custom_faces.json` are merged with defaults on each rend
 ## Display Info
 
 - **Size:** 250x122 pixels
-- **Colors:** Black & white only
-- **Refresh:** ~2-3 seconds
-- **Ghosting:** Use `--full` to clear
+- **Variants:** two physical panels share the same code path (selected via `OCG_DISPLAY_VARIANT`):
+  - `mono` (default, `epd2in13_V4`): 2-color **black & white**, fast (~2 s) refresh, supports partial updates so face changes feel snappy.
+  - `b` (`epd2in13b_V4`): 3-color **black + red + white**, full refresh only (~15-20 s per update). The red plane is reserved for system-initiated warning accents — see "Color rule" below.
+- **Refresh:** ~2-3 s mono, ~15-20 s on B variant
+- **Ghosting:** Use `--full` to clear (mono only — B always full-refreshes)
+
+## Color rule (B variant)
+
+You **cannot** emit a "make this red" command — there is no `RED:` directive in the FACE/SAY/DISPLAY protocol. Red usage is decided by the bot's runtime code, not the LLM.
+
+When you DO see something rendered red on a B-variant panel, it means a system-level **warning** is active (today: low battery, < 20 %). Treat red as a hint to the user, not as an aesthetic.
+
+If you ever extend the protocol with an explicit red channel (e.g. a future `WARN:` directive), the rule remains: **red is an accent, never a background**. Never instruct the bot to "fill the screen red" or "make everything red" — that defeats the warning channel and looks broken.
 
 ## Do not
 

From 2fa300d3e0cfb2b1b511795ce9fba5ee8a945385 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 03:39:37 +0200
Subject: [PATCH 15/21] =?UTF-8?q?fix(battery):=20UPS=20HAT=20(C)=20is=201S?=
 =?UTF-8?q?=20not=202S=20=E2=80=94=20fix=20percentage=20formula?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Waveshare's UPS HAT (C) ships with a single 18650 cell (1S), not the
2S pack the original code assumed. A fully-charged 4.2 V cell mapped
to (4.2 − 6.0) / 2.4 = −0.75 → clamped to 0 %, so users always saw
"empty" regardless of actual charge.

Switch the linear voltage→percent map to 1S range:
  empty 3.0 V → 0 %
  full  4.2 V → 100 %

Verified on hardware: 4.09 V → 91 % (consistent with a near-full
cell with ~5 % discharge tolerance).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/hardware/battery.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/hardware/battery.py b/src/hardware/battery.py
index ccb2708..41386dd 100644
--- a/src/hardware/battery.py
+++ b/src/hardware/battery.py
@@ -3,10 +3,11 @@
 
 Optional hardware addon: https://www.waveshare.com/wiki/UPS_HAT_(C)
 
-Returns voltage, current, charge state and a 0-100 percentage based on the
-2x 18650 pack (3.0–4.2 V per cell, 6.0–8.4 V total). Auto-detects the
-sensor; if absent or I2C disabled, every public function returns None
-without raising — callers can use `is_available()` to gate UI.
+Returns voltage, current, charge state and a 0-100 percentage based on
+the **single 18650 cell** Waveshare ships with the UPS HAT (C)
+(3.0 V empty → 4.2 V full). Auto-detects the sensor; if absent or
+I2C is disabled, every public function returns ``None`` without
+raising — callers can use ``is_available()`` to gate UI.
 
 Adapted from Waveshare's INA219.py demo, simplified to a single-shot
 reader (the bot polls infrequently — no need for shared state).
@@ -104,12 +105,14 @@ def _calibrate(bus) -> None:
 
 
 def _percentage_from_voltage(volts: float) -> int:
-    """Map bus voltage of a 2S 18650 pack to a 0–100 percentage.
+    """Map bus voltage of a 1S 18650 cell to a 0–100 percentage.
 
-    Empty ≈ 6.0 V (3.0 V/cell), full ≈ 8.4 V (4.2 V/cell).
-    Linear approximation — close enough for a status indicator.
+    UPS HAT (C) is a single-cell (1S) design. Empty ≈ 3.0 V,
+    full ≈ 4.2 V. Linear approximation — close enough for a status
+    indicator; real Li-ion cells have a non-linear discharge curve
+    but the user mostly cares about "low / mid / high".
     """
-    pct = (volts - 6.0) / (8.4 - 6.0) * 100.0
+    pct = (volts - 3.0) / (4.2 - 3.0) * 100.0
     return max(0, min(100, int(round(pct))))
 
 

From 9a3f5c16f664de844a67accda16bf154eceb3bf1 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 03:58:01 +0200
Subject: [PATCH 16/21] fix: restore handlers + main + config after partial
 branch-checkout damage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An earlier `git checkout feat/bot-rag-integration -- src/...` to bring
the cleaned RAG client onto deploy/all-features replaced the merged
deploy versions of bot/handlers.py, main.py, src/config.py with the
PR #10 branch's narrower versions, dropping cmd_model / cb_model /
cmd_update / cmd_battery and the OLLAMA_* config. The bot crash-looped
on startup (ImportError: cannot import name 'cmd_model').

Restore the merged versions (sourced from 288aa3d, the last good
deploy commit) and re-add the cmd_rag / RAG_* additions on top so all
features coexist. No upstream PR is affected — these files on the
upstream PR branches stay scoped to their feature.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/bot/handlers.py | 384 +++++++++++++++++++++++++++++++++++---------
 src/config.py       |   7 +-
 src/main.py         |   3 +-
 3 files changed, 314 insertions(+), 80 deletions(-)

diff --git a/src/bot/handlers.py b/src/bot/handlers.py
index 0a5f651..55a6a25 100644
--- a/src/bot/handlers.py
+++ b/src/bot/handlers.py
@@ -8,7 +8,7 @@
 import tempfile
 from pathlib import Path
 
-from telegram import Update
+from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
 from telegram.constants import ChatAction
 from telegram.ext import ContextTypes
 
@@ -29,7 +29,7 @@
 from memory.summarize import optimize_history
 from cron.scheduler import add_cron_job, list_cron_jobs, remove_cron_job
 from skills.loader import get_eligible_skills
-from config import LLM_PRESETS, OPENAI_API_KEY
+from config import LLM_PRESETS, OPENAI_API_KEY, OLLAMA_API_BASE
 from llm.prompts import build_system_context, build_vault_context
 
 log = logging.getLogger(__name__)
@@ -443,81 +443,6 @@ async def cmd_recall(update: Update, context: ContextTypes.DEFAULT_TYPE):
     await update.message.reply_text(msg)
 
 
-async def cmd_rag(update: Update, context: ContextTypes.DEFAULT_TYPE):
-    """Ad-hoc query against the configured RAG knowledge vault.
-
-    Usage:
-      /rag <query>          → search, top 5 hits
-      /rag --top 10 <query> → search, top 10 hits
-      /rag                  → show config + reachability
-    """
-    if not is_allowed(update.effective_user.id, update.effective_chat.id):
-        return
-
-    from llm import rag_client
-
-    args = list(context.args or [])
-    if not args:
-        # Status mode — show config + reachability
-        if not rag_client.is_configured():
-            await update.message.reply_text(
-                "🧠 *RAG* not configured.\n\n"
-                "Set `RAG_API_URL=http://your-rag-host:8765` in `.env` and restart the bot.",
-                parse_mode="Markdown",
-            )
-            return
-        h = rag_client.health()
-        if h is None:
-            from config import RAG_API_URL
-            await update.message.reply_text(
-                f"🧠 RAG configured at `{RAG_API_URL}` but unreachable.",
-                parse_mode="Markdown",
-            )
-            return
-        comps = h.get("components") or []
-        lines = [f"✅ RAG *{h.get('version','?')}* online", "", "*Components:*"]
-        for c in comps:
-            sym = "✅" if c.get("healthy") else "❌"
-            lines.append(f"  {sym} {c.get('name')} ({c.get('latency_ms', '?'):.1f} ms)" if isinstance(c.get('latency_ms'), (int, float)) else f"  {sym} {c.get('name')}")
-        await update.message.reply_text("\n".join(lines), parse_mode="Markdown")
-        return
-
-    # Parse --top N
-    top_k = 5
-    if args[0] == "--top" and len(args) >= 3:
-        try:
-            top_k = max(1, min(int(args[1]), 50))
-            args = args[2:]
-        except ValueError:
-            pass
-
-    query = " ".join(args).strip()
-    if not query:
-        await update.message.reply_text("Usage: `/rag <query>`", parse_mode="Markdown")
-        return
-
-    if not rag_client.is_configured():
-        await update.message.reply_text("🧠 RAG not configured. Set `RAG_API_URL` in `.env`.", parse_mode="Markdown")
-        return
-
-    await update.message.chat.send_action(action=ChatAction.TYPING)
-    import asyncio
-    response = await asyncio.to_thread(rag_client.query, query, top_k)
-    if response is None:
-        await update.message.reply_text("❌ RAG service unreachable.")
-        return
-
-    formatted = rag_client.format_hits(response, max_chars=3500)
-    duration = response.get("duration_ms", 0)
-    reranked = " (reranked)" if response.get("reranked") else ""
-    header = f"🧠 *{len(response.get('hits') or [])} hits* in {duration:.0f} ms{reranked}\n\n"
-    # Telegram Markdown is finicky; use plain text body inside backticks
-    await update.message.reply_text(
-        header + "```\n" + formatted + "\n```",
-        parse_mode="Markdown",
-    )
-
-
 async def cmd_vault(update: Update, context: ContextTypes.DEFAULT_TYPE):
     """Show vault status."""
     user = update.effective_user
@@ -961,11 +886,314 @@ async def cmd_use(update: Update, context: ContextTypes.DEFAULT_TYPE):
     if "gemini" in model_key: emoji = "♊️"
     
     await update.message.reply_text(f"{emoji} Switched to *{model_key.upper()}*!\nModel: {preset['model']}", parse_mode="Markdown")
-    
+
     # Visual update
     show_face(mood="happy", text=f"Model: {model_key.upper()}")
 
 
+# --- /model command: inline-button picker with live Ollama discovery ---
+
+_MODEL_EMOJI = {"gemini": "♊️", "glm": "🇨🇳", "ollama": "🦙"}
+
+
+def _ollama_list_with_capabilities(timeout: float = 4.0) -> list[dict]:
+    """Fetch installed Ollama models + capabilities. Returns [{name, supports_tools}]."""
+    import requests
+    base = (OLLAMA_API_BASE or "").rstrip("/")
+    if not base:
+        return []
+    try:
+        r = requests.get(f"{base}/api/tags", timeout=timeout)
+        r.raise_for_status()
+        names = [m.get("name") for m in r.json().get("models", []) if m.get("name")]
+    except Exception as e:
+        log.warning(f"Ollama /api/tags failed: {e}")
+        return []
+
+    out = []
+    for name in names:
+        supports = False
+        try:
+            sr = requests.post(f"{base}/api/show", json={"model": name}, timeout=timeout)
+            if sr.ok:
+                caps = sr.json().get("capabilities") or []
+                supports = "tools" in caps
+        except Exception:
+            pass
+        out.append({"name": name, "supports_tools": supports})
+    return out
+
+
+def _top_model_markup(current: str) -> InlineKeyboardMarkup:
+    rows = []
+    for key in LLM_PRESETS.keys():
+        emoji = _MODEL_EMOJI.get(key, "🔹")
+        active = LLM_PRESETS[key]["model"] == current or (
+            key == "ollama" and isinstance(current, str) and current.startswith("ollama_chat/")
+        )
+        marker = " ✅" if active else ""
+        suffix = " ▸" if key == "ollama" else ""
+        rows.append([InlineKeyboardButton(f"{emoji} {key}{marker}{suffix}", callback_data=f"model:{key}")])
+    return InlineKeyboardMarkup(rows)
+
+
+async def cmd_model(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Show inline buttons to switch LLM model. With argument acts like /use."""
+    if not is_allowed(update.effective_user.id, update.effective_chat.id):
+        return
+
+    if context.args:
+        return await cmd_use(update, context)
+
+    router = get_router()
+    current = router.litellm.model
+    text = f"🦄 *Current:* `{current}`\n\nPick a model:"
+    await update.message.reply_text(text, parse_mode="Markdown", reply_markup=_top_model_markup(current))
+
+
+async def cb_model(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Callback for /model inline buttons (presets + Ollama submenu)."""
+    import asyncio
+    query = update.callback_query
+    if not is_allowed(query.from_user.id, query.message.chat_id):
+        await query.answer("Not allowed", show_alert=True)
+        return
+    await query.answer()
+
+    data = query.data or ""
+    router = get_router()
+
+    # Specific Ollama model switch: omd:<name>
+    if data.startswith("omd:"):
+        model_name = data.split(":", 1)[1]
+        full = f"ollama_chat/{model_name}"
+        router.litellm.set_model(full, OLLAMA_API_BASE)
+        router.force_lite = True
+        await query.edit_message_text(
+            f"🦙 Switched to *Ollama / {model_name}*\n`{full}`",
+            parse_mode="Markdown"
+        )
+        show_face(mood="happy", text=f"Ollama: {model_name[:20]}")
+        return
+
+    key = data.split(":", 1)[-1]
+
+    # Back to top menu
+    if key == "back":
+        await query.edit_message_text(
+            f"🦄 *Current:* `{router.litellm.model}`\n\nPick a model:",
+            parse_mode="Markdown",
+            reply_markup=_top_model_markup(router.litellm.model)
+        )
+        return
+
+    # Ollama: fetch and show submenu (only tool-capable models)
+    if key == "ollama":
+        await query.edit_message_text("🦙 Fetching models from Ollama server…")
+        models = await asyncio.to_thread(_ollama_list_with_capabilities)
+
+        if not models:
+            await query.edit_message_text(
+                f"❌ Could not reach Ollama at `{OLLAMA_API_BASE}`",
+                parse_mode="Markdown",
+                reply_markup=InlineKeyboardMarkup([[InlineKeyboardButton("◂ Back", callback_data="model:back")]])
+            )
+            return
+
+        tool_models = [m for m in models if m["supports_tools"]]
+        show_models = tool_models if tool_models else models
+        note = "" if tool_models else "\n⚠️ _No tool-capable models found, showing all_"
+
+        rows = []
+        for m in show_models:
+            name = m["name"]
+            cb = f"omd:{name}"
+            if len(cb.encode("utf-8")) > 60:
+                continue  # Telegram callback_data limit (64 bytes)
+            tag = "🔧" if m["supports_tools"] else "🔸"
+            rows.append([InlineKeyboardButton(f"{tag} {name}", callback_data=cb)])
+        rows.append([InlineKeyboardButton("◂ Back", callback_data="model:back")])
+
+        await query.edit_message_text(
+            f"🦙 *Ollama models* ({len(show_models)}){note}\n\n🔧 = supports tools",
+            parse_mode="Markdown",
+            reply_markup=InlineKeyboardMarkup(rows)
+        )
+        return
+
+    # Static presets (gemini, glm)
+    if key not in LLM_PRESETS:
+        await query.edit_message_text("❌ Unknown model.")
+        return
+
+    preset = LLM_PRESETS[key]
+    router.litellm.set_model(preset["model"], preset["api_base"])
+    router.force_lite = True
+
+    emoji = _MODEL_EMOJI.get(key, "🔹")
+    await query.edit_message_text(
+        f"{emoji} Switched to *{key.upper()}*\n`{preset['model']}`",
+        parse_mode="Markdown"
+    )
+    show_face(mood="happy", text=f"Model: {key.upper()}")
+
+
+async def cmd_update(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Pull latest code from upstream, refresh deps, restart service."""
+    import asyncio
+    import subprocess
+    from config import PROJECT_DIR, get_admin_id
+
+    user_id = update.effective_user.id
+    chat_id = update.effective_chat.id
+    if not is_allowed(user_id, chat_id):
+        return
+
+    # Owner-only — don't let any allowed user remote-update the bot
+    admin_id = get_admin_id()
+    if admin_id and user_id != admin_id:
+        await update.message.reply_text("⛔ Owner-only command.")
+        return
+
+    script = PROJECT_DIR / "scripts" / "auto_update.sh"
+    if not script.exists():
+        await update.message.reply_text(f"❌ Update script not found: `{script}`", parse_mode="Markdown")
+        return
+
+    check_only = bool(context.args and context.args[0].lower() in ("check", "--check"))
+
+    msg = await update.message.reply_text("🔍 Checking for updates…" if check_only else "⬇️ Updating…")
+
+    try:
+        cmd = ["bash", str(script)] + (["--check"] if check_only else [])
+        proc = await asyncio.to_thread(
+            subprocess.run, cmd,
+            cwd=str(PROJECT_DIR),
+            capture_output=True, text=True, timeout=300
+        )
+        out = (proc.stdout or "") + (proc.stderr or "")
+        out = out.strip()[-3500:]  # Telegram message size budget
+
+        # check-mode: exit 0 = updates available, 1 = up-to-date
+        if check_only:
+            status = "🆕 Updates available" if proc.returncode == 0 else "✅ Up-to-date"
+            await msg.edit_text(f"{status}\n\n```\n{out}\n```", parse_mode="Markdown")
+            return
+
+        if proc.returncode == 0:
+            await msg.edit_text(f"✅ Update complete\n\n```\n{out}\n```", parse_mode="Markdown")
+            show_face(mood="excited", text="Updated!")
+        elif proc.returncode == 4:
+            await msg.edit_text(
+                f"⚠️ Update failed — auto-rolled back to previous version\n\n```\n{out}\n```",
+                parse_mode="Markdown"
+            )
+            show_face(mood="confused", text="Update rolled back")
+        else:
+            await msg.edit_text(f"❌ Update failed (exit {proc.returncode})\n\n```\n{out}\n```", parse_mode="Markdown")
+            show_face(mood="confused", text="Update failed")
+    except subprocess.TimeoutExpired:
+        await msg.edit_text("❌ Update timed out after 5 min.")
+    except Exception as e:
+        await msg.edit_text(f"❌ Update error: `{e}`", parse_mode="Markdown")
+
+
+async def cmd_battery(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Handle /battery command — show UPS HAT (C) status."""
+    if not is_allowed(update.effective_user.id, update.effective_chat.id):
+        return
+
+    from hardware import battery
+
+    reading = battery.read()
+    if reading is None:
+        await update.message.reply_text(
+            "🔌 No UPS HAT detected.\n"
+            "Make sure I2C is enabled and the UPS HAT (C) is connected, "
+            "then `/battery` again. (Check `i2cdetect -y 1` should list 0x43.)",
+            parse_mode="Markdown",
+        )
+        return
+
+    await update.message.reply_text(reading.long())
+
+
+async def cmd_rag(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    """Ad-hoc query against the configured RAG knowledge vault.
+
+    Usage:
+      /rag <query>          → search, top 5 hits
+      /rag --top 10 <query> → search, top 10 hits
+      /rag                  → show config + reachability
+    """
+    if not is_allowed(update.effective_user.id, update.effective_chat.id):
+        return
+
+    from llm import rag_client
+
+    args = list(context.args or [])
+    if not args:
+        if not rag_client.is_configured():
+            await update.message.reply_text(
+                "🧠 *RAG* not configured.\n\n"
+                "Set `RAG_API_URL=http://your-rag-host:8765` in `.env` and restart the bot.",
+                parse_mode="Markdown",
+            )
+            return
+        h = rag_client.health()
+        if h is None:
+            from config import RAG_API_URL
+            await update.message.reply_text(
+                f"🧠 RAG configured at `{RAG_API_URL}` but unreachable.",
+                parse_mode="Markdown",
+            )
+            return
+        comps = h.get("components") or []
+        lines = [f"✅ RAG *{h.get('version','?')}* online", "", "*Components:*"]
+        for c in comps:
+            sym = "✅" if c.get("healthy") else "❌"
+            lat = c.get("latency_ms")
+            if isinstance(lat, (int, float)):
+                lines.append(f"  {sym} {c.get('name')} ({lat:.1f} ms)")
+            else:
+                lines.append(f"  {sym} {c.get('name')}")
+        await update.message.reply_text("\n".join(lines), parse_mode="Markdown")
+        return
+
+    top_k = 5
+    if args[0] == "--top" and len(args) >= 3:
+        try:
+            top_k = max(1, min(int(args[1]), 50))
+            args = args[2:]
+        except ValueError:
+            pass
+
+    query = " ".join(args).strip()
+    if not query:
+        await update.message.reply_text("Usage: `/rag <query>`", parse_mode="Markdown")
+        return
+
+    if not rag_client.is_configured():
+        await update.message.reply_text("🧠 RAG not configured. Set `RAG_API_URL` in `.env`.", parse_mode="Markdown")
+        return
+
+    await update.message.chat.send_action(action=ChatAction.TYPING)
+    import asyncio
+    response = await asyncio.to_thread(rag_client.query, query, top_k)
+    if response is None:
+        await update.message.reply_text("❌ RAG service unreachable.")
+        return
+
+    formatted = rag_client.format_hits(response, max_chars=3500)
+    duration = response.get("duration_ms", 0)
+    reranked = " (reranked)" if response.get("reranked") else ""
+    header = f"🧠 *{len(response.get('hits') or [])} hits* in {duration:.0f} ms{reranked}\n\n"
+    await update.message.reply_text(
+        header + "```\n" + formatted + "\n```",
+        parse_mode="Markdown",
+    )
+
+
 async def cmd_memory(update: Update, context: ContextTypes.DEFAULT_TYPE):
     """Handle /memory command — show database stats."""
     user = update.effective_user
diff --git a/src/config.py b/src/config.py
index aafac39..0227b89 100644
--- a/src/config.py
+++ b/src/config.py
@@ -34,10 +34,11 @@ def _env_flag(name: str, default: bool = False) -> bool:
 GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini/gemini-1.5-flash")
 GEMINI_API_BASE = os.environ.get("GEMINI_API_BASE", "")  # Optional override for Z.ai/OpenAI
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
+OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:14b")
+OLLAMA_API_BASE = os.environ.get("OLLAMA_API_BASE", "http://ollama-server:11434")
 
 # Optional external RAG (Retrieval-Augmented Generation) service.
 # When RAG_API_URL is empty the rag tools degrade gracefully (no-op).
-# Expected REST contract is documented in src/llm/rag_client.py.
 RAG_API_URL = os.environ.get("RAG_API_URL", "").rstrip("/")
 RAG_API_KEY = os.environ.get("RAG_API_KEY", "")
 RAG_DEFAULT_COLLECTIONS = [
@@ -64,6 +65,10 @@ def _env_flag(name: str, default: bool = False) -> bool:
     "glm": {
         "model": "anthropic/glm-5.1",
         "api_base": "https://api.z.ai/api/anthropic"
+    },
+    "ollama": {
+        "model": f"ollama_chat/{OLLAMA_MODEL}",
+        "api_base": OLLAMA_API_BASE
     }
 }
 
diff --git a/src/main.py b/src/main.py
index 510d5dd..c205585 100644
--- a/src/main.py
+++ b/src/main.py
@@ -31,7 +31,7 @@
 from bot.handlers import (
     cmd_start, cmd_clear, cmd_context, cmd_status, cmd_xp, cmd_pro, cmd_use,
     cmd_remember, cmd_recall, cmd_vault, cmd_cron, cmd_jobs, cmd_memory, cmd_health,
-    cmd_model, cb_model, cmd_update, cmd_battery, handle_message, handle_voice
+    cmd_model, cb_model, cmd_update, cmd_battery, cmd_rag, handle_message, handle_voice
 )
 from bot.heartbeat import send_heartbeat
 from hooks.runner import run_hook, HookEvent, discover_and_load_hooks
@@ -274,6 +274,7 @@ async def post_init(application: Application):
     app.add_handler(CommandHandler("remember", cmd_remember))
     app.add_handler(CommandHandler("recall", cmd_recall))
     app.add_handler(CommandHandler("vault", cmd_vault))
+    app.add_handler(CommandHandler("rag", cmd_rag))
     app.add_handler(CommandHandler("cron", cmd_cron))
     app.add_handler(CommandHandler("jobs", cmd_jobs))
 

From 00462e4ba4a844a3420953a62ead78b28c7c43f2 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 04:13:31 +0200
Subject: [PATCH 17/21] fix(display): propagate BOT_NAME + move battery to
 footer centre
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two issues seen on B-variant hardware after bot-driven display
updates (FACE: / SAY: from LLM output):

1. Top-left always showed "Gotchi" instead of the configured BOT_NAME
   (e.g. "Clotchi"). Cause: `sudo` strips env_reset Defaults, so the
   subprocess's `os.environ.get("BOT_NAME")` fell back to the literal
   "Gotchi" default. Fix: also propagate BOT_NAME, OWNER_NAME and
   BOT_LANGUAGE through the existing `sudo /usr/bin/env VAR=val ...`
   wrapper that already handles OCG_DISPLAY_VARIANT and friends.

2. Battery suffix in the header stats line pushed the bot name on
   the top-left off-screen / under other content when the line got
   long. Move it into the footer centre instead — the footer has
   spare horizontal space between the status text on the left and
   the XP indicator on the right. On B variant a low-charge battery
   renders into the red layer (red text accent) instead of black;
   above the threshold or on mono panels it stays black.

Status text is now truncated to 30 chars (was 35) so it doesn't
overlap the centred battery cell on long messages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/hardware/display.py |  6 ++++-
 src/ui/gotchi_ui.py     | 49 ++++++++++++++++++++++-------------------
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/src/hardware/display.py b/src/hardware/display.py
index baa95f3..82093a3 100644
--- a/src/hardware/display.py
+++ b/src/hardware/display.py
@@ -106,7 +106,11 @@ def update_display(mood: str = None, text: str = None, full_refresh: bool = Fals
     # modern kernel renders inverted colors.
     propagate_env = {
         k: v for k, v in os.environ.items()
-        if k in ("OCG_DISPLAY_VARIANT", "GPIOZERO_PIN_FACTORY", "OCG_UPS_BUS", "OCG_UPS_ADDR")
+        if k in (
+            "OCG_DISPLAY_VARIANT", "GPIOZERO_PIN_FACTORY",
+            "OCG_UPS_BUS", "OCG_UPS_ADDR",
+            "BOT_NAME", "OWNER_NAME", "BOT_LANGUAGE",
+        )
     }
     cmd = ["sudo", "/usr/bin/env"]
     cmd.extend(f"{k}={v}" for k, v in propagate_env.items())
diff --git a/src/ui/gotchi_ui.py b/src/ui/gotchi_ui.py
index d17897a..f554c85 100644
--- a/src/ui/gotchi_ui.py
+++ b/src/ui/gotchi_ui.py
@@ -306,29 +306,13 @@ def draw_text_with_fallback(draw, xy, text, font, fallback_font, fill=0):
         draw.text((2, 1), display_name, font=font_ui, fill=0)
         
         # Right: Stats (Formatted clearly)
-        # e.g. T:45C | Free:120M | 14:00 [| 🔋87%]
-        # The battery suffix renders as RED text on the B variant when low,
-        # otherwise black like the rest. Red is an accent only — never the
-        # background — so a healthy battery looks identical to the legacy
-        # display.
+        # e.g. T:45C | Free:120M | 14:00
+        # Battery info is rendered separately in the footer (not here) so the
+        # bot name on the left isn't pushed off-screen by long stats lines.
         txt_stats = f"T:{stats['temp']}°C | Free:{stats['mem_avail']}MB | {now}"
-        battery_suffix = f" | {battery_text}" if battery_text else ""
-        full_stats = txt_stats + battery_suffix
-        bbox = draw.textbbox((0, 0), full_stats, font=font_ui)
+        bbox = draw.textbbox((0, 0), txt_stats, font=font_ui)
         w = bbox[2] - bbox[0]
-        stats_x = WIDTH - w - 2
-
-        if battery_suffix and red_draw is not None and battery_low:
-            # Split: prefix in black layer, battery suffix in red layer only.
-            # Result on B panel: prefix renders black, suffix renders red.
-            bbox_pre = draw.textbbox((0, 0), txt_stats, font=font_ui)
-            pre_w = bbox_pre[2] - bbox_pre[0]
-            draw.text((stats_x, 1), txt_stats, font=font_ui, fill=0)
-            red_draw.text((stats_x + pre_w, 1), battery_suffix, font=font_ui, fill=0)
-        else:
-            # Mono variant, healthy battery, or no battery present:
-            # one black draw call, no red layer touched.
-            draw.text((stats_x, 1), full_stats, font=font_ui, fill=0)
+        draw.text((WIDTH - w - 2, 1), txt_stats, font=font_ui, fill=0)
         
         # Line
         draw.line((0, HEADER_H, WIDTH, HEADER_H), fill=0)
@@ -370,13 +354,32 @@ def draw_text_with_fallback(draw, xy, text, font, fallback_font, fill=0):
         except Exception:
             xp_str = ""
         
-        # Draw status on left, XP on right
-        draw.text((4, HEIGHT - FOOTER_H + 1), status_text[:35], font=font_ui, fill=0)
+        # Footer layout: status (left) | battery (centre) | XP (right).
+        # The battery cell lives in the footer rather than the header so the
+        # bot name on the top-left has room and the panel can show all three
+        # at once. On the B variant we render the battery suffix into the
+        # red layer when battery_low — otherwise normal black ink.
+        draw.text((4, HEIGHT - FOOTER_H + 1), status_text[:30], font=font_ui, fill=0)
+
+        xp_w = 0
         if xp_str:
             bbox_xp = draw.textbbox((0, 0), xp_str, font=font_ui)
             xp_w = bbox_xp[2] - bbox_xp[0]
             draw.text((WIDTH - xp_w - 4, HEIGHT - FOOTER_H + 1), xp_str, font=font_ui, fill=0)
 
+        if battery_text:
+            bbox_bat = draw.textbbox((0, 0), battery_text, font=font_ui)
+            bat_w = bbox_bat[2] - bbox_bat[0]
+            bat_x = (WIDTH - bat_w) // 2
+            bat_y = HEIGHT - FOOTER_H + 1
+            if red_draw is not None and battery_low:
+                # Render battery in the red layer only — appears red on the
+                # B panel, signalling low charge as an accent (never a
+                # background).
+                red_draw.text((bat_x, bat_y), battery_text, font=font_ui, fill=0)
+            else:
+                draw.text((bat_x, bat_y), battery_text, font=font_ui, fill=0)
+
         # 4. Main Content (Face + Bubble)
         
         # Face selection — THE SINGLE SOURCE OF TRUTH!

From 140d3bd9c9be080685b67ff9cd9cc77c024c9a2f Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 04:16:29 +0200
Subject: [PATCH 18/21] fix(auto_mood): don't duplicate header metrics in the
 footer status text
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`apply_auto_mood()` returns a (mood, text) tuple. The text becomes the
footer status_text on the E-Ink panel, while the header simultaneously
renders the same metrics in its always-on stats line
(`T:51°C | Free:79MB | …`). For low RAM / high temp the auto-mood text
read e.g. "Low RAM: 79MB" or "Hot! 51°C" — exactly what the header
already shows, just one frame older. The two numbers drift between
frames and the duplicate crowds an already tight 250×122 layout.

Drop the metric values from the warning text and keep the warning
itself ("RAM low", "Running hot", "OVERHEATING!", "OOM!"). The header
keeps reporting the live numbers; the footer adds the qualitative
warning beside them. No mood mapping changes, no thresholds change,
no API change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/hardware/auto_mood.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/hardware/auto_mood.py b/src/hardware/auto_mood.py
index 6f07668..02a4f60 100644
--- a/src/hardware/auto_mood.py
+++ b/src/hardware/auto_mood.py
@@ -76,18 +76,23 @@ def get_auto_mood() -> tuple[str, str]:
     # Priority checks (highest priority first)
     
     # 1. CRITICAL states
+    # Note: keep the auto-mood text concise and free of metric values that
+    # the header already displays (T:°C, Free:MB). The header is rendered on
+    # every frame and re-shows the live numbers; duplicating them in the
+    # footer status_text creates two values that drift out of sync between
+    # frames and crowds the layout.
     if temp >= TEMP_CRITICAL:
-        return "dead", f"OVERHEATING {temp}°C!"
-    
+        return "dead", "OVERHEATING!"
+
     if ram_free <= RAM_CRITICAL:
-        return "dead", f"OOM! {ram_free}MB left"
-    
-    # 2. Warning states  
+        return "dead", "OOM!"
+
+    # 2. Warning states
     if temp >= TEMP_HOT:
-        return "nervous", f"Hot! {temp}°C"
-    
+        return "nervous", "Running hot"
+
     if ram_free <= RAM_LOW:
-        return "nervous", f"Low RAM: {ram_free}MB"
+        return "nervous", "RAM low"
     
     # 3. Achievement states
     if uptime_seconds >= UPTIME_LEGEND:

From 420539ee1313b65edb492b50f4299b855ec4ee36 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 04:32:58 +0200
Subject: [PATCH 19/21] fix(heartbeat): pin language at BOTH ends of the
 reflection prompt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Single-side pinning (only at the end) wasn't enough — the long English
HEARTBEAT.md template still pulled the model into English on a
fraction of heartbeats even when BOT_LANGUAGE was set. Wrap the prompt
in the configured language at the start AND at the end so whichever
side the model anchors to, the language is set.

The front pin uses the user's language directly (e.g. German), making
the very first thing the model reads a strong directive in the target
language. The end pin restates the requirement just before generation
starts. Both reference each other so it's clear they're the same rule.

Behaviour for `BOT_LANGUAGE=en` or unset: no change, no extra prompt,
no change.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/bot/heartbeat.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/bot/heartbeat.py b/src/bot/heartbeat.py
index 8325720..4dc7439 100644
--- a/src/bot/heartbeat.py
+++ b/src/bot/heartbeat.py
@@ -295,10 +295,11 @@ async def send_heartbeat(context):
 
     prompt += "\n\n[Reflect. Think out loud. Then FACE: and SAY:]"
 
-    # The HEARTBEAT.md template is English; without a final language pin
-    # the model defaults to English even when BOT_LANGUAGE is set, because
-    # the long English user prompt overpowers the system-level directive.
-    # Reinforce the pin here so the reflection itself follows BOT_LANGUAGE.
+    # The HEARTBEAT.md template is English; without a hard language pin
+    # the model defaults to English part of the time even when BOT_LANGUAGE
+    # is set, because the long English user prompt overpowers the
+    # system-level directive. Pin the language at BOTH ends of the prompt
+    # so the model can't drift no matter where its attention is anchored.
     from config import BOT_LANGUAGE
     _LANG_NAMES = {
         "de": "Deutsch", "en": "English", "ru": "Русский", "es": "Español",
@@ -308,7 +309,19 @@ async def send_heartbeat(context):
     _lang_code = (BOT_LANGUAGE or "").strip().lower()
     if _lang_code and _lang_code != "en":
         _lang_name = _LANG_NAMES.get(_lang_code, _lang_code)
-        prompt += f"\n\nIMPORTANT: write the reflection text and the SAY: bubble in **{_lang_name}**, not English. The template above is English only because it's a system instruction — your output must be in {_lang_name}."
+        # Front pin: forces language before the English template starts.
+        prompt = (
+            f"## Sprache der Antwort: {_lang_name}\n\n"
+            f"Schreibe **die GESAMTE Antwort** auf {_lang_name}. Jedes Wort, jeder "
+            f"Satz, auch die SAY:- und FACE:-Zeilen am Ende. Die Vorlage unten ist "
+            f"nur auf Englisch, weil sie eine Systemanweisung ist — DEINE Antwort "
+            f"muss auf {_lang_name} sein.\n\n---\n\n"
+        ) + prompt
+        # End pin: last word the model reads before generating its first token.
+        prompt += (
+            f"\n\n[Reflektiere auf {_lang_name}. Antworte auf {_lang_name}. "
+            f"Nicht auf Englisch. {_lang_name} only.]"
+        )
     
     # 7. Call LLM
     router = get_router()

From 5829c39fdca407e5b16e8a6e49ac18b99a36263c Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 04:44:54 +0200
Subject: [PATCH 20/21] =?UTF-8?q?feat(mcp):=20bot=20as=20MCP=20client=20?=
 =?UTF-8?q?=E2=80=94=20minimal=20SSE=20client=20+=20LLM=20tools?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lets the bot consume tools advertised by any external MCP server
that speaks the SSE transport, without dragging in the official
`mcp[cli]` Python package (it pulls `cryptography`, `pydantic-settings`,
`starlette`, `uvicorn`, `pyjwt`, `httpx-sse`, `sse-starlette`,
`python-multipart` — non-trivial RAM hit on a 512 MB Pi Zero 2W).

What's added
- src/llm/rag_mcp_client.py — hand-rolled MCP-over-SSE client,
  ~250 LoC, stdlib-only + `requests` (already in the venv via
  litellm). Background thread reads the SSE stream and dispatches
  JSON-RPC responses by id; sync `connect()` / `initialize()` /
  `list_tools()` / `call_tool(name, args)` API. A module-level
  `get_client()` returns a lazy singleton so multiple tool calls
  share one SSE connection.
- Two new LLM tools wired into TOOL_MAP:
    `mcp_list_tools()` — return advertised tool names + descriptions
    `mcp_call_tool(name, arguments)` — invoke by name; arguments is
                                       a JSON object passed as a string
                                       (the LLM emits one).
  Both gracefully no-op when the MCP path isn't configured, returning
  a clear hint instead of raising.

Activation
- Env var `RAG_TRANSPORT=rest|mcp` (default `rest`). When `mcp`,
  `RAG_API_URL` is interpreted as the MCP-SSE base URL (e.g.
  `http://your-rag-host:8766`).
- Reuses `RAG_API_KEY` for optional Bearer auth.
- No new top-level dependencies.

Tested against rag-core's MCP-SSE endpoint
(advertised tools: rag_search, rag_persist, rag_status,
 rag_list_collections, rag_recall_session, rag_session_announce,
 rag_session_forget). `tools/list` returns the catalog; `tools/call`
 dispatches and returns the rendered text content correctly.

Out of scope (separate follow-ups)
- Auto-registration of advertised MCP tools as first-class TOOL_MAP
  entries (each with its own typed JSON schema). Today the LLM has
  to look at `mcp_list_tools` then construct an `mcp_call_tool` call
  itself; auto-registration would let it call them as if native.
- Multi-server support (today: single MCP server via RAG_API_URL).
- Async transport / WebSocket fallback.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                 |  14 +-
 src/llm/litellm_connector.py |  73 +++++++
 src/llm/rag_mcp_client.py    | 363 ++++++++++++++++++++++++++++++-----
 3 files changed, 400 insertions(+), 50 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d54db95..6c1c4fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,15 +5,18 @@ All notable changes to the OpenClawGotchi project will be documented in this fil
 ## [Unreleased] - 2026-05-09
 
 ### Added
-- **External RAG service integration via REST**: opt-in connector to any RAG (Retrieval-Augmented Generation) backend that exposes a small documented HTTP contract (see `src/llm/rag_client.py` module docstring). New `/rag` Telegram command (`/rag`, `/rag <query>`, `/rag --top N <query>`). LLM tools `query_rag(query, top_k)` and `persist_to_rag(text, title, tags)`. Env vars: `RAG_API_URL` (empty disables), `RAG_API_KEY`, `RAG_DEFAULT_COLLECTIONS`. `src/llm/rag_mcp_client.py` ships as a roadmap stub for a future MCP-client follow-up.
+- **External RAG service integration via REST**: opt-in connector to any RAG (Retrieval-Augmented Generation) backend that exposes a small documented HTTP contract (see `src/llm/rag_client.py` module docstring). New `/rag` Telegram command (`/rag`, `/rag <query>`, `/rag --top N <query>`). LLM tools `query_rag(query, top_k)` and `persist_to_rag(text, title, tags)`. Env vars: `RAG_API_URL` (empty disables), `RAG_API_KEY`, `RAG_DEFAULT_COLLECTIONS`.
+- **Bot can also act as a generic MCP client over SSE.** New module `src/llm/rag_mcp_client.py` is a hand-rolled minimal MCP-over-SSE client (~250 LoC) using only the stdlib + `requests` (already in the venv via litellm). Speaks just enough of the MCP spec to do `initialize` + `tools/list` + `tools/call` against any SSE-transport server — no `mcp[cli]` dependency, so the Pi Zero 2W's RAM budget is respected (the official package pulls in `cryptography`, `pydantic-settings`, `starlette`, `uvicorn`, etc.).
+- **Two new LLM tools for MCP**: `mcp_list_tools()` (discover) and `mcp_call_tool(name, arguments)` (invoke). The agent decides which advertised tool to call. Activates only when `RAG_TRANSPORT=mcp` is set in the environment.
+- New env var `RAG_TRANSPORT=rest|mcp` (default `rest`). When `mcp` is selected, `RAG_API_URL` is interpreted as the MCP-SSE base URL (e.g. `http://your-rag-host:8766`).
 - **`/model` Telegram command**: inline-keyboard model picker. Without args it opens buttons for every preset (gemini, glm, ollama). With an argument (`/model glm`) it falls through to the existing `/use` flow. `/use` and `/switch` remain as text aliases.
 - **Live Ollama discovery**: tapping `🦙 ollama ▸` queries the configured Ollama server (`/api/tags` + `/api/show`), filters by `capabilities.tools`, and only lists tool-capable models. Falls back to all installed models with a warning when none advertise tools. Includes `◂ Back` button and a graceful "could not reach server" state. New env vars: `OLLAMA_MODEL` (default `qwen2.5:14b`) and `OLLAMA_API_BASE` (placeholder default `http://ollama-server:11434`).
 - **Persistent model choice**: `/model` and `/use` now write the selection to `data/active_model.json` (gitignored). On startup `LiteLLMConnector` restores it before falling back to `DEFAULT_LITE_PRESET`. Survives `systemctl restart` and reboots.
 - **`/update` Telegram command + `scripts/auto_update.sh`**: owner-only command that fetches `origin/main`, fast-forwards if there are new commits, refreshes venv deps when `requirements.txt` changed, and restarts the systemd service. Supports `/update check` for dry-run. Cron-friendly so the bot can also auto-update unattended.
 - **Update safety net**: before pulling, the script tarballs `gotchi.db` + `data/` + `.env` to `backups/pre-update-<timestamp>-<sha>.tar.gz` (rolling, keeps last 3 — see `OCG_BACKUP_KEEP`). If the service fails to come back up after the new code is in place, the script auto-rolls-back to the previous commit, reinstalls deps if needed, restarts, and exits with code 4 to flag the failed upgrade. Disable with `OCG_NO_BACKUP=1` / `OCG_NO_ROLLBACK=1`.
 - **`gotchi-update` sudoers entry** in `setup.sh`: lets the bot user `systemctl restart gotchi-bot.service` without a password — needed by `/update` and the unattended cron path.
-- **UPS HAT (C) battery monitoring** (Waveshare): new `hardware/battery.py` reads bus voltage, current and power from the on-board INA219 over I2C and reports a 0–100 % estimate based on the 2× 18650 voltage curve (6.0 V empty → 8.4 V full). Auto-detects the sensor and gracefully degrades when I2C is disabled or the HAT is absent — every public function returns `None` rather than raising.
-- **`/battery` Telegram command**: shows the current reading (`🔋 87 % — 8.12 V, +120 mA (charging, 974 mW)`) or a friendly "no UPS HAT detected" hint with `i2cdetect` instructions.
+- **UPS HAT (C) battery monitoring** (Waveshare): new `hardware/battery.py` reads bus voltage, current and power from the on-board INA219 over I2C and reports a 0–100 % estimate based on the 1× 18650 voltage curve (3.0 V empty → 4.2 V full). Auto-detects the sensor and gracefully degrades when I2C is disabled or the HAT is absent — every public function returns `None` rather than raising.
+- **`/battery` Telegram command**: shows the current reading (`🔋 87 % — 4.05 V, +120 mA (charging, 974 mW)`) or a friendly "no UPS HAT detected" hint with `i2cdetect` instructions.
 - **System status line includes battery** (when present): `get_stats_string()` adds a `[BATTERY] …` line, so heartbeat reflections and the bot's self-awareness pick up battery state automatically.
 - **Optional dep `smbus2`** added to `requirements.txt` (pure-Python, ~30 KB). Drop the line to disable battery support entirely.
 
@@ -25,6 +28,11 @@ All notable changes to the OpenClawGotchi project will be documented in this fil
 - **`error_screen()` SAY: text now respects `BOT_LANGUAGE`**: previously hardcoded Japanese (`システムエラー発生` etc.), which renders as garbled glyphs for owners who don't read it. Localized into `ja` / `en` / `de` / `ru` / `es` / `fr`. Default (when `BOT_LANGUAGE` is unset) stays Japanese to preserve the original cyberpunk aesthetic; unknown codes fall back to English.
 - **Onboarding loop never exited**: `BOOTSTRAP.md` was only deleted when the LLM emitted a magic completion phrase ("onboarding complete", "saved to identity.md", …). Models that update `IDENTITY.md` correctly without that phrase left the bootstrap stale forever and re-triggered onboarding on every restart. `needs_onboarding()` now auto-completes when `IDENTITY.md` mtime > `BOOTSTRAP.md` mtime.
 
+### Notes
+- The MCP client uses a sync API throughout — slots into the existing TOOL_MAP dispatcher without async plumbing.
+- A single background thread reads the SSE stream and routes JSON-RPC responses by id; one connected client is reused per process via a lazy singleton.
+- Graceful degradation: when the MCP server is unreachable or `RAG_TRANSPORT` isn't set, the new tools return informative no-op strings; the bot stays alive.
+
 ## [Unreleased] - 2026-04-29
 
 ### Added
diff --git a/src/llm/litellm_connector.py b/src/llm/litellm_connector.py
index 8b33eca..5c1ac86 100644
--- a/src/llm/litellm_connector.py
+++ b/src/llm/litellm_connector.py
@@ -645,6 +645,62 @@ def persist_to_rag(text: str, title: str = "", tags: str = "") -> str:
     return f"Persisted to vault. Server: {str(response)[:200]}"
 
 
+def mcp_list_tools() -> str:
+    """List the tools advertised by the configured MCP server.
+
+    Activates only when ``RAG_TRANSPORT=mcp`` is set in the environment
+    (the bot's REST RAG path is the default). Useful for the LLM to
+    discover what's available before calling ``mcp_call_tool``. Returns
+    a compact rendered list with name + description so the LLM picks
+    the right tool without needing the full JSON Schema.
+    """
+    from llm import rag_mcp_client
+    if not rag_mcp_client.is_enabled():
+        return "MCP transport not enabled (set RAG_TRANSPORT=mcp + RAG_API_URL pointing at the SSE base)."
+    client = rag_mcp_client.get_client()
+    if client is None:
+        return "MCP client unavailable (server unreachable or not configured)."
+    try:
+        tools = client.list_tools()
+    except Exception as e:
+        return f"MCP list_tools failed: {e}"
+    if not tools:
+        return "(no tools advertised by the MCP server)"
+    out = [f"{len(tools)} MCP tool(s) available:"]
+    for t in tools:
+        name = t.get("name", "?")
+        desc = (t.get("description") or "").split("\n")[0][:120]
+        out.append(f"  - {name}: {desc}")
+    return "\n".join(out)
+
+
+def mcp_call_tool(name: str, arguments: str = "{}") -> str:
+    """Invoke a tool on the configured MCP server by name.
+
+    ``arguments`` is a JSON string (the LLM emits one). Returns the
+    server's response, flattened to readable text. Activates only
+    when ``RAG_TRANSPORT=mcp``. Use ``mcp_list_tools`` first to see
+    what's available.
+    """
+    from llm import rag_mcp_client
+    if not rag_mcp_client.is_enabled():
+        return "MCP transport not enabled (set RAG_TRANSPORT=mcp)."
+    client = rag_mcp_client.get_client()
+    if client is None:
+        return "MCP client unavailable."
+    try:
+        args = json.loads(arguments) if arguments else {}
+        if not isinstance(args, dict):
+            return "Error: arguments must be a JSON object"
+    except json.JSONDecodeError as e:
+        return f"Error: invalid arguments JSON: {e}"
+    try:
+        result = client.call_tool(name, args)
+    except Exception as e:
+        return f"MCP call_tool({name}) failed: {e}"
+    return rag_mcp_client.extract_text_content(result)[:4000]
+
+
 def health_check() -> str:
     """
     Run system health check. Use this to diagnose problems!
@@ -1111,6 +1167,19 @@ def manage_service(service: str, action: str = "status") -> str:
             "tags": {"type": "string", "description": "Optional comma-separated tags"}
         }, "required": ["text"]}
     }},
+    {"type": "function", "function": {
+        "name": "mcp_list_tools",
+        "description": "List the tools advertised by the configured MCP server (RAG_API_URL when RAG_TRANSPORT=mcp). Use BEFORE mcp_call_tool to discover what's available. Disabled when RAG_TRANSPORT is not 'mcp'.",
+        "parameters": {"type": "object", "properties": {}, "required": []}
+    }},
+    {"type": "function", "function": {
+        "name": "mcp_call_tool",
+        "description": "Invoke a tool on the configured MCP server by name. Use mcp_list_tools first. `arguments` is a JSON object encoded as a string. Disabled when RAG_TRANSPORT is not 'mcp'.",
+        "parameters": {"type": "object", "properties": {
+            "name": {"type": "string", "description": "Tool name as advertised by the server"},
+            "arguments": {"type": "string", "description": "JSON object literal as a string, e.g. '{\"query\":\"hello\",\"top_k\":3}'"}
+        }, "required": ["name"]}
+    }},
     {"type": "function", "function": {
         "name": "add_custom_face",
         "description": "Add a custom face to data/custom_faces.json. After adding, the face becomes available immediately. ALWAYS output FACE: <name> and SAY: <short text> in your FINAL reply to the user so they see the new face on the E-Ink display.",
@@ -1174,6 +1243,8 @@ def manage_service(service: str, action: str = "status") -> str:
     "vault_search": vault_search,
     "query_rag": query_rag,
     "persist_to_rag": persist_to_rag,
+    "mcp_list_tools": mcp_list_tools,
+    "mcp_call_tool": mcp_call_tool,
 }
 
 
@@ -1208,6 +1279,8 @@ def manage_service(service: str, action: str = "status") -> str:
     "vault_search": "🔎",
     "query_rag": "🧠",
     "persist_to_rag": "💾",
+    "mcp_list_tools": "🛠",
+    "mcp_call_tool": "🔌",
 }
 
 
diff --git a/src/llm/rag_mcp_client.py b/src/llm/rag_mcp_client.py
index 68950df..7c2983d 100644
--- a/src/llm/rag_mcp_client.py
+++ b/src/llm/rag_mcp_client.py
@@ -1,67 +1,336 @@
 """
-Skeleton for future MCP-client integration with an external RAG service.
-
-Today the bot talks to a RAG service via plain REST (`rag_client.py`) which
-is small, sync and dependency-light — fits the Pi Zero 2W's RAM budget.
-A future Option B would talk to the RAG service's MCP-SSE endpoint instead,
-which gives access to whatever tools that server exposes (`rag_search`,
-`rag_persist`, `rag_status`, …) dynamically rather than the curated REST
-surface this module wraps.
-
-Why it isn't wired in yet:
-
-  - the official `mcp` Python package pulls in `httpx[http2]`, `pydantic`,
-    `anyio`, etc. — non-trivial RAM hit on a 512 MB device
-  - SSE keeps a long-lived connection open per client, which doesn't play
-    nicely with the bot's "spawn-and-die" subprocess pattern for display
-    updates and other side jobs
-  - REST gets us 90 % of the value (search + persist) at 10 % of the cost
-
-When this gets activated:
-  1. add `mcp[cli]>=1.x` to requirements.txt
-  2. flesh out `RagMcpClient` below: `__aenter__`, `list_tools`,
-     `call_tool(name, args)`, retry/reconnect on SSE drops
-  3. extend `litellm_connector.TOOL_MAP` with a generic `rag_mcp_tool`
-     dispatcher OR auto-register every advertised MCP tool at startup
-  4. honour the same env vars as `rag_client.py` (`RAG_API_URL` /
-     `RAG_API_KEY`) but route to the MCP-SSE port (typically 8766)
-
-Until then this module is intentionally a placeholder so `import` doesn't
-break and the architectural shape is visible in the source.
+Minimal MCP-over-SSE client for openclawgotchi.
+
+Why hand-rolled instead of `mcp[cli]`?
+  The official PyPI `mcp` package pulls in `cryptography` (~4.7 MB),
+  `pydantic-settings`, `starlette`, `uvicorn`, `pyjwt`, `httpx-sse`,
+  `sse-starlette`, `python-multipart` — non-trivial RAM hit on the
+  Pi Zero 2W (512 MB total, ~50 MB headroom in practice). This module
+  speaks just enough of the MCP spec to do `initialize` + `tools/list`
+  + `tools/call` against an SSE-transport server, using only `requests`
+  (already in the venv via litellm) plus a small SSE-line parser.
+
+Wire protocol it speaks:
+  GET  {base_url}/sse                   — long-poll SSE stream
+       first event: ``event: endpoint\\ndata: /messages?session_id=…``
+                    (relative to base_url)
+       further events: ``event: message\\ndata: <JSON-RPC response>``
+  POST {endpoint_url}                   — send JSON-RPC requests
+       body: {"jsonrpc":"2.0","id":<n>,"method":<m>,"params":<p>}
+
+Public surface:
+  client = MCPSSEClient(base_url, api_key=None)
+  client.connect()        # opens SSE, waits for endpoint
+  client.initialize()     # MCP handshake
+  tools = client.list_tools()
+  result = client.call_tool(name, {arg: value, ...})
+  client.close()
+
+All synchronous — designed to slot into the bot's existing sync
+TOOL_MAP dispatcher in litellm_connector.py without async plumbing.
+
+Activation is gated by env var ``RAG_TRANSPORT=mcp`` (default
+``rest``). When ``mcp`` is selected, ``RAG_API_URL`` is interpreted as
+the MCP-SSE base URL (e.g. ``http://your-rag-host:8766``). When the
+client is unreachable, callers fall back to None / empty results so
+the bot stays alive.
 """
 
 from __future__ import annotations
 
+import json
 import logging
+import os
+import threading
+import time
+from typing import Any, Optional
 
 log = logging.getLogger(__name__)
 
-# Conventional SSE port for RAG-style MCP servers. Kept here so any future
-# activator only has to flip a flag rather than hunt constants. Override
-# via env or constructor when the actual server uses something else.
-DEFAULT_MCP_SSE_PORT = 8766
+DEFAULT_TIMEOUT_S = 15.0
+DEFAULT_PROTOCOL_VERSION = "2024-11-05"
 
 
 def is_enabled() -> bool:
-    """Always False today — flips on in the future Option B PR."""
-    return False
+    """True when the bot is configured to use MCP for RAG."""
+    return os.environ.get("RAG_TRANSPORT", "rest").strip().lower() == "mcp"
+
 
+class MCPSSEClient:
+    """Thread-safe synchronous MCP client over SSE.
 
-class RagMcpClient:
-    """Placeholder. Construct + call methods are stubs that raise."""
+    A single background thread reads the SSE stream and routes
+    JSON-RPC responses back to whichever caller invoked the matching
+    request id. Notifications are silently dropped — we don't act on
+    server-pushed events today.
+    """
 
-    def __init__(self, base_url: str, api_key: str | None = None):
-        self.base_url = base_url
+    def __init__(
+        self,
+        base_url: str,
+        api_key: Optional[str] = None,
+        timeout: float = DEFAULT_TIMEOUT_S,
+        client_name: str = "openclawgotchi",
+        client_version: str = "0.1",
+    ):
+        self.base_url = base_url.rstrip("/")
         self.api_key = api_key
+        self.timeout = timeout
+        self.client_name = client_name
+        self.client_version = client_version
+
+        # Lazy import — keeps module-import cheap even if the bot never
+        # actually opens an MCP connection.
+        import requests
+        self._requests = requests
+        self._session = requests.Session()
+
+        self._endpoint_url: Optional[str] = None
+        self._endpoint_event = threading.Event()
+        self._responses: dict[int, dict] = {}
+        self._response_events: dict[int, threading.Event] = {}
+        self._next_id = 1
+        self._id_lock = threading.Lock()
+        self._stop = threading.Event()
+        self._sse_thread: Optional[threading.Thread] = None
+        self._initialized = False
+        self._init_lock = threading.Lock()
+
+    # ---- internals -------------------------------------------------------
+
+    def _headers(self) -> dict[str, str]:
+        h = {"Accept": "application/json, text/event-stream"}
+        if self.api_key:
+            h["Authorization"] = f"Bearer {self.api_key}"
+        return h
+
+    def _next_request_id(self) -> int:
+        with self._id_lock:
+            i = self._next_id
+            self._next_id += 1
+            return i
+
+    def _sse_loop(self) -> None:
+        """Read the SSE stream forever (until close()) and dispatch messages."""
+        url = f"{self.base_url}/sse"
+        try:
+            r = self._session.get(
+                url,
+                headers=self._headers(),
+                stream=True,
+                timeout=(self.timeout, None),  # connect timeout, then no read timeout
+            )
+            r.raise_for_status()
+            event_type: Optional[str] = None
+            data_buf: list[str] = []
+            for raw_line in r.iter_lines(decode_unicode=True):
+                if self._stop.is_set():
+                    break
+                if raw_line is None:
+                    continue
+                line = raw_line.rstrip("\r")
+                if line == "":
+                    # Dispatch the buffered event.
+                    if event_type and data_buf:
+                        self._dispatch(event_type, "\n".join(data_buf))
+                    event_type = None
+                    data_buf = []
+                    continue
+                if line.startswith(":"):
+                    # Comment / heartbeat.
+                    continue
+                if line.startswith("event:"):
+                    event_type = line[6:].strip()
+                elif line.startswith("data:"):
+                    data_buf.append(line[5:].lstrip(" "))
+        except Exception as e:
+            log.warning(f"MCP SSE stream closed: {e}")
+        finally:
+            # Wake any pending callers so they don't hang forever.
+            for evt in self._response_events.values():
+                evt.set()
+
+    def _dispatch(self, event_type: str, data: str) -> None:
+        if event_type == "endpoint":
+            # Server tells us where to POST messages. Path may be relative.
+            ep = data if data.startswith(("http://", "https://")) else f"{self.base_url}{data}"
+            self._endpoint_url = ep
+            self._endpoint_event.set()
+            log.debug(f"MCP endpoint: {ep}")
+            return
+        if event_type == "message":
+            try:
+                msg = json.loads(data)
+            except Exception as e:
+                log.warning(f"MCP non-JSON message: {e}")
+                return
+            msg_id = msg.get("id")
+            if msg_id is None:
+                # Notification. Today we ignore these.
+                return
+            if msg_id in self._response_events:
+                self._responses[msg_id] = msg
+                self._response_events[msg_id].set()
+
+    def _request(self, method: str, params: Optional[dict] = None) -> Any:
+        if self._endpoint_url is None:
+            raise RuntimeError("MCP client not connected (call .connect() first)")
+        req_id = self._next_request_id()
+        body: dict[str, Any] = {"jsonrpc": "2.0", "id": req_id, "method": method}
+        if params is not None:
+            body["params"] = params
+
+        evt = threading.Event()
+        self._response_events[req_id] = evt
+        try:
+            r = self._session.post(
+                self._endpoint_url,
+                json=body,
+                headers=self._headers(),
+                timeout=self.timeout,
+            )
+            if r.status_code >= 400:
+                raise RuntimeError(f"MCP {method} HTTP {r.status_code}: {r.text[:200]}")
+            if not evt.wait(timeout=self.timeout):
+                raise TimeoutError(f"MCP {method}: no response within {self.timeout}s")
+            resp = self._responses.pop(req_id, None)
+            if resp is None:
+                raise RuntimeError(f"MCP {method}: stream closed before response")
+            if "error" in resp:
+                raise RuntimeError(f"MCP {method} error: {resp['error']}")
+            return resp.get("result")
+        finally:
+            self._response_events.pop(req_id, None)
+
+    def _notify(self, method: str, params: Optional[dict] = None) -> None:
+        if self._endpoint_url is None:
+            raise RuntimeError("MCP client not connected")
+        body: dict[str, Any] = {"jsonrpc": "2.0", "method": method}
+        if params is not None:
+            body["params"] = params
+        try:
+            self._session.post(
+                self._endpoint_url,
+                json=body,
+                headers=self._headers(),
+                timeout=self.timeout,
+            )
+        except Exception as e:
+            log.warning(f"MCP notification {method} failed: {e}")
+
+    # ---- public API ------------------------------------------------------
+
+    def connect(self) -> None:
+        """Open the SSE stream and wait for the server's endpoint event."""
+        if self._sse_thread is not None and self._sse_thread.is_alive():
+            return
+        self._stop.clear()
+        self._endpoint_event.clear()
+        self._sse_thread = threading.Thread(
+            target=self._sse_loop,
+            daemon=True,
+            name="mcp-sse-reader",
+        )
+        self._sse_thread.start()
+        if not self._endpoint_event.wait(timeout=self.timeout):
+            self.close()
+            raise TimeoutError(f"MCP {self.base_url}/sse: no endpoint event within {self.timeout}s")
+
+    def initialize(self) -> dict:
+        """Run the MCP `initialize` handshake. Idempotent."""
+        with self._init_lock:
+            if self._initialized:
+                return {"already": True}
+            result = self._request("initialize", {
+                "protocolVersion": DEFAULT_PROTOCOL_VERSION,
+                "capabilities": {},
+                "clientInfo": {"name": self.client_name, "version": self.client_version},
+            })
+            # Required notification per MCP spec.
+            self._notify("notifications/initialized")
+            self._initialized = True
+            return result
+
+    def list_tools(self) -> list[dict]:
+        """Return list of tools the server advertises."""
+        if not self._initialized:
+            self.initialize()
+        result = self._request("tools/list")
+        return list(result.get("tools", [])) if isinstance(result, dict) else []
+
+    def call_tool(self, name: str, arguments: Optional[dict] = None) -> dict:
+        """Call a tool by name. Returns the raw MCP `tools/call` result dict."""
+        if not self._initialized:
+            self.initialize()
+        return self._request("tools/call", {
+            "name": name,
+            "arguments": arguments or {},
+        })
+
+    def close(self) -> None:
+        """Stop the SSE reader and clean up. Safe to call multiple times."""
+        self._stop.set()
+        try:
+            self._session.close()
+        except Exception:
+            pass
+        self._endpoint_url = None
+        self._initialized = False
+
+
+# ---- module-level convenience: a singleton client lazily reused ---------
+
+_singleton: Optional[MCPSSEClient] = None
+_singleton_lock = threading.Lock()
+
+
+def get_client() -> Optional[MCPSSEClient]:
+    """Return a connected, initialized MCPSSEClient, or None when disabled.
+
+    On first call (per process) this opens the SSE stream and runs
+    `initialize`. Subsequent calls reuse the same client. If the
+    server is unreachable, returns None (callers fall back).
+    """
+    if not is_enabled():
+        return None
+
+    base = os.environ.get("RAG_API_URL", "").rstrip("/")
+    if not base:
+        return None
+    api_key = os.environ.get("RAG_API_KEY", "") or None
 
-    async def __aenter__(self):
-        raise NotImplementedError("RagMcpClient is a roadmap stub — see module docstring")
+    global _singleton
+    with _singleton_lock:
+        if _singleton is None:
+            try:
+                client = MCPSSEClient(base, api_key=api_key)
+                client.connect()
+                client.initialize()
+                _singleton = client
+            except Exception as e:
+                log.warning(f"MCP client setup failed ({base}): {e}")
+                return None
+        return _singleton
 
-    async def __aexit__(self, *exc):
-        return False
 
-    async def list_tools(self) -> list[dict]:
-        raise NotImplementedError("RagMcpClient is a roadmap stub")
+def extract_text_content(call_result: dict) -> str:
+    """Pull a printable string out of an MCP `tools/call` result.
 
-    async def call_tool(self, name: str, args: dict) -> dict:
-        raise NotImplementedError("RagMcpClient is a roadmap stub")
+    MCP results have a `content` array of typed parts. We concatenate
+    text parts; non-text parts are summarised by their type.
+    """
+    if not isinstance(call_result, dict):
+        return str(call_result)
+    parts = call_result.get("content")
+    if not isinstance(parts, list):
+        return json.dumps(call_result)[:2000]
+    out: list[str] = []
+    for p in parts:
+        if not isinstance(p, dict):
+            continue
+        t = p.get("type")
+        if t == "text":
+            out.append(str(p.get("text", "")))
+        else:
+            out.append(f"[{t}]")
+    return "\n".join(out).strip()

From cd631b0b824a583aa76572ff0b298d525ebaa7f4 Mon Sep 17 00:00:00 2001
From: Smilez1985 <237579773+Smilez1985@users.noreply.github.com>
Date: Sat, 9 May 2026 05:09:29 +0200
Subject: [PATCH 21/21] feat(mcp): auto-register MCP tools as first-class +
 RAG-aware system prompt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When RAG_TRANSPORT=mcp and the MCP server is reachable at startup,
discover its advertised tools via tools/list and register each one as
a first-class TOOL_MAP entry with full JSON-Schema. The LLM then calls
e.g. `rag_search(query=..., top_k=3)` directly instead of the two-hop
`mcp_list_tools` → `mcp_call_tool` indirection. Names that collide
with an existing TOOL_MAP entry are skipped. Failures are logged but
never crash the bot.

A new system-prompt section "External Memory (MCP)" lists which tools
were registered and instructs the bot to:
  - search RAG BEFORE answering questions about user preferences,
    project rules, decisions, or past context
  - persist durable lessons via the persist tool
  - optionally announce session context once per conversation

Why: the previous PR exposed `mcp_list_tools` / `mcp_call_tool` as
generic glue, but the LLM wouldn't reach for them on its own — and
even when it did, the two-hop indirection wasted turns. Auto-
registration lets the agent use the RAG as its durable memory the
same way it already uses `remember_fact` / `recall_facts` for the
in-process store; the prompt block tells it WHEN.

Both `mcp_list_tools` / `mcp_call_tool` remain available as fallback
for ad-hoc discovery and tools that show up after bot start.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/llm/litellm_connector.py | 90 ++++++++++++++++++++++++++++++++++++
 src/llm/prompts.py           | 36 +++++++++++++++
 2 files changed, 126 insertions(+)

diff --git a/src/llm/litellm_connector.py b/src/llm/litellm_connector.py
index 5c1ac86..5cc46e5 100644
--- a/src/llm/litellm_connector.py
+++ b/src/llm/litellm_connector.py
@@ -1284,6 +1284,96 @@ def manage_service(service: str, action: str = "status") -> str:
 }
 
 
+# ============================================================
+# MCP TOOL AUTO-REGISTRATION
+# ============================================================
+# When RAG_TRANSPORT=mcp and the MCP server is reachable, discover
+# its advertised tools at module-init time and register each as a
+# first-class TOOL_MAP entry (with full JSON-Schema). The LLM then
+# calls e.g. ``rag_search(query=..., top_k=3)`` directly instead of
+# the two-hop ``mcp_list_tools`` → ``mcp_call_tool`` indirection.
+# Names that collide with an existing TOOL_MAP entry are skipped.
+# Failures are logged but never crash the bot.
+
+_MCP_REGISTERED_TOOLS: list[str] = []
+
+
+def _make_mcp_tool_wrapper(tool_name: str):
+    """Build a kwargs-based callable that invokes ``tool_name`` over MCP."""
+    def _mcp_tool(**kwargs) -> str:
+        from llm import rag_mcp_client
+        client = rag_mcp_client.get_client()
+        if client is None:
+            return f"MCP unavailable for {tool_name}"
+        try:
+            result = client.call_tool(tool_name, kwargs)
+        except Exception as e:
+            return f"MCP {tool_name} failed: {e}"
+        return rag_mcp_client.extract_text_content(result)[:4000]
+    _mcp_tool.__name__ = tool_name
+    return _mcp_tool
+
+
+def _register_mcp_tools_at_startup() -> int:
+    """Discover MCP tools and add them as first-class TOOL_MAP entries.
+
+    Idempotent: a tool already in TOOL_MAP (collision with a built-in
+    name) is skipped. Returns the number of newly-registered tools.
+    Safe to call multiple times.
+    """
+    from llm import rag_mcp_client
+    if not rag_mcp_client.is_enabled():
+        return 0
+    client = rag_mcp_client.get_client()
+    if client is None:
+        return 0
+    try:
+        tools = client.list_tools()
+    except Exception as e:
+        log.warning(f"MCP auto-registration: list_tools failed: {e}")
+        return 0
+
+    registered = 0
+    for tool in tools:
+        name = tool.get("name")
+        if not name or name in TOOL_MAP:
+            continue
+        desc = (tool.get("description") or name)[:1024]
+        schema = tool.get("inputSchema") or {"type": "object", "properties": {}}
+        TOOLS.append({
+            "type": "function",
+            "function": {
+                "name": name,
+                "description": desc,
+                "parameters": schema,
+            },
+        })
+        TOOL_MAP[name] = _make_mcp_tool_wrapper(name)
+        _TOOL_ICONS.setdefault(name, "🔌")
+        _MCP_REGISTERED_TOOLS.append(name)
+        registered += 1
+
+    if registered:
+        log.info(
+            "MCP auto-registered %d tool(s) as first-class: %s",
+            registered, ", ".join(_MCP_REGISTERED_TOOLS),
+        )
+    return registered
+
+
+# Try at import time. Server unreachable → silent no-op (the existing
+# mcp_list_tools / mcp_call_tool fallback path remains usable).
+try:
+    _register_mcp_tools_at_startup()
+except Exception as _mcp_reg_err:
+    log.warning("MCP auto-registration skipped: %s", _mcp_reg_err)
+
+
+def get_registered_mcp_tools() -> list[str]:
+    """Return names of MCP tools registered as first-class. Used by prompts."""
+    return list(_MCP_REGISTERED_TOOLS)
+
+
 def _format_tool_action(func_name: str, args: dict, result: str) -> str:
     """Format a single tool action for the user summary."""
     icon = _TOOL_ICONS.get(func_name, "🔧")
diff --git a/src/llm/prompts.py b/src/llm/prompts.py
index cc9152c..a62b0b2 100644
--- a/src/llm/prompts.py
+++ b/src/llm/prompts.py
@@ -31,6 +31,37 @@ def _language_directive() -> str:
         f"Only switch language if the user clearly writes to you in another language; "
         f"in that case, mirror their language. Never use a third language."
     )
+
+
+def _mcp_memory_directive() -> str:
+    """Tell the bot which external MCP tools are first-class and when to use them.
+
+    Only emitted when at least one MCP tool was successfully auto-registered
+    at startup; otherwise empty so non-MCP deployments stay unchanged.
+    """
+    try:
+        from llm.litellm_connector import get_registered_mcp_tools
+    except Exception:
+        return ""
+    tools = get_registered_mcp_tools()
+    if not tools:
+        return ""
+    bullet_list = ", ".join(f"`{t}`" for t in tools)
+    return (
+        "\n---\n## External Memory (MCP)\n"
+        f"You have first-class access to a long-term RAG memory via these tools: {bullet_list}.\n"
+        "Treat the RAG as your durable memory — your in-process facts and chat history are short-term.\n"
+        "Use it proactively, not just when the user asks:\n"
+        "- BEFORE answering questions about user preferences, project rules, decisions, or past context: "
+        "call `rag_search` (or the equivalent recall tool) to ground your reply in stored knowledge. "
+        "Do not assume from memory alone if a relevant rule might exist.\n"
+        "- WHEN you learn something durable (preferences, decisions, project facts, hard-won lessons): "
+        "call `rag_persist` to save it. Skip casual chat. Prefer short, factual notes with tags.\n"
+        "- If `rag_session_announce` exists, you may announce session context once per conversation "
+        "so the server can route you to the right collections.\n"
+        "Silent rule: if the user later corrects you because you ignored a stored rule, the fault is yours "
+        "for not having searched — search first, answer second."
+    )
 from hardware.system import get_stats_string
 import json
 
@@ -209,6 +240,11 @@ def build_system_context(user_message: str = "") -> str:
     if lang:
         parts.append(lang)
 
+    # External MCP memory — only when at least one MCP tool was auto-registered
+    mcp_block = _mcp_memory_directive()
+    if mcp_block:
+        parts.append(mcp_block)
+
     # Add custom faces list if any
     custom_faces = _load_custom_faces_list()
     if custom_faces: