Qredence · Zochory · May 30, 2026 · gemini-code-assist · May 30, 2026 · gemini-code-assist
diff --git a/src/fleet_rlm/runtime/modules/escalating.py b/src/fleet_rlm/runtime/modules/escalating.py
@@ -13,6 +13,7 @@
 from __future__ import annotations
 
 import logging
+import re
 from typing import Any
 
 import dspy
@@ -26,12 +27,28 @@
 
 ESCALATION_SENTINEL = "[TOOLS NEEDED]"
 _RLM_FALLBACK_WARNING = "RLM escalation failed; returned a lightweight fallback response."
+# Matches an http:// or https:// URL (scheme matched case-insensitively); the
+# match runs up to the first whitespace, angle bracket, or quote character.
+_LIVE_WEB_URL_RE = re.compile(r"https?://[^\s<>'\"]+", flags=re.IGNORECASE)
+_LIVE_WEB_REQUEST_RE = re.compile(
+    r"\b("
+    r"browse|download|fetch|open|read|retrieve|scrape|summari[sz]e"
+    r")\b.*\b("
+    r"internet|online|page|pdf|site|url|web|website"
+    r")\b",
+    flags=re.IGNORECASE,
+)
-_LIVE_WEB_REQUEST_RE = re.compile(
-    r"\b("
-    r"browse|download|fetch|open|read|retrieve|scrape|summari[sz]e"
-    r")\b.*\b("
-    r"internet|online|page|pdf|site|url|web|website"
-    r")\b",
-    flags=re.IGNORECASE,
-)
+# Whole-word action keywords, matched case-insensitively; ``summari[sz]e``
+# accepts both the "summarise" and "summarize" spellings.
+_LIVE_WEB_ACTION_RE = re.compile(
+    r"\b(browse|download|fetch|open|read|retrieve|scrape|summari[sz]e)\b",
+    flags=re.IGNORECASE,
+)
+# Whole-word web-target keywords, matched case-insensitively. Kept as a pattern
+# separate from the action keywords so each can be searched on its own.
+_LIVE_WEB_TARGET_RE = re.compile(
+    r"\b(internet|online|page|pdf|site|url|web|website)\b",
+    flags=re.IGNORECASE,
+)
-_LIVE_WEB_REQUEST_RE = re.compile(
-    r"\b("
-    r"browse|download|fetch|open|read|retrieve|scrape|summari[sz]e"
-    r")\b.*\b("
-    r"internet|online|page|pdf|site|url|web|website"
-    r")\b",
-    flags=re.IGNORECASE,
-)
+# Whole-word action keywords, matched case-insensitively; ``summari[sz]e``
+# accepts both the "summarise" and "summarize" spellings.
+_LIVE_WEB_ACTION_RE = re.compile(
+    r"\b(browse|download|fetch|open|read|retrieve|scrape|summari[sz]e)\b",
+    flags=re.IGNORECASE,
+)
+# Whole-word web-target keywords, matched case-insensitively. Kept as a pattern
+# separate from the action keywords so each can be searched on its own.
+_LIVE_WEB_TARGET_RE = re.compile(
+    r"\b(internet|online|page|pdf|site|url|web|website)\b",
+    flags=re.IGNORECASE,
+)
 
 
 def _is_rlm_execution_mode(execution_mode: str) -> bool:
     return execution_mode in {"rlm", "rlm_only"}
 
 
+def _requires_live_web_tools(user_request: str) -> bool:
+    """Return whether a turn should skip lightweight chat and use web-capable tools."""
+    if _LIVE_WEB_URL_RE.search(user_request):
+        return True
+    return bool(_LIVE_WEB_REQUEST_RE.search(user_request))
-def _requires_live_web_tools(user_request: str) -> bool:
-    """Return whether a turn should skip lightweight chat and use web-capable tools."""
-    if _LIVE_WEB_URL_RE.search(user_request):
-        return True
-    return bool(_LIVE_WEB_REQUEST_RE.search(user_request))
+def _requires_live_web_tools(user_request: str) -> bool:
+    """Decide whether a turn must bypass lightweight chat for web-capable tools.
+
+    A request qualifies when it contains an http(s) URL, or when it mentions
+    both an action keyword and a web-target keyword anywhere in the text.
+    """
+    if _LIVE_WEB_URL_RE.search(user_request) is not None:
+        return True
+    mentions_action = _LIVE_WEB_ACTION_RE.search(user_request) is not None
+    return mentions_action and _LIVE_WEB_TARGET_RE.search(user_request) is not None
-def _requires_live_web_tools(user_request: str) -> bool:
-    """Return whether a turn should skip lightweight chat and use web-capable tools."""
-    if _LIVE_WEB_URL_RE.search(user_request):
-        return True
-    return bool(_LIVE_WEB_REQUEST_RE.search(user_request))
+def _requires_live_web_tools(user_request: str) -> bool:
+    """Decide whether a turn must bypass lightweight chat for web-capable tools.
+
+    A request qualifies when it contains an http(s) URL, or when it mentions
+    both an action keyword and a web-target keyword anywhere in the text.
+    """
+    if _LIVE_WEB_URL_RE.search(user_request) is not None:
+        return True
+    mentions_action = _LIVE_WEB_ACTION_RE.search(user_request) is not None
+    return mentions_action and _LIVE_WEB_TARGET_RE.search(user_request) is not None
+
+
 def _history_value(message: Any, *keys: str) -> str:
     if isinstance(message, dict):
         for key in keys:
@@ -191,6 +208,14 @@ def forward(
                 history=history,
                 conversation_summary=conversation_summary,
             )
+        if _requires_live_web_tools(user_request):
+            logger.debug("EscalatingFleetModule: routing live-web request to RLM path")
+            return self._run_rlm(
+                user_request=user_request,
+                core_memory=core_memory,
+                history=history,
+                conversation_summary=conversation_summary,
+            )
 
         prediction = self.respond(
             user_request=user_request,

diff --git a/src/fleet_rlm/runtime/tools/document_tools.py b/src/fleet_rlm/runtime/tools/document_tools.py
@@ -119,7 +119,8 @@ def _suffix_from_url(url: str, headers: dict[str, str]) -> str:
     if url_suffix:
         return url_suffix
 
-    content_type = headers.get("Content-Type", "").split(";")[0].strip().lower()
+    content_type_header = next((value for key, value in headers.items() if key.lower() == "content-type"), "")
+    content_type = content_type_header.split(";")[0].strip().lower()
     return _CONTENT_TYPE_SUFFIX_MAP.get(content_type, ".txt")
 
 
@@ -270,6 +271,7 @@ def list_documents() -> dict[str, Any]:
     }
 
 
+@tool_fn
 def fetch_document_text(url_or_path: str) -> dict[str, Any]:
     """Fetch and extract text from an HTTP(S) document URL.
 

diff --git a/tests/unit/runtime/test_escalating_module.py b/tests/unit/runtime/test_escalating_module.py
@@ -69,6 +69,19 @@ def test_rlm_path_triggered_by_sentinel_in_reasoning(self) -> None:
         module._rlm.assert_called_once()
         assert getattr(result, "answer", None) == "deep answer"
 
+    def test_url_fetch_request_forces_rlm_before_lightweight_response(self) -> None:
+        module = _make_module()
+        _stub_respond(module, reasoning="I cannot browse the live web.", response="no web access")
+        rlm_pred = _FakePrediction(answer="fetched document")
+        module._rlm = MagicMock(return_value=rlm_pred)
+        _stub_summarize(module)
+
+        result = module(user_request="fetch https://arxiv.org/pdf/2512.24601 please", execution_mode="auto")
+
+        module.respond.assert_not_called()
+        module._rlm.assert_called_once()
+        assert getattr(result, "answer", None) == "fetched document"
-    def test_url_fetch_request_forces_rlm_before_lightweight_response(self) -> None:
-        module = _make_module()
-        _stub_respond(module, reasoning="I cannot browse the live web.", response="no web access")
-        rlm_pred = _FakePrediction(answer="fetched document")
-        module._rlm = MagicMock(return_value=rlm_pred)
-        _stub_summarize(module)
-
-        result = module(user_request="fetch https://arxiv.org/pdf/2512.24601 please", execution_mode="auto")
-
-        module.respond.assert_not_called()
-        module._rlm.assert_called_once()
-        assert getattr(result, "answer", None) == "fetched document"
+    def test_url_fetch_request_forces_rlm_before_lightweight_response(self) -> None:
+        """Live-web requests must reach the RLM path without calling ``respond``."""
+        module = _make_module()
+        _stub_respond(module, reasoning="I cannot browse the live web.", response="no web access")
+        rlm_pred = _FakePrediction(answer="fetched document")
+        module._rlm = MagicMock(return_value=rlm_pred)
+        _stub_summarize(module)
+
+        # Test URL-based routing
+        result = module(user_request="fetch https://arxiv.org/pdf/2512.24601 please", execution_mode="auto")
+        module.respond.assert_not_called()
+        module._rlm.assert_called_once()
+        assert getattr(result, "answer", None) == "fetched document"
+
+        # Test keyword-based routing (action keyword before target keyword)
+        module.respond.reset_mock()
+        module._rlm.reset_mock()
+        result_kw = module(user_request="please browse the internet for me", execution_mode="auto")
+        module.respond.assert_not_called()
+        module._rlm.assert_called_once()
+        assert getattr(result_kw, "answer", None) == "fetched document"
+
+        # Test keyword-based routing with the target keyword before the action
+        # keyword: this is the order-independent case that a single
+        # "action.*target" pattern would not match.
+        module.respond.reset_mock()
+        module._rlm.reset_mock()
+        result_reversed = module(user_request="there is a website I need you to browse", execution_mode="auto")
+        module.respond.assert_not_called()
+        module._rlm.assert_called_once()
+        assert getattr(result_reversed, "answer", None) == "fetched document"
-    def test_url_fetch_request_forces_rlm_before_lightweight_response(self) -> None:
-        module = _make_module()
-        _stub_respond(module, reasoning="I cannot browse the live web.", response="no web access")
-        rlm_pred = _FakePrediction(answer="fetched document")
-        module._rlm = MagicMock(return_value=rlm_pred)
-        _stub_summarize(module)
-
-        result = module(user_request="fetch https://arxiv.org/pdf/2512.24601 please", execution_mode="auto")
-
-        module.respond.assert_not_called()
-        module._rlm.assert_called_once()
-        assert getattr(result, "answer", None) == "fetched document"
+    def test_url_fetch_request_forces_rlm_before_lightweight_response(self) -> None:
+        """Live-web requests must reach the RLM path without calling ``respond``."""
+        module = _make_module()
+        _stub_respond(module, reasoning="I cannot browse the live web.", response="no web access")
+        rlm_pred = _FakePrediction(answer="fetched document")
+        module._rlm = MagicMock(return_value=rlm_pred)
+        _stub_summarize(module)
+
+        # Test URL-based routing
+        result = module(user_request="fetch https://arxiv.org/pdf/2512.24601 please", execution_mode="auto")
+        module.respond.assert_not_called()
+        module._rlm.assert_called_once()
+        assert getattr(result, "answer", None) == "fetched document"
+
+        # Test keyword-based routing (action keyword before target keyword)
+        module.respond.reset_mock()
+        module._rlm.reset_mock()
+        result_kw = module(user_request="please browse the internet for me", execution_mode="auto")
+        module.respond.assert_not_called()
+        module._rlm.assert_called_once()
+        assert getattr(result_kw, "answer", None) == "fetched document"
+
+        # Test keyword-based routing with the target keyword before the action
+        # keyword: this is the order-independent case that a single
+        # "action.*target" pattern would not match.
+        module.respond.reset_mock()
+        module._rlm.reset_mock()
+        result_reversed = module(user_request="there is a website I need you to browse", execution_mode="auto")
+        module.respond.assert_not_called()
+        module._rlm.assert_called_once()
+        assert getattr(result_reversed, "answer", None) == "fetched document"
+
     def test_force_escalate_skips_cot(self) -> None:
         module = _make_module()
         _stub_respond(module)

diff --git a/tests/unit/runtime/test_phase3_tools.py b/tests/unit/runtime/test_phase3_tools.py
@@ -17,7 +17,14 @@ def test_phase3_tools_are_registered() -> None:
 
     names = set(list_react_tool_names(discover_tools()))
 
-    assert {"web_search", "fetch_page", "search_knowledge", "load_skill", "load_document"} <= names
+    assert {
+        "web_search",
+        "fetch_page",
+        "fetch_document_text",
+        "search_knowledge",
+        "load_skill",
+        "load_document",
+    } <= names
 
 
 def test_load_document_persists_and_searches_knowledge(tmp_path: Path) -> None:

diff --git a/tests/unit/runtime/test_tools.py b/tests/unit/runtime/test_tools.py
@@ -341,6 +341,14 @@ def test_chunk_document_and_load_document_helpers_use_text_and_directories(tmp_p
     }
 
 
+def test_suffix_from_url_uses_case_insensitive_content_type_for_pdf() -> None:
+    """A lower-cased ``content-type`` header key must still resolve to ``.pdf``."""
+    from fleet_rlm.runtime.tools.document_tools import _suffix_from_url
+
+    lowercase_headers = {"content-type": "application/pdf"}
+
+    assert _suffix_from_url("https://arxiv.org/pdf/2512.24601", lowercase_headers) == ".pdf"
+
+
 def test_download_url_removes_partial_temp_file_on_size_limit(
     tmp_path: Path,
     monkeypatch: pytest.MonkeyPatch,