diff --git a/src/fleet_rlm/runtime/modules/escalating.py b/src/fleet_rlm/runtime/modules/escalating.py index 7f726eaa..a1593f47 100644 --- a/src/fleet_rlm/runtime/modules/escalating.py +++ b/src/fleet_rlm/runtime/modules/escalating.py @@ -13,6 +13,7 @@ from __future__ import annotations import logging +import re from typing import Any import dspy @@ -26,12 +27,28 @@ ESCALATION_SENTINEL = "[TOOLS NEEDED]" _RLM_FALLBACK_WARNING = "RLM escalation failed; returned a lightweight fallback response." +_LIVE_WEB_URL_RE = re.compile(r"https?://[^\s<>'\"]+", flags=re.IGNORECASE) +_LIVE_WEB_REQUEST_RE = re.compile( + r"\b(" + r"browse|download|fetch|open|read|retrieve|scrape|summari[sz]e" + r")\b.*\b(" + r"internet|online|page|pdf|site|url|web|website" + r")\b", + flags=re.IGNORECASE, +) def _is_rlm_execution_mode(execution_mode: str) -> bool: return execution_mode in {"rlm", "rlm_only"} +def _requires_live_web_tools(user_request: str) -> bool: + """Return whether a turn should skip lightweight chat and use web-capable tools.""" + if _LIVE_WEB_URL_RE.search(user_request): + return True + return bool(_LIVE_WEB_REQUEST_RE.search(user_request)) + + def _history_value(message: Any, *keys: str) -> str: if isinstance(message, dict): for key in keys: @@ -191,6 +208,14 @@ def forward( history=history, conversation_summary=conversation_summary, ) + if _requires_live_web_tools(user_request): + logger.debug("EscalatingFleetModule: routing live-web request to RLM path") + return self._run_rlm( + user_request=user_request, + core_memory=core_memory, + history=history, + conversation_summary=conversation_summary, + ) prediction = self.respond( user_request=user_request, diff --git a/src/fleet_rlm/runtime/tools/document_tools.py b/src/fleet_rlm/runtime/tools/document_tools.py index 86757018..512480ff 100644 --- a/src/fleet_rlm/runtime/tools/document_tools.py +++ b/src/fleet_rlm/runtime/tools/document_tools.py @@ -119,7 +119,8 @@ def _suffix_from_url(url: str, headers: dict[str, str]) -> str: if url_suffix: return url_suffix - content_type = headers.get("Content-Type", "").split(";")[0].strip().lower() + content_type_header = next((value for key, value in headers.items() if key.lower() == "content-type"), "") + content_type = content_type_header.split(";")[0].strip().lower() return _CONTENT_TYPE_SUFFIX_MAP.get(content_type, ".txt") @@ -270,6 +271,7 @@ def list_documents() -> dict[str, Any]: } +@tool_fn def fetch_document_text(url_or_path: str) -> dict[str, Any]: """Fetch and extract text from an HTTP(S) document URL. diff --git a/tests/unit/runtime/test_escalating_module.py b/tests/unit/runtime/test_escalating_module.py index 0a28d1d8..df474461 100644 --- a/tests/unit/runtime/test_escalating_module.py +++ b/tests/unit/runtime/test_escalating_module.py @@ -69,6 +69,19 @@ def test_rlm_path_triggered_by_sentinel_in_reasoning(self) -> None: module._rlm.assert_called_once() assert getattr(result, "answer", None) == "deep answer" + def test_url_fetch_request_forces_rlm_before_lightweight_response(self) -> None: + module = _make_module() + _stub_respond(module, reasoning="I cannot browse the live web.", response="no web access") + rlm_pred = _FakePrediction(answer="fetched document") + module._rlm = MagicMock(return_value=rlm_pred) + _stub_summarize(module) + + result = module(user_request="fetch https://arxiv.org/pdf/2512.24601 please", execution_mode="auto") + + module.respond.assert_not_called() + module._rlm.assert_called_once() + assert getattr(result, "answer", None) == "fetched document" + def test_force_escalate_skips_cot(self) -> None: module = _make_module() _stub_respond(module) diff --git a/tests/unit/runtime/test_phase3_tools.py b/tests/unit/runtime/test_phase3_tools.py index d0f68bf6..0a8ac2f3 100644 --- a/tests/unit/runtime/test_phase3_tools.py +++ b/tests/unit/runtime/test_phase3_tools.py @@ -17,7 +17,14 @@ def test_phase3_tools_are_registered() -> None: names = set(list_react_tool_names(discover_tools())) - assert {"web_search", "fetch_page", "search_knowledge", "load_skill", "load_document"} <= names + assert { + "web_search", + "fetch_page", + "fetch_document_text", + "search_knowledge", + "load_skill", + "load_document", + } <= names def test_load_document_persists_and_searches_knowledge(tmp_path: Path) -> None: diff --git a/tests/unit/runtime/test_tools.py b/tests/unit/runtime/test_tools.py index 8485c0f6..12a11270 100644 --- a/tests/unit/runtime/test_tools.py +++ b/tests/unit/runtime/test_tools.py @@ -341,6 +341,14 @@ def test_chunk_document_and_load_document_helpers_use_text_and_directories(tmp_p } +def test_suffix_from_url_uses_case_insensitive_content_type_for_pdf() -> None: + from fleet_rlm.runtime.tools.document_tools import _suffix_from_url + + suffix = _suffix_from_url("https://arxiv.org/pdf/2512.24601", {"content-type": "application/pdf"}) + + assert suffix == ".pdf" + + def test_download_url_removes_partial_temp_file_on_size_limit( tmp_path: Path, monkeypatch: pytest.MonkeyPatch,