Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions tests/tools/test_browser_cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Regression tests for browser session cleanup and screenshot recovery."""

from unittest.mock import patch


class TestScreenshotPathRecovery:
def test_extracts_standard_absolute_path(self):
from tools.browser_tool import _extract_screenshot_path_from_text

assert (
_extract_screenshot_path_from_text("Screenshot saved to /tmp/foo.png")
== "/tmp/foo.png"
)

def test_extracts_quoted_absolute_path(self):
from tools.browser_tool import _extract_screenshot_path_from_text

assert (
_extract_screenshot_path_from_text(
"Screenshot saved to '/Users/david/.hermes/browser_screenshots/shot.png'"
)
== "/Users/david/.hermes/browser_screenshots/shot.png"
)


class TestBrowserCleanup:
def setup_method(self):
from tools import browser_tool

self.browser_tool = browser_tool
self.orig_active_sessions = browser_tool._active_sessions.copy()
self.orig_session_last_activity = browser_tool._session_last_activity.copy()
self.orig_recording_sessions = browser_tool._recording_sessions.copy()
self.orig_cleanup_done = browser_tool._cleanup_done

def teardown_method(self):
self.browser_tool._active_sessions.clear()
self.browser_tool._active_sessions.update(self.orig_active_sessions)
self.browser_tool._session_last_activity.clear()
self.browser_tool._session_last_activity.update(self.orig_session_last_activity)
self.browser_tool._recording_sessions.clear()
self.browser_tool._recording_sessions.update(self.orig_recording_sessions)
self.browser_tool._cleanup_done = self.orig_cleanup_done

def test_cleanup_browser_clears_tracking_state(self):
browser_tool = self.browser_tool
browser_tool._active_sessions["task-1"] = {
"session_name": "sess-1",
"bb_session_id": None,
}
browser_tool._session_last_activity["task-1"] = 123.0

with (
patch("tools.browser_tool._maybe_stop_recording") as mock_stop,
patch(
"tools.browser_tool._run_browser_command",
return_value={"success": True},
) as mock_run,
patch("tools.browser_tool.os.path.exists", return_value=False),
):
browser_tool.cleanup_browser("task-1")

assert "task-1" not in browser_tool._active_sessions
assert "task-1" not in browser_tool._session_last_activity
mock_stop.assert_called_once_with("task-1")
mock_run.assert_called_once_with("task-1", "close", [], timeout=10)

def test_browser_close_delegates_to_cleanup_browser(self):
import json

browser_tool = self.browser_tool
browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"}

with patch("tools.browser_tool.cleanup_browser") as mock_cleanup:
result = json.loads(browser_tool.browser_close("task-2"))

assert result == {"success": True, "closed": True}
mock_cleanup.assert_called_once_with("task-2")

def test_emergency_cleanup_clears_all_tracking_state(self):
browser_tool = self.browser_tool
browser_tool._cleanup_done = False
browser_tool._active_sessions["task-1"] = {"session_name": "sess-1"}
browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"}
browser_tool._session_last_activity["task-1"] = 1.0
browser_tool._session_last_activity["task-2"] = 2.0
browser_tool._recording_sessions.update({"task-1", "task-2"})

with patch("tools.browser_tool.cleanup_all_browsers") as mock_cleanup_all:
browser_tool._emergency_cleanup_all_sessions()

mock_cleanup_all.assert_called_once_with()
assert browser_tool._active_sessions == {}
assert browser_tool._session_last_activity == {}
assert browser_tool._recording_sessions == set()
assert browser_tool._cleanup_done is True
212 changes: 102 additions & 110 deletions tools/browser_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import json
import logging
import os
import re
import signal
import subprocess
import shutil
Expand Down Expand Up @@ -165,63 +166,18 @@ def _emergency_cleanup_all_sessions():
if not _active_sessions:
return

logger.info("Emergency cleanup: closing %s active session(s)...", len(_active_sessions))

try:
if _is_local_mode():
# Local mode: just close agent-browser sessions via CLI
for task_id, session_info in list(_active_sessions.items()):
session_name = session_info.get("session_name")
if session_name:
try:
browser_cmd = _find_agent_browser()
task_socket_dir = os.path.join(
_socket_safe_tmpdir(),
f"agent-browser-{session_name}"
)
env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir}
subprocess.run(
browser_cmd.split() + ["--session", session_name, "--json", "close"],
capture_output=True, timeout=5, env=env,
)
logger.info("Closed local session %s", session_name)
except Exception as e:
logger.debug("Error closing local session %s: %s", session_name, e)
else:
# Cloud mode: release Browserbase sessions via API
api_key = os.environ.get("BROWSERBASE_API_KEY")
project_id = os.environ.get("BROWSERBASE_PROJECT_ID")

if not api_key or not project_id:
logger.warning("Cannot cleanup - missing BROWSERBASE credentials")
return
logger.info("Emergency cleanup: closing %s active session(s)...",
len(_active_sessions))

for task_id, session_info in list(_active_sessions.items()):
bb_session_id = session_info.get("bb_session_id")
if bb_session_id:
try:
response = requests.post(
f"https://api.browserbase.com/v1/sessions/{bb_session_id}",
headers={
"X-BB-API-Key": api_key,
"Content-Type": "application/json"
},
json={
"projectId": project_id,
"status": "REQUEST_RELEASE"
},
timeout=5 # Short timeout for cleanup
)
if response.status_code in (200, 201, 204):
logger.info("Closed session %s", bb_session_id)
else:
logger.warning("Failed to close session %s: HTTP %s", bb_session_id, response.status_code)
except Exception as e:
logger.error("Error closing session %s: %s", bb_session_id, e)

_active_sessions.clear()
try:
cleanup_all_browsers()
except Exception as e:
logger.error("Emergency cleanup error: %s", e)
finally:
with _cleanup_lock:
_active_sessions.clear()
_session_last_activity.clear()
_recording_sessions.clear()


# Register cleanup via atexit only. Previous versions installed SIGINT/SIGTERM
Expand Down Expand Up @@ -640,18 +596,13 @@ def _create_browserbase_session(task_id: str) -> Dict[str, str]:


def _create_local_session(task_id: str) -> Dict[str, str]:
"""Create a lightweight local browser session (no cloud API call).

Returns the same dict shape as ``_create_browserbase_session`` so the rest
of the code can treat both modes uniformly.
"""
import uuid
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
logger.info("Created local browser session %s", session_name)
session_name = f"h_{uuid.uuid4().hex[:10]}"
logger.info("Created local browser session %s for task %s", session_name, task_id)
return {
"session_name": session_name,
"bb_session_id": None, # Not applicable in local mode
"cdp_url": None, # Not applicable in local mode
"bb_session_id": None,
"cdp_url": None,
"features": {"local": True},
}

Expand Down Expand Up @@ -772,6 +723,27 @@ def _find_agent_browser() -> str:
)


def _extract_screenshot_path_from_text(text: str) -> Optional[str]:
"""Extract a screenshot file path from agent-browser human-readable output."""
if not text:
return None

patterns = [
r"Screenshot saved to ['\"](?P<path>/[^'\"]+?\.png)['\"]",
r"Screenshot saved to (?P<path>/\S+?\.png)(?:\s|$)",
r"(?P<path>/\S+?\.png)(?:\s|$)",
]

for pattern in patterns:
match = re.search(pattern, text)
if match:
path = match.group("path").strip().strip("'\"")
if path:
return path

return None


def _run_browser_command(
task_id: str,
command: str,
Expand Down Expand Up @@ -841,9 +813,22 @@ def _run_browser_command(
command, task_id, task_socket_dir, len(task_socket_dir))

browser_env = {**os.environ}
# Ensure PATH includes standard dirs (systemd services may have minimal PATH)
if "/usr/bin" not in browser_env.get("PATH", "").split(":"):
browser_env["PATH"] = f"{browser_env.get('PATH', '')}:{_SANE_PATH}"

# Prefer Hermes-managed Node, but still ensure standard dirs exist for
# service/minimal environments where PATH may be incomplete.
hermes_home = Path(os.environ.get(
"HERMES_HOME", Path.home() / ".hermes"))
hermes_node_bin = str(hermes_home / "node" / "bin")

existing_path = browser_env.get("PATH", "")
path_parts = [p for p in existing_path.split(":") if p]
candidate_dirs = [hermes_node_bin] + [p for p in _SANE_PATH.split(":") if p]

for part in reversed(candidate_dirs):
if os.path.isdir(part) and part not in path_parts:
path_parts.insert(0, part)

browser_env["PATH"] = ":".join(path_parts)
browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir

result = subprocess.run(
Expand All @@ -866,11 +851,11 @@ def _run_browser_command(
command, " ".join(cmd_parts[:4]) + "...",
(result.stderr or "")[:200])

# Parse JSON output
if result.stdout.strip():
stdout_text = result.stdout.strip()

if stdout_text:
try:
parsed = json.loads(result.stdout.strip())
# Warn if snapshot came back empty (common sign of daemon/CDP issues)
parsed = json.loads(stdout_text)
if command == "snapshot" and parsed.get("success"):
snap_data = parsed.get("data", {})
if not snap_data.get("snapshot") and not snap_data.get("refs"):
Expand All @@ -879,13 +864,33 @@ def _run_browser_command(
"returncode=%s", result.returncode)
return parsed
except json.JSONDecodeError:
# Non-JSON output indicates agent-browser crash or version mismatch
raw = result.stdout.strip()[:500]
raw = stdout_text[:2000]
logger.warning("browser '%s' returned non-JSON output (rc=%s): %s",
command, result.returncode, raw[:200])
command, result.returncode, raw[:500])

if command == "screenshot":
stderr_text = (result.stderr or "").strip()
combined_text = "\n".join(
part for part in [stdout_text, stderr_text] if part
)
recovered_path = _extract_screenshot_path_from_text(combined_text)

if recovered_path and Path(recovered_path).exists():
logger.info(
"browser 'screenshot' recovered file from non-JSON output: %s",
recovered_path,
)
return {
"success": True,
"data": {
"path": recovered_path,
"raw": raw,
},
}

return {
"success": True,
"data": {"raw": raw}
"success": False,
"error": f"Non-JSON output from agent-browser for '{command}': {raw}"
}

# Check for errors
Expand Down Expand Up @@ -1258,38 +1263,19 @@ def browser_close(task_id: Optional[str] = None) -> str:
JSON string with close result
"""
effective_task_id = task_id or "default"

# Stop auto-recording before closing
_maybe_stop_recording(effective_task_id)

result = _run_browser_command(effective_task_id, "close", [])

# Close the backend session (Browserbase API in cloud mode, nothing extra in local mode)
session_key = task_id if task_id and task_id in _active_sessions else "default"
if session_key in _active_sessions:
session_info = _active_sessions[session_key]
bb_session_id = session_info.get("bb_session_id")
if bb_session_id:
# Cloud mode: release the Browserbase session via API
try:
config = _get_browserbase_config()
_close_browserbase_session(bb_session_id, config["api_key"], config["project_id"])
except Exception as e:
logger.warning("Could not close BrowserBase session: %s", e)
del _active_sessions[session_key]

if result.get("success"):
return json.dumps({
"success": True,
"closed": True
}, ensure_ascii=False)
else:
# Even if close fails, session was released
return json.dumps({
"success": True,
"closed": True,
"warning": result.get("error", "Session may not have been active")
}, ensure_ascii=False)

with _cleanup_lock:
had_session = effective_task_id in _active_sessions

cleanup_browser(effective_task_id)

response = {
"success": True,
"closed": True,
}
if not had_session:
response["warning"] = "Session may not have been active"
return json.dumps(response, ensure_ascii=False)


def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
Expand Down Expand Up @@ -1481,9 +1467,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
_cleanup_old_screenshots(screenshots_dir, max_age_hours=24)

# Take screenshot using agent-browser
screenshot_args = [str(screenshot_path)]
screenshot_args = []
if annotate:
screenshot_args.insert(0, "--annotate")
screenshot_args.append("--annotate")
screenshot_args.append("--full")
screenshot_args.append(str(screenshot_path))
result = _run_browser_command(
effective_task_id,
"screenshot",
Expand All @@ -1498,7 +1486,11 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
"success": False,
"error": f"Failed to take screenshot ({mode} mode): {error_detail}"
}, ensure_ascii=False)


actual_screenshot_path = result.get("data", {}).get("path")
if actual_screenshot_path:
screenshot_path = Path(actual_screenshot_path)

# Check if screenshot file was created
if not screenshot_path.exists():
mode = "local" if _is_local_mode() else "cloud"
Expand Down