diff --git a/webqa_agent/actions/action_executor.py b/webqa_agent/actions/action_executor.py
index a9dd6cd..3bb8085 100644
--- a/webqa_agent/actions/action_executor.py
+++ b/webqa_agent/actions/action_executor.py
@@ -2,6 +2,8 @@
import logging
from typing import Dict, List, Optional
+from webqa_agent.actions.action_handler import action_context_var
+
class ActionExecutor:
def __init__(self, action_handler):
@@ -66,31 +68,131 @@ async def _execute_clear(self, action):
"""Execute clear action on an input field."""
if not self._validate_params(action, ["locate.id"]):
return {"success": False, "message": "Missing locate.id for clear action"}
+
success = await self._actions.clear(action.get("locate").get("id"))
+
+ # Fetch the ActionContext published via action_context_var (None when no context was set).
+ # NOTE(review): presumably self._actions.clear() populates it for this call - confirm it
+ # cannot still hold the error of an earlier action.
+ ctx = action_context_var.get()
+
if success:
return {"success": True, "message": "Clear action successful."}
else:
- return {"success": False, "message": "Clear action failed. The element might not be clearable."}
+ # Enrich error message with context
+ base_message = "Clear action failed."
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ if ctx.error_type == "element_not_found":
+ base_message = "Clear failed: Element not found on page."
+ elif ctx.error_type == "element_not_typeable":
+ base_message = "Clear failed: Element cannot be cleared."
+ elif ctx.error_type == "playwright_error":
+ base_message = "Clear failed: Browser interaction error."
+ else:
+ base_message = "Clear action failed. The element might not be clearable."
+
+ # error_details is still {} here when the context carried no error_type.
+ return {
+ "success": False,
+ "message": base_message,
+ "error_details": error_details
+ }
async def _execute_tap(self, action):
"""Execute tap/click action."""
if not self._validate_params(action, ["locate.id"]):
return {"success": False, "message": "Missing locate.id for tap action"}
+
success = await self._actions.click(action.get("locate").get("id"))
+
+ # Fetch the ActionContext published via action_context_var (None when no context was set).
+ # ActionHandler.click() stores a fresh context at its start, and ensure_element_in_viewport()
+ # replaces it with its own when called, so the context read here may come from either.
+ ctx = action_context_var.get()
+
if success:
return {"success": True, "message": "Tap action successful."}
else:
- return {"success": False, "message": "Tap action failed. The element might not be clickable."}
+ # Enrich error message with context
+ base_message = "Tap action failed."
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ # (the scroll-related messages report ctx.scroll_attempts, the last attempt number recorded)
+ if ctx.error_type == "scroll_failed":
+ base_message = f"Tap failed: Could not scroll element into viewport after {ctx.scroll_attempts} attempts."
+ elif ctx.error_type == "scroll_timeout_lazy_loading":
+ base_message = f"Tap failed: Element viewport positioning succeeded but page content unstable after {ctx.scroll_attempts} attempts."
+ elif ctx.error_type == "element_not_found":
+ base_message = f"Tap failed: Element not found on page."
+ elif ctx.error_type == "element_not_clickable":
+ base_message = f"Tap failed: Element exists but is not clickable."
+ elif ctx.error_type == "playwright_error":
+ base_message = f"Tap failed: Browser interaction error."
+ else:
+ base_message = "Tap action failed. The element might not be clickable."
+
+ # error_details is still {} here when the context carried no error_type.
+ return {
+ "success": False,
+ "message": base_message,
+ "error_details": error_details # NEW: additional metadata, won't break existing consumers
+ }
async def _execute_hover(self, action):
"""Execute hover action."""
if not self._validate_params(action, ["locate.id"]):
return {"success": False, "message": "Missing locate.id for hover action"}
+
success = await self._actions.hover(action.get("locate").get("id"))
+
+ # Fetch the ActionContext published via action_context_var (None when no context was set).
+ # NOTE(review): presumably self._actions.hover() populates it for this call - confirm it
+ # cannot still hold the error of an earlier action.
+ ctx = action_context_var.get()
+
if success:
return {"success": True, "message": "Hover action successful."}
else:
- return {"success": False, "message": "Hover action failed. The element might not be hoverable."}
+ # Enrich error message with context
+ base_message = "Hover action failed."
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ if ctx.error_type == "scroll_failed":
+ base_message = f"Hover failed: Could not scroll element into viewport after {ctx.scroll_attempts} attempts."
+ elif ctx.error_type == "element_not_found":
+ base_message = f"Hover failed: Element not found on page or missing coordinates."
+ elif ctx.error_type == "playwright_error":
+ base_message = f"Hover failed: Browser interaction error."
+ else:
+ base_message = "Hover action failed. The element might not be hoverable."
+
+ # error_details is still {} here when the context carried no error_type.
+ return {
+ "success": False,
+ "message": base_message,
+ "error_details": error_details
+ }
async def _execute_sleep(self, action):
"""Execute sleep/wait action."""
@@ -110,12 +212,44 @@ async def _execute_input(self, action):
success = await self._actions.type(
action.get("locate").get("id"), value, clear_before_type=clear_before_type
)
+
+ # Read action context for detailed error information
+ ctx = action_context_var.get()
+
if success:
return {"success": True, "message": "Input action successful."}
else:
+ # Enrich error message with context
+ base_message = "Input action failed."
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ if ctx.error_type == "scroll_failed":
+ base_message = f"Input failed: Could not scroll element into viewport after {ctx.scroll_attempts} attempts."
+ elif ctx.error_type == "element_not_found":
+ base_message = f"Input failed: Element not found on page."
+ elif ctx.error_type == "element_not_typeable":
+ base_message = f"Input failed: Element exists but cannot accept text input."
+ elif ctx.error_type == "element_not_clickable":
+ base_message = f"Input failed: Could not focus element for typing."
+ elif ctx.error_type == "playwright_error":
+ base_message = f"Input failed: Browser interaction error."
+ else:
+ base_message = "Input action failed. The element might not be available for typing."
+
return {
"success": False,
- "message": "Input action failed. The element might not be available for typing.",
+ "message": base_message,
+ "error_details": error_details
}
except Exception as e:
logging.error(f"Action '_execute_input' execution failed: {str(e)}")
@@ -139,11 +273,39 @@ async def _execute_keyboard_press(self, action):
"""Execute keyboard press action."""
if not self._validate_params(action, ["param.value"]):
return {"success": False, "message": "Missing param.value for keyboard press action"}
+
success = await self._actions.keyboard_press(action.get("param").get("value"))
+
+ # Fetch the ActionContext published via action_context_var (None when no context was set).
+ # NOTE(review): presumably self._actions.keyboard_press() populates it for this call - confirm
+ # it cannot still hold the error of an earlier action.
+ ctx = action_context_var.get()
+
if success:
return {"success": True, "message": "Keyboard press successful."}
else:
- return {"success": False, "message": "Keyboard press failed."}
+ # Enrich error message with context
+ base_message = "Keyboard press failed."
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ if ctx.error_type == "playwright_error":
+ base_message = "Keyboard press failed: Browser interaction error."
+ else:
+ base_message = "Keyboard press failed."
+
+ # error_details is still {} here when the context carried no error_type.
+ return {
+ "success": False,
+ "message": base_message,
+ "error_details": error_details
+ }
async def _execute_get_new_page(self):
"""Execute get new page action."""
@@ -157,11 +319,43 @@ async def _execute_upload(self, action, file_path):
"""Execute upload action."""
if not self._validate_params(action, ["locate.id"]):
return {"success": False, "message": "Missing locate.id for upload action"}
+
success = await self._actions.upload_file(action.get("locate").get("id"), file_path)
+
+ # Fetch the ActionContext published via action_context_var (None when no context was set).
+ # NOTE(review): presumably self._actions.upload_file() populates it for this call - confirm
+ # it cannot still hold the error of an earlier action.
+ ctx = action_context_var.get()
+
if success:
return {"success": True, "message": "File upload successful."}
else:
- return {"success": False, "message": "File upload failed."}
+ # Enrich error message with context
+ base_message = "File upload failed."
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ if ctx.error_type == "file_upload_failed":
+ base_message = "File upload failed: Operation error."
+ elif ctx.error_type == "element_not_found":
+ base_message = "File upload failed: No file input element found on page."
+ elif ctx.error_type == "playwright_error":
+ base_message = "File upload failed: Browser interaction error."
+ else:
+ base_message = "File upload failed."
+
+ # error_details is still {} here when the context carried no error_type.
+ return {
+ "success": False,
+ "message": base_message,
+ "error_details": error_details
+ }
async def _execute_select_dropdown(self, action):
"""Execute select dropdown action."""
@@ -299,34 +493,172 @@ async def _execute_go_to_page(self, action):
page = getattr(self._actions, 'page', None)
if page:
navigation_performed = await self._actions.smart_navigate_to_page(page, url)
- message = "Navigated to page" if navigation_performed else "Already on target page"
- return {"success": True, "message": message}
+
+ # Read action context for detailed error information
+ ctx = action_context_var.get()
+ recorded_error = ctx.error_type if ctx else None
+
+ # The pre-change code reported a falsy result as success ("Already on target page").
+ # Keep that contract: only report a failure when navigation was not performed AND
+ # the action context recorded an error.
+ # NOTE(review): assumes smart_navigate_to_page() records its failures in the action
+ # context and that no stale error is left over from an earlier action - confirm.
+ if navigation_performed or not recorded_error:
+     message = "Navigated to page" if navigation_performed else "Already on target page"
+     return {"success": True, "message": message}
+
+ # Navigation failed: ctx is guaranteed non-None here because recorded_error is set
+ error_details = {
+     "error_type": ctx.error_type,
+     "error_reason": ctx.error_reason,
+     "attempted_strategies": ctx.attempted_strategies,
+     "element_info": ctx.element_info,
+     "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ if recorded_error == "playwright_error":
+     base_message = "Navigation failed: Browser interaction error."
+ else:
+     base_message = f"Navigation failed: {ctx.error_reason or 'Unknown reason'}"
+
+ return {
+     "success": False,
+     "message": base_message,
+     "error_details": error_details
+ }
# Fallback to regular navigation
if hasattr(self._actions, 'go_to_page') and hasattr(self._actions, 'page'):
await self._actions.go_to_page(self._actions.page, url)
- return {"success": True, "message": "Successfully navigated to page"}
+
+ # Read action context for detailed error information
+ ctx = action_context_var.get()
+
+ # Check if navigation succeeded by checking context
+ if not ctx or not ctx.error_type:
+ return {"success": True, "message": "Successfully navigated to page"}
+ else:
+ # Navigation failed, enrich error message with context
+ base_message = "Navigation to page failed."
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ if ctx.error_type == "playwright_error":
+ base_message = f"Navigation failed: Browser interaction error."
+ else:
+ base_message = f"Navigation failed: {ctx.error_reason or 'Unknown reason'}"
+
+ return {
+ "success": False,
+ "message": base_message,
+ "error_details": error_details
+ }
return {"success": False, "message": "Navigation method not available"}
except Exception as e:
logging.error(f"Go to page action failed: {str(e)}")
- return {"success": False, "message": f"Navigation failed: {str(e)}", "playwright_error": str(e)}
+
+ # Read action context for any additional error information
+ ctx = action_context_var.get()
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error or str(e)
+ }
+ else:
+ error_details = {
+ "error_type": "playwright_error",
+ "error_reason": "Navigation failed with an exception",
+ "attempted_strategies": [],
+ "element_info": {},
+ "playwright_error": str(e)
+ }
+
+ return {
+ "success": False,
+ "message": f"Navigation failed: {str(e)}",
+ "error_details": error_details
+ }
async def _execute_go_back(self):
"""Execute browser back navigation action."""
try:
if hasattr(self._actions, 'go_back'):
success = await self._actions.go_back()
+
+ # Fetch the ActionContext published via action_context_var (None when no context was set).
+ # NOTE(review): presumably self._actions.go_back() populates it for this call - confirm it
+ # cannot still hold the error of an earlier action.
+ ctx = action_context_var.get()
+
if success:
return {"success": True, "message": "Successfully navigated back to previous page"}
else:
- return {"success": False, "message": "Go back navigation failed"}
+ # Navigation failed, enrich error message with context
+ base_message = "Go back navigation failed."
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ if ctx.error_type == "playwright_error":
+ base_message = f"Go back failed: Browser interaction error."
+ else:
+ base_message = f"Go back failed: {ctx.error_reason or 'Unknown reason'}"
+ else:
+ base_message = "Go back navigation failed. No previous page in history or navigation not possible."
+
+ # error_details is still {} here when the context carried no error_type.
+ return {
+ "success": False,
+ "message": base_message,
+ "error_details": error_details
+ }
else:
return {"success": False, "message": "Go back action not supported by action handler"}
except Exception as e:
logging.error(f"Go back action failed: {str(e)}")
- return {"success": False, "message": f"Go back failed: {str(e)}", "playwright_error": str(e)}
+
+ # Read action context for any additional error information
+ # NOTE: the exception text used to be a top-level "playwright_error" key of the result;
+ # it is now nested under result["error_details"]["playwright_error"].
+ ctx = action_context_var.get()
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error or str(e)
+ }
+ else:
+ # No error recorded in the context: synthesize the details from the exception alone
+ error_details = {
+ "error_type": "playwright_error",
+ "error_reason": "Go back navigation failed with an exception",
+ "attempted_strategies": [],
+ "element_info": {},
+ "playwright_error": str(e)
+ }
+
+ return {
+ "success": False,
+ "message": f"Go back failed: {str(e)}",
+ "error_details": error_details
+ }
async def _execute_mouse(self, action):
"""Unified mouse action supporting move and wheel.
@@ -340,9 +672,9 @@ async def _execute_mouse(self, action):
param = action.get("param")
if not param or not isinstance(param, dict):
return {"success": False, "message": "Missing or invalid param for mouse action"}
-
+
op = param.get("op")
-
+
# Auto-detect if op not provided or empty
if not op:
if "x" in param and "y" in param:
@@ -355,39 +687,123 @@ async def _execute_mouse(self, action):
if op == "move":
if not self._validate_params(action, ["param.x", "param.y"]):
return {"success": False, "message": "Missing x or y coordinates for mouse move"}
-
+
x = param.get("x")
y = param.get("y")
-
+
# Validate coordinates are numbers
if not isinstance(x, (int, float)) or not isinstance(y, (int, float)):
return {"success": False, "message": "x and y coordinates must be numbers"}
-
+
success = await self._actions.mouse_move(x, y)
+
+ # Read action context for detailed error information
+ ctx = action_context_var.get()
+
if success:
return {"success": True, "message": f"Mouse moved to ({x}, {y})"}
else:
- return {"success": False, "message": "Mouse move action failed"}
+ # Mouse move failed, enrich error message with context
+ base_message = "Mouse move action failed."
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ if ctx.error_type == "playwright_error":
+ base_message = f"Mouse move failed: Browser interaction error."
+ else:
+ base_message = f"Mouse move failed: {ctx.error_reason or 'Unknown reason'}"
+ else:
+ base_message = f"Mouse move to ({x}, {y}) failed. The operation might not be supported."
+
+ return {
+ "success": False,
+ "message": base_message,
+ "error_details": error_details
+ }
elif op == "wheel":
# Default missing keys to 0
dx = param.get("deltaX", 0)
dy = param.get("deltaY", 0)
-
+
# Validate deltas are numbers
if not isinstance(dx, (int, float)) or not isinstance(dy, (int, float)):
return {"success": False, "message": "deltaX and deltaY must be numbers"}
-
+
success = await self._actions.mouse_wheel(dx, dy)
+
+ # Read action context for detailed error information
+ ctx = action_context_var.get()
+
if success:
return {"success": True, "message": f"Mouse wheel scrolled (deltaX: {dx}, deltaY: {dy})"}
else:
- return {"success": False, "message": "Mouse wheel action failed"}
+ # Mouse wheel failed, enrich error message with context
+ base_message = "Mouse wheel action failed."
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error
+ }
+
+ # Make message more specific based on error type
+ if ctx.error_type == "playwright_error":
+ base_message = f"Mouse wheel scroll failed: Browser interaction error."
+ else:
+ base_message = f"Mouse wheel scroll failed: {ctx.error_reason or 'Unknown reason'}"
+ else:
+ base_message = f"Mouse wheel scroll (deltaX: {dx}, deltaY: {dy}) failed. The operation might not be supported."
+
+ return {
+ "success": False,
+ "message": base_message,
+ "error_details": error_details
+ }
else:
logging.error(f"Unknown mouse op: {op}. Expected 'move' or 'wheel'.")
return {"success": False, "message": f"Unknown mouse operation: {op}. Expected 'move' or 'wheel'"}
-
+
except Exception as e:
logging.error(f"Mouse action execution failed: {str(e)}")
- return {"success": False, "message": f"Mouse action failed with an exception: {e}"}
+
+ # Read action context for any additional error information
+ ctx = action_context_var.get()
+ error_details = {}
+
+ if ctx and ctx.error_type:
+ error_details = {
+ "error_type": ctx.error_type,
+ "error_reason": ctx.error_reason,
+ "attempted_strategies": ctx.attempted_strategies,
+ "element_info": ctx.element_info,
+ "playwright_error": ctx.playwright_error or str(e)
+ }
+ else:
+ error_details = {
+ "error_type": "playwright_error",
+ "error_reason": "Mouse action failed with an exception",
+ "attempted_strategies": [],
+ "element_info": {},
+ "playwright_error": str(e)
+ }
+
+ return {
+ "success": False,
+ "message": f"Mouse action failed with an exception: {e}",
+ "error_details": error_details
+ }
diff --git a/webqa_agent/actions/action_handler.py b/webqa_agent/actions/action_handler.py
index bc88718..fe9ff30 100644
--- a/webqa_agent/actions/action_handler.py
+++ b/webqa_agent/actions/action_handler.py
@@ -1,8 +1,9 @@
-import asyncio
import base64
import json
import os
import re
+from contextvars import ContextVar
+from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
from playwright.async_api import Page
@@ -10,6 +11,58 @@
from webqa_agent.browser.driver import *
+# ===== Action Context Infrastructure for Error Propagation =====
+
+# Holds the most recently stored ActionContext; .get() yields None (the declared default)
+# when no context has been set in the current execution context.
+action_context_var: ContextVar[Optional['ActionContext']] = ContextVar('action_context', default=None)
+
+
+@dataclass
+class ActionContext:
+    """Stores detailed error context for action execution.
+
+    This context is propagated through the execution chain using contextvars,
+    allowing detailed error information to be passed without changing return types.
+
+    Attributes:
+        error_type: Error classification string (see the ERROR_* constants), or None.
+        error_reason: Human-readable explanation of the error, or None.
+        attempted_strategies: Labels of the strategies tried so far.
+        element_info: Free-form details about the element/action involved.
+        scroll_attempts: Number of the last scroll attempt that was started.
+        max_scroll_attempts: Upper bound on scroll attempts for the action.
+        playwright_error: Text of the last browser-level exception, or None.
+    """
+    error_type: Optional[str] = None
+    error_reason: Optional[str] = None
+    attempted_strategies: List[str] = field(default_factory=list)
+    element_info: Dict[str, Any] = field(default_factory=dict)
+    scroll_attempts: int = 0
+    max_scroll_attempts: int = 0
+    playwright_error: Optional[str] = None
+
+    def set_error(self, error_type: str, reason: str, **kwargs):
+        """Record an error on this context.
+
+        Args:
+            error_type: Error classification to store in ``error_type``.
+            reason: Human-readable explanation stored in ``error_reason``.
+            **kwargs: Extra details. A key naming a declared field (for example
+                ``playwright_error``) overwrites that field. Any other key (for
+                example ``selector`` or ``element_id``) is merged into
+                ``element_info`` so it travels with the reported context
+                instead of becoming an ad-hoc attribute outside the declared fields.
+        """
+        # Local import: the module-level import only brings in dataclass/field.
+        from dataclasses import fields
+
+        self.error_type = error_type
+        self.error_reason = reason
+        declared = {f.name for f in fields(self)}
+        for key, value in kwargs.items():
+            if key in declared:
+                setattr(self, key, value)
+            else:
+                self.element_info[key] = value
+
+    def reset(self):
+        """Reset context for a new action."""
+        self.error_type = None
+        self.error_reason = None
+        # Fresh containers so earlier references are not mutated by later actions.
+        self.attempted_strategies = []
+        self.element_info = {}
+        self.scroll_attempts = 0
+        self.max_scroll_attempts = 0
+        self.playwright_error = None
+
+
+# Error type constants for consistent classification
+# NOTE: ActionExecutor compares ctx.error_type against these literal string values
+# (e.g. "scroll_failed", "element_not_found", "playwright_error"), so changing a value
+# here requires the same change in the executor's message selection.
+ERROR_SCROLL_FAILED = "scroll_failed"
+ERROR_SCROLL_TIMEOUT = "scroll_timeout_lazy_loading"
+ERROR_ELEMENT_NOT_FOUND = "element_not_found"
+ERROR_NOT_CLICKABLE = "element_not_clickable"
+ERROR_NOT_TYPEABLE = "element_not_typeable"
+ERROR_ELEMENT_OBSCURED = "element_obscured"
+ERROR_DROPDOWN_NO_MATCH = "dropdown_no_match"
+ERROR_DROPDOWN_NOT_FOUND = "dropdown_not_found"
+ERROR_FILE_UPLOAD_FAILED = "file_upload_failed"
+ERROR_ACTION_TIMEOUT = "action_timeout"
+ERROR_PLAYWRIGHT = "playwright_error"
+
+
class ActionHandler:
def __init__(self):
self.page_data = {}
@@ -258,7 +311,242 @@ async def perform_scroll(): # Execute scroll operation
return True
+ async def ensure_element_in_viewport(self, element_id: str, max_retries: int = 3, base_wait_time: float = 0.5) -> bool:
+ """Ensure element is in viewport by scrolling if needed with enhanced edge case handling.
+
+ This method enables full-page planning mode where elements can be planned
+ from a full-page screenshot but may be outside the viewport during execution.
+
+ Handles edge cases:
+ - Lazy-loaded content that appears after scrolling
+ - Infinite scroll pages with dynamic content
+ - Slow-loading pages with delayed element rendering
+
+ Args:
+ element_id: Element ID to scroll to
+ max_retries: Maximum retry attempts for lazy-loaded content (default: 3)
+ base_wait_time: Base wait time in seconds, will be adaptive (default: 0.5)
+
+ Returns:
+ bool: True if element is in viewport (or successfully scrolled to), False otherwise
+ """
+ # Initialize action context for error propagation
+ # NOTE: this stores a fresh ActionContext in action_context_var, replacing whatever
+ # context was stored before the call.
+ ctx = ActionContext()
+ action_context_var.set(ctx)
+ ctx.max_scroll_attempts = max_retries
+ ctx.element_info = {"element_id": element_id, "action": "ensure_viewport"}
+
+ element = self.page_element_buffer.get(str(element_id))
+ if not element:
+ logging.warning(f'Element {element_id} not found in buffer for viewport check')
+ ctx.set_error(
+ ERROR_ELEMENT_NOT_FOUND,
+ f"Element {element_id} not found in page element buffer",
+ element_id=element_id
+ )
+ return False
+
+ # Check if element is already in viewport
+ # (reads the buffered 'isInViewport' flag; a missing key is treated as in-viewport)
+ is_in_viewport = element.get('isInViewport', True)
+ if is_in_viewport:
+ logging.debug(f'Element {element_id} already in viewport, no scroll needed')
+ return True
+
+ logging.info(f'Element {element_id} is outside viewport, scrolling to make it visible')
+
+ # Get element selectors
+ selector = element.get('selector')
+ xpath = element.get('xpath')
+
+ # Retry loop for handling lazy-loaded content
+ for attempt in range(max_retries):
+ try:
+ ctx.scroll_attempts = attempt + 1
+ # Adaptive wait time increases with retries for slow-loading content
+ # (with base_wait_time=0.5 this gives 0.5s, 0.75s, 1.0s for attempts 1-3)
+ current_wait_time = base_wait_time * (1 + attempt * 0.5)
+
+ # Strategy 1: Use Playwright's scroll_into_view_if_needed (most reliable)
+ if self._is_valid_css_selector(selector):
+ try:
+ ctx.attempted_strategies.append(f"css_selector_attempt_{attempt + 1}")
+ await self.page.locator(selector).scroll_into_view_if_needed(timeout=5000)
+ logging.debug(f'Scrolled to element {element_id} using CSS selector (attempt {attempt + 1})')
+
+ # Wait for scroll animation + potential lazy-loading
+ await asyncio.sleep(current_wait_time)
+
+ # Verify page stability after scroll (for dynamic content)
+ # (the stability result is not checked on this path; True is returned either way)
+ await self._wait_for_page_stability()
+ return True
+ except Exception as css_error:
+ ctx.playwright_error = str(css_error)
+ if attempt < max_retries - 1:
+ logging.debug(f'CSS selector scroll failed on attempt {attempt + 1}: {css_error}, retrying...')
+ await asyncio.sleep(current_wait_time)
+ continue
+ else:
+ logging.debug(f'CSS selector scroll failed after {max_retries} attempts: {css_error}, trying XPath')
+
+ # Strategy 2: Try XPath if CSS fails
+ if xpath:
+ try:
+ ctx.attempted_strategies.append(f"xpath_attempt_{attempt + 1}")
+ await self.page.locator(f'xpath={xpath}').scroll_into_view_if_needed(timeout=5000)
+ logging.debug(f'Scrolled to element {element_id} using XPath (attempt {attempt + 1})')
+
+ # Wait for scroll animation + potential lazy-loading
+ await asyncio.sleep(current_wait_time)
+
+ # Verify page stability after scroll
+ await self._wait_for_page_stability()
+ return True
+ except Exception as xpath_error:
+ ctx.playwright_error = str(xpath_error)
+ if attempt < max_retries - 1:
+ logging.debug(f'XPath scroll failed on attempt {attempt + 1}: {xpath_error}, retrying...')
+ await asyncio.sleep(current_wait_time)
+ continue
+ else:
+ logging.debug(f'XPath scroll failed after {max_retries} attempts: {xpath_error}, trying coordinate-based scroll')
+
+ # Strategy 3: Fallback to coordinate-based scrolling with retry support
+ center_y = element.get('center_y')
+ if center_y is not None:
+ ctx.attempted_strategies.append(f"coordinates_attempt_{attempt + 1}")
+ viewport_height = await self.page.evaluate('window.innerHeight')
+ current_scroll_y = await self.page.evaluate('window.scrollY')
+
+ # Calculate target scroll position (center element in viewport)
+ target_scroll_y = center_y - viewport_height / 2
+ target_scroll_y = max(0, target_scroll_y) # Don't scroll above page top
+
+ # Log scroll operation for debugging
+ logging.debug(f'Scrolling element {element_id}: current scroll position={current_scroll_y}, target scroll position={target_scroll_y}')
+
+ # Perform scroll with smooth behavior
+ await self.page.evaluate(f'window.scrollTo({{top: {target_scroll_y}, behavior: "smooth"}})')
+ logging.debug(f'Scrolled to element {element_id} using coordinates (y={target_scroll_y}, attempt {attempt + 1})')
+
+ # Adaptive wait time for smooth scroll + lazy loading
+ await asyncio.sleep(current_wait_time + 0.3) # Extra time for smooth scroll
+
+ # Verify page stability after scroll
+ # NOTE(review): ERROR_SCROLL_TIMEOUT looks like a soft error here - it is recorded on
+ # the last attempt but the method still appears to return True below; confirm
+ # against the original indentation.
+ page_stable = await self._wait_for_page_stability()
+ if not page_stable:
+ # Page not stable, likely lazy-loading
+ if attempt == max_retries - 1:
+ ctx.set_error(
+ ERROR_SCROLL_TIMEOUT,
+ f"Element {element_id} viewport positioning succeeded but page content unstable after {max_retries} attempts, possible lazy-loading or infinite scroll",
+ selector=selector,
+ xpath=xpath,
+ center_y=center_y
+ )
+ return True
+
+ # If all strategies failed but we have more retries, wait and continue
+ if attempt < max_retries - 1:
+ logging.debug(f'All scroll strategies failed on attempt {attempt + 1}, waiting before retry...')
+ await asyncio.sleep(current_wait_time * 2) # Longer wait between full retry cycles
+ continue
+
+ except Exception as e:
+ ctx.playwright_error = str(e)
+ if attempt < max_retries - 1:
+ logging.warning(f'Error scrolling to element {element_id} on attempt {attempt + 1}: {e}, retrying...')
+ await asyncio.sleep(current_wait_time)
+ continue
+ else:
+ logging.error(f'Error scrolling to element {element_id} after {max_retries} attempts: {e}')
+ ctx.set_error(
+ ERROR_SCROLL_FAILED,
+ f"All scroll strategies failed after {max_retries} attempts with exception: {str(e)}",
+ selector=selector,
+ xpath=xpath
+ )
+ return False
+
+ # Final failure: all retries exhausted
+ # (also reached directly when max_retries is 0, since the loop body never runs)
+ logging.warning(f'Could not scroll to element {element_id} after {max_retries} attempts: no valid selectors or all strategies failed')
+ ctx.set_error(
+ ERROR_SCROLL_FAILED,
+ f"Could not scroll to element after {max_retries} attempts: no valid selectors or all scroll strategies (CSS, XPath, coordinates) failed",
+ selector=selector,
+ xpath=xpath,
+ has_valid_selector=self._is_valid_css_selector(selector) if selector else False,
+ has_xpath=xpath is not None,
+ has_coordinates=element.get('center_y') is not None
+ )
+ return False
+
+    async def _wait_for_page_stability(self, timeout: float = 2.0, check_interval: float = 0.5) -> bool:
+        """Wait for page to stabilize after scroll (handles lazy-loading and dynamic content).
+
+        The page counts as stable as soon as two consecutive reads of
+        ``document.body.scrollHeight`` return the same value.
+
+        Args:
+            timeout: Maximum time to wait for stability (default: 2.0 seconds)
+            check_interval: Interval between stability checks (default: 0.5 seconds)
+
+        Returns:
+            bool: True if page stabilized, False if timeout reached or the check raised
+        """
+        try:
+            last_height = await self.page.evaluate('document.body.scrollHeight')
+
+            # Measure against the event-loop clock instead of summing sleep intervals:
+            # the time spent inside page.evaluate() then counts towards the timeout, and
+            # a non-positive check_interval can no longer keep the loop running forever.
+            loop = asyncio.get_running_loop()
+            started = loop.time()
+            deadline = started + timeout
+
+            while loop.time() < deadline:
+                await asyncio.sleep(check_interval)
+
+                current_height = await self.page.evaluate('document.body.scrollHeight')
+
+                # If page height hasn't changed, consider it stable
+                if current_height == last_height:
+                    logging.debug(f'Page stabilized after {loop.time() - started:.1f}s')
+                    return True
+
+                last_height = current_height
+
+            logging.debug(f'Page stability timeout after {timeout}s (content may still be loading)')
+            return False
+
+        except Exception as e:
+            # Best-effort check: any browser error is reported as "not stable".
+            logging.warning(f'Error checking page stability: {e}')
+            return False
+
+    async def _convert_document_to_viewport_coords(self, x: float, y: float) -> tuple[float, float]:
+        """Convert document coordinates to viewport coordinates.
+
+        Document coordinates are relative to the entire page (top-left of document).
+        Viewport coordinates are relative to the visible area (top-left of viewport).
+
+        Playwright's mouse operations use viewport coordinates, while our crawler
+        captures document coordinates. This method performs the necessary conversion.
+
+        Args:
+            x: Document X coordinate (from element center_x)
+            y: Document Y coordinate (from element center_y)
+
+        Returns:
+            Tuple of (viewport_x, viewport_y)
+
+        Example:
+            # Element at document position (500, 1200) with scroll at (0, 800)
+            # viewport_y = 1200 - 800 = 400 (element is 400px from top of viewport)
+        """
+        # Read both scroll offsets in ONE evaluate call: they then describe the same
+        # instant (the page may still be moving after a smooth scroll) and the
+        # conversion costs a single browser round trip instead of two.
+        scroll_x, scroll_y = await self.page.evaluate(
+            '[window.pageXOffset || document.documentElement.scrollLeft,'
+            ' window.pageYOffset || document.documentElement.scrollTop]'
+        )
+        viewport_x = x - scroll_x
+        viewport_y = y - scroll_y
+        return (viewport_x, viewport_y)
+
async def click(self, id) -> bool:
+ # Initialize action context for error propagation
+ # Note: If ensure_element_in_viewport is called, it will set its own context
+ # We only need to initialize context for click-specific failures
+ ctx = ActionContext()
+ action_context_var.set(ctx)
+ ctx.element_info = {"element_id": str(id), "action": "click"}
+
# Inject JavaScript into the page to remove the target attribute from all links
js = """
links = document.getElementsByTagName("a");
@@ -273,6 +561,11 @@ async def click(self, id) -> bool:
element = self.page_element_buffer.get(id)
if not element:
logging.error(f'Element with id {id} not found in buffer for click action.')
+ ctx.set_error(
+ ERROR_ELEMENT_NOT_FOUND,
+ f"Element {id} not found in page element buffer for click action",
+ element_id=id
+ )
return False
logging.debug(
@@ -281,9 +574,34 @@ async def click(self, id) -> bool:
except Exception as e:
logging.error(f'failed to get element {id}, element: {self.page_element_buffer.get(id)}, error: {e}')
+ ctx.set_error(
+ ERROR_PLAYWRIGHT,
+ f"Exception while retrieving element {id} from buffer: {str(e)}",
+ element_id=id,
+ playwright_error=str(e)
+ )
return False
- return await self.click_using_coordinates(element, id)
+ # Ensure element is in viewport before clicking (for full-page planning mode)
+ if not await self.ensure_element_in_viewport(id):
+ logging.error(f'Cannot click element {id}: failed to scroll element into viewport after multiple attempts')
+ # Context already populated by ensure_element_in_viewport, preserve it
+ return False
+
+ # Attempt click - if it fails, populate context with click-specific error
+ click_result = await self.click_using_coordinates(element, id)
+ if not click_result:
+ # Get current context to check if error already set by click_using_coordinates
+ current_ctx = action_context_var.get()
+ if current_ctx and not current_ctx.error_type:
+ current_ctx.set_error(
+ ERROR_NOT_CLICKABLE,
+ f"Element {id} found and in viewport, but click action failed",
+ element_id=id,
+ tag_name=element.get('tagName'),
+ selector=element.get('selector')
+ )
+ return click_result
async def click_using_coordinates(self, element, id) -> bool:
"""Helper function to click using coordinates."""
@@ -291,11 +609,13 @@ async def click_using_coordinates(self, element, id) -> bool:
y = element.get('center_y')
try:
if x is not None and y is not None:
- logging.debug(f'mouse click at element {id}, coordinate=({x}, {y})')
+ # Convert document coordinates to viewport coordinates
+ viewport_x, viewport_y = await self._convert_document_to_viewport_coords(x, y)
+ logging.debug(f'Mouse click at element {id}, document coordinates=({x}, {y}), viewport coordinates=({viewport_x}, {viewport_y})')
try:
- await self.page.mouse.click(x, y)
+ await self.page.mouse.click(viewport_x, viewport_y)
except Exception as e:
- logging.error(f'mouse click error: {e}\nwith coordinates: ({x}, {y})')
+ logging.error(f'Mouse click error: {e}\nDocument coordinates: ({x}, {y}), Viewport coordinates: ({viewport_x}, {viewport_y})')
return True
else:
logging.error('Coordinates not found in element data')
@@ -305,27 +625,59 @@ async def click_using_coordinates(self, element, id) -> bool:
return False
async def hover(self, id) -> bool:
+ # Initialize action context for error propagation; ensure_element_in_viewport may install its own context, so later errors are written to the active one
+ ctx = ActionContext()
+ action_context_var.set(ctx)
+ ctx.element_info = {"element_id": str(id), "action": "hover"}
+
element = self.page_element_buffer.get(str(id))
if not element:
logging.error(f'Element with id {id} not found in buffer for hover action.')
+ ctx.set_error(
+ ERROR_ELEMENT_NOT_FOUND,
+ f"Element {id} not found in page element buffer for hover action",
+ element_id=id
+ )
return False
logging.debug(
f"Attempting to hover over element: id={id}, tagName='{element.get('tagName')}', innerText='{element.get('innerText', '').strip()[:50]}', selector='{element.get('selector')}'"
)
- scroll_y = await self.page.evaluate('() => window.scrollY')
+ # Ensure element is in viewport before hovering (for full-page planning mode)
+ if not await self.ensure_element_in_viewport(str(id)):
+ logging.error(f'Cannot hover over element {id}: failed to scroll element into viewport after multiple attempts')
+ # Context already populated by ensure_element_in_viewport, preserve it
+ return False
- x = element.get('center_x')
- y = element.get('center_y')
- if x is not None and y is not None:
- y = y - scroll_y
- logging.debug(f'mouse hover at ({x}, {y})')
- await self.page.mouse.move(x, y)
- await asyncio.sleep(0.5)
- return True
- else:
- logging.error('Coordinates not found in element data')
+ try:
+ x = element.get('center_x')
+ y = element.get('center_y')
+ if x is not None and y is not None:
+ # Convert document coordinates to viewport coordinates
+ viewport_x, viewport_y = await self._convert_document_to_viewport_coords(x, y)
+ logging.debug(f'Mouse hover at element {id}, document coordinates=({x}, {y}), viewport coordinates=({viewport_x}, {viewport_y})')
+ await self.page.mouse.move(viewport_x, viewport_y)
+ await asyncio.sleep(0.5)
+ return True
+ else:
+ logging.error('Coordinates not found in element data')
+ (action_context_var.get() or ctx).set_error(
+ ERROR_ELEMENT_NOT_FOUND,
+ f"Element {id} missing coordinate information (center_x or center_y)",
+ element_id=id,
+ has_center_x=x is not None,
+ has_center_y=y is not None
+ )
+ return False
+ except Exception as e:
+ logging.error(f'Hover action failed for element {id}: {e}')
+ (action_context_var.get() or ctx).set_error(
+ ERROR_PLAYWRIGHT,
+ f"Hover action failed with exception: {str(e)}",
+ element_id=id,
+ playwright_error=str(e)
+ )
return False
async def wait(self, timeMs) -> bool:
@@ -345,16 +697,32 @@ async def wait(self, timeMs) -> bool:
async def type(self, id, text, clear_before_type: bool = False) -> bool:
"""Types text into the specified element, optionally clearing it
first."""
+ # Initialize action context for error propagation
+ ctx = ActionContext()
+ action_context_var.set(ctx)
+ ctx.element_info = {"element_id": str(id), "action": "type", "text_length": len(text), "clear_before_type": clear_before_type}
+
try:
element = self.page_element_buffer.get(str(id))
if not element:
logging.error(f'Element with id {id} not found in buffer for type action.')
+ ctx.set_error(
+ ERROR_ELEMENT_NOT_FOUND,
+ f"Element {id} not found in page element buffer for type action",
+ element_id=id
+ )
return False
logging.debug(
f"Attempting to type into element: id={id}, tagName='{element.get('tagName')}', innerText='{element.get('innerText', '').strip()[:50]}', selector='{element.get('selector')}', clear_before_type={clear_before_type}"
)
+ # Ensure element is in viewport before typing (for full-page planning mode)
+ if not await self.ensure_element_in_viewport(str(id)):
+ logging.error(f'Cannot type into element {id}: failed to scroll element into viewport after multiple attempts')
+ # Context already populated by ensure_element_in_viewport, preserve it
+ return False
+
if clear_before_type:
if not await self.clear(id):
logging.warning(f'Failed to clear element {id} before typing, but will attempt to type anyway.')
@@ -362,10 +730,24 @@ async def type(self, id, text, clear_before_type: bool = False) -> bool:
# click element to get focus
try:
if not await self.click(str(id)):
+ # Context already populated by click(), check and enhance if needed
+ current_ctx = action_context_var.get()
+ if current_ctx and not current_ctx.error_type:
+ current_ctx.set_error(
+ ERROR_NOT_CLICKABLE,
+ f"Cannot type into element {id}: failed to click element for focus",
+ element_id=id
+ )
return False
except Exception as e:
logging.error(f"Error 'type' clicking using coordinates: {e}")
logging.error(f'id type {type(id)}, id: {id}')
+ ctx.set_error(
+ ERROR_PLAYWRIGHT,
+ f"Exception while clicking element {id} to focus for typing: {str(e)}",
+ element_id=id,
+ playwright_error=str(e)
+ )
return False
await asyncio.sleep(1)
@@ -380,6 +762,7 @@ async def type(self, id, text, clear_before_type: bool = False) -> bool:
logging.debug(f"Typed '{text}' into element {id} using CSS selector: {selector}")
except Exception as css_error:
logging.warning(f'CSS selector type failed for element {id}: {css_error}')
+ ctx.playwright_error = str(css_error)
# CSS selector failed, try XPath
xpath = element.get('xpath')
if xpath:
@@ -390,9 +773,26 @@ async def type(self, id, text, clear_before_type: bool = False) -> bool:
logging.error(
f'Both CSS and XPath type failed for element {id}. CSS error: {css_error}, XPath error: {xpath_error}'
)
+ ctx.set_error(
+ ERROR_NOT_TYPEABLE,
+ f"Both CSS selector and XPath strategies failed to type into element {id}",
+ element_id=id,
+ selector=selector,
+ xpath=xpath,
+ css_error=str(css_error),
+ xpath_error=str(xpath_error)
+ )
return False
else:
logging.error(f'CSS selector type failed and no XPath available for element {id}')
+ ctx.set_error(
+ ERROR_NOT_TYPEABLE,
+ f"CSS selector failed to type into element {id} and no XPath fallback available",
+ element_id=id,
+ selector=selector,
+ has_xpath=False,
+ playwright_error=str(css_error)
+ )
return False
else:
logging.warning(f'Invalid CSS selector format for element {id}: {selector}')
@@ -404,15 +804,36 @@ async def type(self, id, text, clear_before_type: bool = False) -> bool:
logging.debug(f"Typed '{text}' into element {id} using XPath: {xpath}")
except Exception as xpath_error:
logging.error(f'XPath type failed for element {id}: {xpath_error}')
+ ctx.set_error(
+ ERROR_NOT_TYPEABLE,
+ f"XPath strategy failed to type into element {id} (invalid CSS selector)",
+ element_id=id,
+ selector=selector,
+ xpath=xpath,
+ playwright_error=str(xpath_error)
+ )
return False
else:
logging.error(f'Invalid CSS selector and no XPath available for element {id}')
+ ctx.set_error(
+ ERROR_NOT_TYPEABLE,
+ f"Invalid CSS selector format and no XPath available for element {id}",
+ element_id=id,
+ selector=selector,
+ has_xpath=False
+ )
return False
await asyncio.sleep(1)
return True
except Exception as e:
logging.error(f'Failed to type into element {id}: {e}')
+ ctx.set_error(
+ ERROR_PLAYWRIGHT,
+ f"Unexpected exception during type action: {str(e)}",
+ element_id=id,
+ playwright_error=str(e)
+ )
return False
@staticmethod
@@ -460,10 +881,20 @@ def _is_valid_css_selector(selector: str) -> bool:
async def clear(self, id) -> bool:
"""Clears the text in the specified input element."""
+ # Initialize action context for error propagation
+ ctx = ActionContext()
+ action_context_var.set(ctx)
+ ctx.element_info = {"element_id": str(id), "action": "clear"}
+
try:
element_to_clear = self.page_element_buffer.get(str(id))
if not element_to_clear:
logging.error(f'Element with id {id} not found in buffer for clear action.')
+ ctx.set_error(
+ ERROR_ELEMENT_NOT_FOUND,
+ f"Element {id} not found in page element buffer for clear action",
+ element_id=id
+ )
return False
logging.debug(
@@ -490,6 +921,7 @@ async def clear(self, id) -> bool:
logging.debug(f'Cleared input for element {id} using CSS selector: {selector}')
except Exception as css_error:
logging.warning(f'CSS selector clear failed for element {id}: {css_error}')
+ ctx.playwright_error = str(css_error)
# CSS selector failed, try XPath
xpath = element_to_clear.get('xpath')
if xpath:
@@ -500,9 +932,26 @@ async def clear(self, id) -> bool:
logging.error(
f'Both CSS and XPath clear failed for element {id}. CSS error: {css_error}, XPath error: {xpath_error}'
)
+ ctx.set_error(
+ ERROR_NOT_TYPEABLE,
+ f"Both CSS selector and XPath strategies failed to clear element {id}",
+ element_id=id,
+ selector=selector,
+ xpath=xpath,
+ css_error=str(css_error),
+ xpath_error=str(xpath_error)
+ )
return False
else:
logging.error(f'CSS selector clear failed and no XPath available for element {id}')
+ ctx.set_error(
+ ERROR_NOT_TYPEABLE,
+ f"CSS selector failed to clear element {id} and no XPath fallback available",
+ element_id=id,
+ selector=selector,
+ has_xpath=False,
+ playwright_error=str(css_error)
+ )
return False
else:
logging.warning(f'Invalid CSS selector format for element {id}: {selector}')
@@ -514,15 +963,36 @@ async def clear(self, id) -> bool:
logging.debug(f'Cleared input for element {id} using XPath: {xpath}')
except Exception as xpath_error:
logging.error(f'XPath clear failed for element {id}: {xpath_error}')
+ ctx.set_error(
+ ERROR_NOT_TYPEABLE,
+ f"XPath strategy failed to clear element {id} (invalid CSS selector)",
+ element_id=id,
+ selector=selector,
+ xpath=xpath,
+ playwright_error=str(xpath_error)
+ )
return False
else:
logging.error(f'Invalid CSS selector and no XPath available for element {id}')
+ ctx.set_error(
+ ERROR_NOT_TYPEABLE,
+ f"Invalid CSS selector format and no XPath available for element {id}",
+ element_id=id,
+ selector=selector,
+ has_xpath=False
+ )
return False
await asyncio.sleep(0.5)
return True
except Exception as e:
logging.error(f'Failed to clear element {id}: {e}')
+ ctx.set_error(
+ ERROR_PLAYWRIGHT,
+ f"Unexpected exception during clear action: {str(e)}",
+ element_id=id,
+ playwright_error=str(e)
+ )
return False
async def keyboard_press(self, key) -> bool:
@@ -534,9 +1004,24 @@ async def keyboard_press(self, key) -> bool:
Returns:
bool: True if success, False if failed
"""
- await self.page.keyboard.press(key)
- await asyncio.sleep(1)
- return True
+ # Initialize action context for error propagation
+ ctx = ActionContext()
+ action_context_var.set(ctx)
+ ctx.element_info = {"action": "keyboard_press", "key": key}
+
+ try:
+ await self.page.keyboard.press(key)
+ await asyncio.sleep(1)
+ return True
+ except Exception as e:
+ logging.error(f"Keyboard press failed for key '{key}': {e}")
+ ctx.set_error(
+ ERROR_PLAYWRIGHT,
+ f"Keyboard press action failed for key '{key}'",
+ key=key,
+ playwright_error=str(e)
+ )
+ return False
async def b64_page_screenshot(self, full_page=False, file_path=None, file_name=None, save_to_log=True):
"""Get page screenshot (Base64 encoded)
@@ -636,6 +1121,11 @@ async def upload_file(self, id, file_path: Union[str, List[str]]) -> bool:
Returns:
bool: True if success, False if failed
"""
+ # Initialize action context for error propagation
+ ctx = ActionContext()
+ action_context_var.set(ctx)
+ ctx.element_info = {"element_id": str(id), "action": "upload", "file_path": str(file_path)}
+
try:
# Support single file and multiple files
if isinstance(file_path, str):
@@ -644,19 +1134,32 @@ async def upload_file(self, id, file_path: Union[str, List[str]]) -> bool:
file_paths = file_path
else:
logging.error(f'file_path must be str or list, got {type(file_path)}')
+ ctx.set_error(
+ ERROR_FILE_UPLOAD_FAILED,
+ f"Invalid file_path type: expected str or list, got {type(file_path)}",
+ file_path_type=str(type(file_path))
+ )
return False
valid_file_paths = []
+ missing_files = []
for fp in file_paths:
if not fp or not isinstance(fp, str):
continue
if not os.path.exists(fp):
logging.error(f'File not found: {fp}')
+ missing_files.append(fp)
continue
valid_file_paths.append(fp)
if not valid_file_paths:
logging.error('No valid files to upload.')
+ ctx.set_error(
+ ERROR_FILE_UPLOAD_FAILED,
+ f"No valid files to upload. Missing files: {', '.join(missing_files) if missing_files else 'None'}",
+ missing_files=missing_files,
+ provided_paths=file_paths
+ )
return False
# Get file extension for accept check
@@ -690,6 +1193,11 @@ async def upload_file(self, id, file_path: Union[str, List[str]]) -> bool:
if not file_inputs:
logging.error('No file input elements found')
+ ctx.set_error(
+ ERROR_ELEMENT_NOT_FOUND,
+ "No file input elements found on page for upload action",
+ element_id=id
+ )
return False
# Find compatible input elements
@@ -711,6 +1219,11 @@ async def upload_file(self, id, file_path: Union[str, List[str]]) -> bool:
except Exception as e:
logging.error(f'Upload failed: {str(e)}')
+ ctx.set_error(
+ ERROR_FILE_UPLOAD_FAILED,
+ f"File upload failed with exception: {str(e)}",
+ playwright_error=str(e)
+ )
return False
async def get_dropdown_options(self, id) -> Dict[str, Any]:
@@ -1406,9 +1919,14 @@ async def drag(self, source_coords, target_coords):
target_y = target_coords.get('y')
try:
+ # Convert document coordinates to viewport coordinates
+ viewport_source_x, viewport_source_y = await self._convert_document_to_viewport_coords(source_x, source_y)
+ viewport_target_x, viewport_target_y = await self._convert_document_to_viewport_coords(target_x, target_y)
+
+ logging.debug(f'Drag action: source document=({source_x}, {source_y}) -> viewport=({viewport_source_x}, {viewport_source_y}), target document=({target_x}, {target_y}) -> viewport=({viewport_target_x}, {viewport_target_y})')
# move to start position
- await self.page.mouse.move(source_x, source_y)
+ await self.page.mouse.move(viewport_source_x, viewport_source_y)
await asyncio.sleep(0.1)
# press mouse
@@ -1416,14 +1934,14 @@ async def drag(self, source_coords, target_coords):
await asyncio.sleep(0.1)
# drag to target position
- await self.page.mouse.move(target_x, target_y)
+ await self.page.mouse.move(viewport_target_x, viewport_target_y)
await asyncio.sleep(0.1)
# release mouse
await self.page.mouse.up()
await asyncio.sleep(0.2)
- logging.debug(f'Drag completed from ({source_x}, {source_y}) to ({target_x}, {target_y})')
+ logging.debug(f'Drag completed from viewport ({viewport_source_x}, {viewport_source_y}) to ({viewport_target_x}, {viewport_target_y})')
return True
except Exception as e:
@@ -1432,20 +1950,41 @@ async def drag(self, source_coords, target_coords):
async def mouse_move(self, x: int | float, y: int | float) -> bool:
"""Move mouse to absolute coordinates (x, y)."""
+ # Install a fresh ActionContext in action_context_var so a failure below can be reported with details
+ ctx = ActionContext()
+ action_context_var.set(ctx)
+ ctx.element_info = {"action": "mouse_move", "x": x, "y": y}
+
try:
# Coerce to numbers in case strings are provided
target_x = float(x)
target_y = float(y)
- await self.page.mouse.move(target_x, target_y)
- logging.info(f"mouse move to ({target_x}, {target_y})")
+
+ # (x, y) are treated as document coordinates and converted to viewport coordinates before page.mouse.move
+ viewport_x, viewport_y = await self._convert_document_to_viewport_coords(target_x, target_y)
+
+ logging.info(f"Mouse move: document=({target_x}, {target_y}) -> viewport=({viewport_x}, {viewport_y})")
+ await self.page.mouse.move(viewport_x, viewport_y)
await asyncio.sleep(0.1)
return True
except Exception as e:
logging.error(f"Mouse move failed: {str(e)}")
+ ctx.set_error(
+ ERROR_PLAYWRIGHT,
+ f"Mouse move action failed to position ({x}, {y})",
+ target_x=target_x if 'target_x' in locals() else x,
+ target_y=target_y if 'target_y' in locals() else y,
+ playwright_error=str(e)
+ )
return False
async def mouse_wheel(self, delta_x: int | float = 0, delta_y: int | float = 0) -> bool:
"""Scroll the mouse wheel by delta values."""
+ # Initialize action context for error propagation
+ ctx = ActionContext()
+ action_context_var.set(ctx)
+ ctx.element_info = {"action": "mouse_wheel", "deltaX": delta_x, "deltaY": delta_y}
+
try:
dx = float(delta_x) if delta_x is not None else 0.0
dy = float(delta_y) if delta_y is not None else 0.0
@@ -1455,4 +1994,11 @@ async def mouse_wheel(self, delta_x: int | float = 0, delta_y: int | float = 0)
return True
except Exception as e:
logging.error(f"Mouse wheel failed: {str(e)}")
+ ctx.set_error(
+ ERROR_PLAYWRIGHT,
+ f"Mouse wheel action failed with delta ({delta_x}, {delta_y})",
+ deltaX=dx if 'dx' in locals() else delta_x,
+ deltaY=dy if 'dy' in locals() else delta_y,
+ playwright_error=str(e)
+ )
return False
\ No newline at end of file
diff --git a/webqa_agent/llm/prompt.py b/webqa_agent/llm/prompt.py
index 0048a6e..11b79f9 100644
--- a/webqa_agent/llm/prompt.py
+++ b/webqa_agent/llm/prompt.py
@@ -14,12 +14,11 @@ class LLMPrompt:
## Context Provided
- **`pageDescription (interactive elements)`**: A map of all interactive elements on the page, each with a unique ID. Use these IDs for actions.
- - **`page_structure (full text content)`**: The complete text content of the page, including non-interactive elements.
- **`Screenshot`**: A visual capture of the current page state.
## Objective
- Decompose the user's instruction into a **series of actionable steps**, each representing a single UI interaction.
- - **Unified Context Analysis**: You MUST analyze BOTH `pageDescription` and `page_structure` together. Use `page_structure` to understand the meaning and context of the interactive elements in `pageDescription` (e.g., matching a label to a nearby input field). This unified view is critical for making correct decisions.
+ - **Unified Context Analysis**: Analyze the `pageDescription` together with the visual `Screenshot`. Use the screenshot to understand the spatial layout and context of the interactive elements (e.g., matching a label to a nearby input field based on their visual positions). This unified view is critical for making correct decisions.
- Identify and locate the target element if applicable.
- Validate if the planned target matches the user's intent, especially in cases of **duplicate or ambiguous elements**.
- Avoid redundant operations such as repeated scrolling or re-executing completed steps.
@@ -187,8 +186,8 @@ class LLMPrompt:
- Example: if you see element '1' with internal id 917, use "id": "1" in your action
### Contextual Decision Making:
- - **Crucially, use the `page_structure` (full text content) to understand the context of the interactive elements from `pageDescription`**. For example, if `page_structure` shows "Username:" next to an input field, you know that input field is for the username.
- - If you see error text like "Invalid email format" in `page_structure`, use this information to correct your next action.
+ - **Crucially, use the `Screenshot` to understand the context of the interactive elements from `pageDescription`**. For example, if the screenshot shows "Username:" next to an input field, you know that input field is for the username.
+ - If you see error text like "Invalid email format" in the screenshot, use this information to correct your next action.
### Supported Actions:
- Tap: Click on a specified page element (such as a button or link). Typically used to trigger a click event.
diff --git a/webqa_agent/testers/case_gen/graph.py b/webqa_agent/testers/case_gen/graph.py
index 7250628..7a3c99a 100644
--- a/webqa_agent/testers/case_gen/graph.py
+++ b/webqa_agent/testers/case_gen/graph.py
@@ -85,30 +85,25 @@ async def plan_test_cases(state: MainGraphState) -> Dict[str, List[Dict[str, Any
logging.info(f"Deep crawling page structure and elements for initial test plan...")
page = await ui_tester.get_current_page()
dp = DeepCrawler(page)
- await dp.crawl(highlight=True, viewport_only=True)
+ await dp.crawl(highlight=True, viewport_only=False)
screenshot = await ui_tester._actions.b64_page_screenshot(
- file_name="plan_or_replan", save_to_log=False, full_page=False
+ file_name="plan_or_replan", save_to_log=False, full_page=True
)
await dp.remove_marker()
- await dp.crawl(highlight=False, filter_text=True, viewport_only=True)
+ await dp.crawl(highlight=False, filter_text=True, viewport_only=False)
page_structure = dp.get_text()
logging.debug(f"----- plan cases ---- Page structure: {page_structure}")
business_objectives = state.get("business_objectives", "No specific business objectives provided.")
- completed_cases = state.get("completed_cases")
language = state.get('language', 'zh-CN')
system_prompt = get_test_case_planning_system_prompt(
business_objectives=business_objectives,
- completed_cases=completed_cases,
language=language,
)
user_prompt = get_test_case_planning_user_prompt(
state_url=state["url"],
- completed_cases=completed_cases,
- reflection_history=state.get("reflection_history"),
- remaining_objectives=state.get("remaining_objectives"),
)
logging.info("Generating initial test plan - Sending request to LLM...")
@@ -283,7 +278,7 @@ async def reflect_and_replan(state: MainGraphState) -> dict:
# Use DeepCrawler to get interactive elements mapping and highlighted screenshot
logging.info(f"Deep crawling page structure and elements for reflection and replanning analysis...")
dp = DeepCrawler(page)
- curr = await dp.crawl(highlight=True, viewport_only=True)
+ curr = await dp.crawl(highlight=True, viewport_only=False)
# Include position information for better replanning decisions
reflect_template = [
str(ElementKey.TAG_NAME),
@@ -294,9 +289,9 @@ async def reflect_and_replan(state: MainGraphState) -> dict:
]
page_content_summary = curr.clean_dict(reflect_template)
logging.debug(f"current page crawled result: {page_content_summary}")
- screenshot = await ui_tester._actions.b64_page_screenshot(file_name="reflection", save_to_log=False, full_page=False)
+ screenshot = await ui_tester._actions.b64_page_screenshot(file_name="reflection", save_to_log=False, full_page=True)
await dp.remove_marker()
- await dp.crawl(highlight=False, filter_text=True, viewport_only=True)
+ await dp.crawl(highlight=False, filter_text=True, viewport_only=False)
page_structure = dp.get_text()
logging.debug(f"----- reflection ---- Page structure: {page_structure}")
diff --git a/webqa_agent/testers/case_gen/prompts/agent_prompts.py b/webqa_agent/testers/case_gen/prompts/agent_prompts.py
index 734f6b4..8a3015c 100644
--- a/webqa_agent/testers/case_gen/prompts/agent_prompts.py
+++ b/webqa_agent/testers/case_gen/prompts/agent_prompts.py
@@ -27,6 +27,12 @@ def get_execute_system_prompt(case: dict) -> str:
- **Layout Comprehension**: Analyze the layout to understand the spatial relationship between elements, which is crucial for complex interactions.
- **Anomaly Detection**: Identify unexpected visual states like error pop-ups, unloaded content, or graphical glitches that may not be present in the text structure.
+**IMPORTANT - Automatic Viewport Management**:
+The system automatically handles element visibility through intelligent scrolling. When you interact with elements (click, hover, type), the system will automatically scroll to ensure the element is in the viewport before performing the action. You do NOT need to manually scroll to elements or worry about elements being outside the visible area. Simply reference elements by their identifiers, and the system will handle viewport positioning automatically.
+
+**IMPORTANT - Screenshot Context**:
+The screenshots you receive during test execution show ONLY the current viewport (visible portion of the page), not the entire webpage. While test planning may reference elements from full-page screenshots, your execution screenshots are viewport-limited. This is intentional - the automatic viewport management system ensures that any element you need to interact with will be scrolled into the viewport before your action executes. If you cannot see an element in the current screenshot but it was referenced in the test plan, trust that the system will handle the scrolling automatically.
+
## Available Tools
You have access to two specialized testing tools:
@@ -281,6 +287,43 @@ def get_execute_system_prompt(case: dict) -> str:
2. Check for dynamic content appearance
3. Retry interaction after content stabilization
+### Pattern 5: Automatic Scroll Management Failures
+**Scenario**: Element interaction fails due to scroll or viewport positioning issues
+**Recognition Signals**:
+- Error messages containing "element not in viewport", "not visible", "not clickable", or "scroll failed"
+- Element was referenced in test plan from full-page screenshot but not visible in current viewport
+- Interaction timeout errors for elements that should exist
+
+**Understanding the Issue**:
+The system uses automatic viewport management with intelligent scrolling. When you interact with elements (click, hover, type), the system automatically scrolls to ensure the element is in viewport BEFORE executing your action. This process:
+1. Detects if the target element is outside viewport
+2. Attempts scroll using CSS selector → XPath → coordinate-based fallback
+3. Implements retry logic for lazy-loaded content (up to 3 attempts)
+4. Waits for page stability after scroll (handles infinite scroll and dynamic loading)
+
+**Recovery Solution**:
+If automatic scroll fails, the error will indicate the specific issue:
+1. **Element Not Found**: Element may not exist yet due to lazy loading
+ - Use `execute_ui_action(action='Sleep', value='2000')` to wait for content to load
+ - Verify element identifier is correct by checking page structure
+ - Consider that element may appear conditionally based on previous actions
+
+2. **Scroll Timeout**: Page is loading slowly or has infinite scroll
+ - Increase wait time: `execute_ui_action(action='Sleep', value='3000')`
+ - Manually trigger scroll if needed: `execute_ui_action(action='Scroll', value='down')`
+ - Check for loading spinners or progress indicators
+
+3. **Element Obscured**: Element exists but is covered by another element (modal, overlay, popup)
+ - Close the obscuring element first (dismiss modal, close popup)
+ - Use `execute_ui_action(action='KeyboardPress', value='Escape')` to dismiss overlays
+ - Verify no sticky headers or floating elements are blocking the target
+
+**Important Notes**:
+- You do NOT need to manually scroll in normal circumstances - the system handles this automatically
+- Only use manual scroll actions when automatic scroll explicitly fails with error messages
+- If a scroll failure persists after the recovery steps above, report the error as-is - such failures are rare and usually indicate a system issue
+- Trust the automatic viewport management for elements referenced from full-page planning screenshots
+
## Test Execution Examples
### Example 1: Form Field Validation Recovery
@@ -330,6 +373,29 @@ def get_execute_system_prompt(case: dict) -> str:
**Tool Response**: `[SUCCESS] Action 'Input' on 'username field' completed successfully`
**Agent Reporting**: Report completion of the single action and allow framework to proceed to next step
+### Example 8: Mouse Action - Cursor Positioning
+**Context**: Drawing canvas requiring precise cursor positioning
+**Action**: `execute_ui_action(action='Mouse', target='canvas drawing area', value='move:250,150', description='Position cursor at specific canvas coordinates for drawing')`
+**Tool Response**: `[SUCCESS] Action 'Mouse' on 'canvas drawing area' completed successfully. Mouse moved to (250, 150)`
+**Use Case**: When standard click/hover actions are insufficient and precise coordinate-based cursor control is needed (e.g., drawing tools, custom interactive visualizations, coordinate-based maps)
+
+### Example 9: Mouse Action - Wheel Scrolling
+**Context**: Custom scrollable container with horizontal scroll
+**Action**: `execute_ui_action(action='Mouse', target='horizontal gallery container', value='wheel:100,0', description='Scroll gallery horizontally to the right')`
+**Tool Response**: `[SUCCESS] Action 'Mouse' on 'horizontal gallery container' completed successfully. Mouse wheel scrolled (deltaX: 100, deltaY: 0)`
+**Use Case**: When standard Scroll action doesn't support custom scroll directions or precise delta control needed (e.g., horizontal scrolling, custom scroll containers)
+
+### Example 10: Page Navigation Actions
+**Context 1 - Direct Navigation**: Navigate to specific URL for cross-site testing
+**Action**: `execute_ui_action(action='GoToPage', target='https://example.com/test-page', description='Navigate to external test page for integration testing')`
+**Tool Response**: `[SUCCESS] Action 'GoToPage' on 'https://example.com/test-page' completed successfully. Navigated to page`
+**Use Case**: Direct URL navigation for multi-site workflows, external authentication redirects, or testing cross-domain functionality
+
+**Context 2 - Browser Back**: Return to previous page after completing action
+**Action**: `execute_ui_action(action='GoBack', target='', description='Navigate back to main product listing page')`
+**Tool Response**: `[SUCCESS] Action 'GoBack' completed successfully. Successfully navigated back to previous page`
+**Use Case**: Test browser back button functionality, validate state preservation after navigation, or reset to previous page state
+
## Test Completion Protocol
When all test steps are completed or an unrecoverable error occurs:
diff --git a/webqa_agent/testers/case_gen/prompts/planning_prompts.py b/webqa_agent/testers/case_gen/prompts/planning_prompts.py
index 4502aac..38bc8f0 100644
--- a/webqa_agent/testers/case_gen/prompts/planning_prompts.py
+++ b/webqa_agent/testers/case_gen/prompts/planning_prompts.py
@@ -30,7 +30,7 @@ def get_shared_test_design_standards(language: str = 'zh-CN') -> str:
- **`domain_specific_rules`**: Industry-specific validation requirements or compliance rules
- **`test_data_requirements`**: Specification of domain-appropriate test data and setup conditions
- **`steps`**: Detailed test execution steps with clear action/verification pairs that simulate real user behavior and scenarios
- - `action`: User-scenario action instructions describing what a real user would do in natural language, DON'T IMAGE. **Only use these action types: "Tap", "Scroll", "Input", "Sleep", "KeyboardPress", "Drag", "SelectDropdown". Do NOT invent or output any other action types or non-existent data.**
+ - `action`: User-scenario action instructions describing what a real user would do in natural language, DON'T IMAGINE. **Only use these action types: "Tap", "Input", "Scroll", "SelectDropdown", "Clear", "Hover", "KeyboardPress", "Upload", "Drag", "GoToPage", "GoBack", "Sleep", "GetNewPage", "Mouse". Do NOT invent or output any other action types or non-existent data.**
- `verify`: User-expectation validation instructions describing what result a real user would expect to see
- **`preamble_actions`**: Optional setup steps to establish required test preconditions
- **`reset_session`**: Session management flag for test isolation strategy
@@ -38,7 +38,7 @@ def get_shared_test_design_standards(language: str = 'zh-CN') -> str:
- **`cleanup_requirements`**: Post-test cleanup actions if needed
#### Step Decomposition Rules:
-1. **One Action Per Step**: Each step in the `steps` array must contain ONLY ONE atomic action, and the action type must be one of: "Tap", "Scroll", "Input", "Sleep", "KeyboardPress", "Drag", "SelectDropdown".
+1. **One Action Per Step**: Each step in the `steps` array must contain ONLY ONE atomic action, and the action type must be one of: "Tap", "Input", "Scroll", "SelectDropdown", "Clear", "Hover", "KeyboardPress", "Upload", "Drag", "GoToPage", "GoBack", "Sleep", "GetNewPage", "Mouse".
2. **Strict Element Correspondence**: Each action must strictly correspond to a real element or option on the page.
3. **No Compound Instructions**: Never combine multiple UI interactions in a single step
4. **Sequential Operations**: Multiple operations on the same or different elements must be separated into distinct steps
@@ -86,6 +86,35 @@ def get_shared_test_design_standards(language: str = 'zh-CN') -> str:
- **Healthcare**: Use realistic patient data, medical codes, and HIPAA-compliant test scenarios
- **Social Media**: Use realistic user profiles, content types, and interaction patterns
+### Mouse Action Usage Guidelines
+**IMPORTANT**: The Mouse action allows precise cursor positioning and mouse wheel scrolling.
+
+#### Mouse Action Format
+- **Mouse Move**: Use the `"Mouse"` action with value `"move:x,y"`, where x and y are pixel coordinates
+ - Example: `{{"action": "Move mouse cursor to position (100, 200)"}}` with value `"move:100,200"`
+ - Use for: Precise cursor positioning, custom drawing areas, coordinate-based interactions
+
+- **Mouse Wheel**: Use the `"Mouse"` action with value `"wheel:deltaX,deltaY"`, where deltaX and deltaY are the horizontal and vertical scroll amounts in pixels
+ - Example: `{{"action": "Scroll mouse wheel down"}}` with value `"wheel:0,100"`
+ - Use for: Custom scroll behavior, horizontal scrolling, precise scroll control
+
+#### When to Use Mouse Action
+- **Coordinate-based interactions**: Canvas drawing, image mapping, coordinate systems
+- **Custom scroll needs**: Horizontal scrolling, specific scroll distances
+- **Specialized UIs**: Games, design tools, interactive visualizations
+
+#### Mouse Action Examples
+```json
+[
+ {{"action": "Move mouse to drawing area coordinates (150, 300)"}},
+ {{"verify": "Verify cursor position indicator updates"}},
+ {{"action": "Scroll horizontally in the canvas"}},
+ {{"verify": "Verify canvas content shifts horizontally"}}
+]
+```
+
+**Note**: For standard element interactions (clicking buttons, hovering over links), prefer using `Tap` and `Hover` actions which automatically locate elements.
+
### User-Scenario Step Design Standards
**CRITICAL**: All test steps must be designed from the user's perspective to ensure realistic and actionable test scenarios:
@@ -175,34 +204,30 @@ def get_shared_test_design_standards(language: str = 'zh-CN') -> str:
def get_test_case_planning_system_prompt(
business_objectives: str,
- completed_cases: list = None,
language: str = 'zh-CN',
) -> str:
"""Generate system prompt for test case planning.
Args:
business_objectives: Business objectives
- completed_cases: Completed test cases (for replanning)
language: Language for test case naming (zh-CN or en-US)
Returns:
Formatted system prompt string
"""
- # Determine if initial planning or replanning
- if not completed_cases:
- # Decide mode based on whether business_objectives is empty
- # Handle case where business_objectives might be a list
- business_objectives_str = business_objectives if isinstance(business_objectives, str) else str(business_objectives) if business_objectives else ""
- if business_objectives_str and business_objectives_str.strip():
- role_and_objective = """
+ # Decide mode based on whether business_objectives is empty
+ # Handle case where business_objectives might be a list
+ business_objectives_str = business_objectives if isinstance(business_objectives, str) else str(business_objectives) if business_objectives else ""
+ if business_objectives_str and business_objectives_str.strip():
+ role_and_objective = """
## Role
You are a Senior QA Testing Professional with expertise in business domain analysis, requirement engineering, and context-aware test design. Your responsibility is to deeply understand the application's business context, domain-specific patterns, and user needs to generate highly relevant and effective test cases.
## Primary Objective
Conduct comprehensive business domain analysis and contextual understanding before generating test cases. Analyze the application's purpose, industry patterns, user workflows, and business logic to create test cases that are not only technically sound but also business-relevant and domain-appropriate.
"""
- mode_section = f"""
+ mode_section = f"""
## Test Planning Mode: Context-Aware Intent-Driven Testing
**Business Objectives Provided**: {business_objectives_str}
@@ -259,15 +284,15 @@ def get_test_case_planning_system_prompt(
- **Success criteria**: Clear verification conditions
- **Test data**: If data input is required, provide specific test data
"""
- else:
- role_and_objective = """
+ else:
+ role_and_objective = """
## Role
You are a Senior QA Testing Professional with expertise in comprehensive web application analysis and domain-aware testing. Your responsibility is to conduct deep application analysis, understand business context, and design complete test suites that ensure software quality through systematic validation of all functional, business, and domain-specific requirements.
## Primary Objective
Perform comprehensive application analysis including business domain understanding, user workflow identification, and contextual awareness before generating test cases. Apply established QA methodologies including domain-specific testing patterns, business process validation, and risk-based testing prioritization.
"""
- mode_section = """
+ mode_section = """
## Test Planning Mode: Comprehensive Context-Aware Testing
**Business Objectives**: Not provided - Performing comprehensive testing with domain analysis
@@ -323,63 +348,6 @@ def get_test_case_planning_system_prompt(
* Verification points
- **Success criteria**: Clear verification conditions
- **Test data**: If data input is required, provide specific test data
-"""
- else:
- # Replanning mode
- role_and_objective = """
-## Role
-You are a Senior QA Testing Professional performing adaptive test plan revision based on execution results, enhanced business understanding, and evolving domain context.
-
-## Primary Objective
-Leverage deeper business domain insights and execution learnings to generate refined test plans that address remaining coverage gaps while building upon successful outcomes. Ensure enhanced business relevance and domain appropriateness in all test cases.
-"""
- # Also decide mode based on business_objectives during replanning
- # Handle case where business_objectives might be a list
- business_objectives_str = business_objectives if isinstance(business_objectives, str) else str(business_objectives) if business_objectives else ""
- if business_objectives_str and business_objectives_str.strip():
- mode_section = f"""
-## Replanning Mode: Enhanced Context-Aware Revision
-**Original Business Objectives**: {business_objectives_str}
-
-### Enhanced Replanning Requirements
-- Apply deeper domain understanding gained from execution results
-- Generate additional test cases with enhanced business relevance
-- Maintain focus on original business objectives while improving domain appropriateness
-- Incorporate lessons learned from executed test cases
-- Ensure new test cases complement completed ones with superior business alignment
-"""
- else:
- mode_section = """
-## Replanning Mode: Enhanced Comprehensive Testing Revision
-**Original Objectives**: Comprehensive testing with enhanced domain awareness
-
- CRITICAL ANALYSIS REQUIREMENTS
- BEFORE making ANY decision, you MUST:
-
- 1. **CHECK REPETITION WARNINGS FIRST**: If there are ANY repetition warnings above, those warnings are MANDATORY and NON-NEGOTIABLE. You MUST NOT perform any action that is mentioned in the warnings.
-
- 2. **FORBIDDEN ACTIONS**: If any element or action is marked as FORBIDDEN, FAILED, or CRITICAL in the warnings above, you are ABSOLUTELY PROHIBITED from using that element or action again.
-
- 3. **ALTERNATIVE STRATEGY REQUIRED**: When repetition warnings exist, you MUST:
- - Choose a completely different type of element (if button failed, try link or input)
- - Navigate to different page areas (scroll, click navigation menu)
- - Try completely different approaches to achieve the objective
- - Consider marking the test as completed if the objective might already be achieved
-
- 4. **ERROR HANDLING PRIORITY**: Check page content and screenshots for errors, warnings, login requirements, etc. Handle these BEFORE continuing the original process.
-
- 5. **NO EXCUSES**: There are NO exceptions to repetition warnings. Even if the element seems important for the objective, if it's marked as forbidden, you MUST find an alternative approach.
-
- Analysis Priority Order:
- 1. Compliance with repetition warnings (HIGHEST PRIORITY)
- 2. Error/exception handling in page content
- 3. Progress toward test objective
- 4. Coverage of untested functionalities
-
- Please analyze the current state and decide:
- 1. Whether the current test case is completed
- 2. Whether to shift the test focus
- 3. The most valuable next action
"""
shared_standards = get_shared_test_design_standards(language)
@@ -425,40 +393,23 @@ def get_test_case_planning_system_prompt(
def get_test_case_planning_user_prompt(
state_url: str,
- completed_cases: list = None,
- reflection_history: list = None,
- remaining_objectives: str = None,
) -> str:
"""Generate user prompt for test case planning.
Args:
state_url: Target URL
- completed_cases: Completed test cases (for replanning)
- reflection_history: Reflection history (for replanning)
- remaining_objectives: Remaining objectives (for replanning)
Returns:
Formatted user prompt string
"""
- context_section = ""
- if completed_cases:
- # Replanning mode
- last_reflection = reflection_history[-1] if reflection_history else {}
- context_section = f"""
-## Revision Context with Enhanced Business Understanding
-- **Completed Test Execution Summary**: {json.dumps(completed_cases, indent=2)}
-- **Previous Reflection Analysis**: {json.dumps(last_reflection, indent=2)}
-- **Remaining Coverage Objectives**: {remaining_objectives}
-- **Enhanced Domain Insights**: Apply deeper business context learned from execution results
-"""
-
user_prompt = f"""
## Application Under Test (AUT)
- **Target URL**: {state_url}
-- **Visual Element Reference (Referenced via attached screenshot) **: The attached screenshot contains numbered markers corresponding to interactive elements.
+- **Visual Element Reference (Referenced via attached screenshot)**: The attached screenshot contains numbered markers corresponding to interactive elements.
-{context_section}
+**IMPORTANT - Full-Page Context**:
+The screenshot shows the ENTIRE webpage from top to bottom, not just the visible viewport. All elements on the page are captured and numbered, including those that may be below the fold. When planning test cases, you can reference ANY element visible in this full-page screenshot. During execution, the system will automatically scroll to elements outside the viewport as needed.
Please help me plan test cases based on the above information. Please conduct in-depth analysis according to the requirements in the system prompt and generate test cases that meet the specifications.
Example 1:
@@ -713,7 +664,10 @@ def get_reflection_user_prompt(
interactive_elements_section = f"""
- **Interactive Elements Map**:
{interactive_elements_json}
-- **Visual Element Reference**: The attached screenshot contains numbered markers corresponding to interactive elements. Each number in the image maps to an element ID in the Interactive Elements Map above, providing precise visual-textual correlation for comprehensive UI analysis."""
+- **Visual Element Reference**: The attached screenshot contains numbered markers corresponding to interactive elements. Each number in the image maps to an element ID in the Interactive Elements Map above, providing precise visual-textual correlation for comprehensive UI analysis.
+
+**IMPORTANT - Full-Page Context**:
+The screenshot shows the ENTIRE webpage from top to bottom, not just the visible viewport. All elements on the page are captured and numbered, including those below the fold. When replanning test cases, you can reference ANY element visible in this full-page screenshot. The execution system automatically scrolls to elements outside the viewport as needed."""
# Determine test mode for reflection decision
# Handle case where business_objectives might be a list
diff --git a/webqa_agent/testers/case_gen/prompts/tool_prompts.py b/webqa_agent/testers/case_gen/prompts/tool_prompts.py
deleted file mode 100644
index a87de12..0000000
--- a/webqa_agent/testers/case_gen/prompts/tool_prompts.py
+++ /dev/null
@@ -1,191 +0,0 @@
-"""工具相关的提示词模板."""
-
-
-def get_error_detection_prompt() -> str:
- """返回UI错误检测LLM的系统提示词."""
- prompt = """
-You are a Senior QA Test Validation Specialist with expertise in automated UI testing and validation error detection. Your responsibility is to analyze post-action UI states and determine whether specific user actions have resulted in validation errors or system failures that require immediate remediation.
-
-## Core Mission
-Provide precise, actionable validation analysis for UI test execution agents by detecting errors that directly prevent the intended user action from achieving its stated objective. Your analysis must distinguish between actionable errors requiring immediate correction and informational messages that do not block test progression.
-
-## Input Context Analysis
-You will receive the following test execution context:
-
-1. **Action Intent**: The specific user goal or business objective the action was intended to achieve
-2. **Executed Action Details**:
- - `action`: The type of UI interaction performed
- - `target`: The UI element that was targeted
- - `value`: The data input provided (for text-based actions)
-3. **Post-Action Screenshot**: Base64-encoded visual capture of the UI state after action execution
-4. **Post-Action Page Structure**: Complete textual representation of the page content and elements
-
-## Error Classification Framework
-
-### Category 1: CRITICAL ERRORS (Require Immediate Action)
-**Definition**: Errors that directly prevent the intended action objective from being achieved and require immediate remediation.
-
-**Error Types**:
-- **Input Validation Failures**: Form field validation errors directly related to the submitted data
-- **Authentication/Authorization Errors**: Access denied, session expired, insufficient permissions
-- **System Errors**: Application crashes, server errors, network timeouts
-- **Business Logic Violations**: Data conflicts, constraint violations, workflow rule violations
-- **UI State Errors**: Unexpected modal dialogs, navigation failures, broken functionality
-
-**Detection Criteria**:
-- Error message explicitly references the submitted data or performed action
-- System prevents progression of the intended user workflow
-- UI state has changed in a way that blocks the objective achievement
-
-### Category 2: NON-CRITICAL CONDITIONS (No Immediate Action Required)
-**Definition**: UI states or messages that do not prevent the current action's objective from being achieved.
-
-**Condition Types**:
-- **Stale Validation Messages**: Error messages from previous actions that don't apply to current input
-- **Informational Messages**: Help text, tooltips, status updates that don't indicate failure
-- **Progressive Disclosure**: New form fields or options appearing as part of normal workflow
-- **Secondary Validation Warnings**: Non-blocking suggestions or recommendations
-- **Future State Preparations**: Empty required fields that will be addressed in subsequent test steps
-
-## Advanced Error Detection Logic
-
-### Stale Error Recognition Protocol
-**Scenario**: Previous validation errors may persist visually even after corrective action
-**Analysis Method**:
-1. **Data Correlation Check**: Does the visible error message specifically reference the current input value?
-2. **Temporal Analysis**: Is the error message consistent with the action just performed?
-3. **Context Relevance**: Does the error logically apply to the current UI interaction?
-4. **Resolution Path**: Would the error be resolved by the action that was just taken?
-
-**Example Analysis**:
-- Current Action: `type` with `value='john.doe@email.com'`
-- Visible Error: "Email format is invalid"
-- Analysis: The current value is properly formatted; error message is stale from previous attempt
-- **Conclusion**: NO ERROR DETECTED (stale condition)
-
-### Intent-Based Validation Protocol
-**Methodology**: Evaluate success not just by the absence of errors, but by positively confirming that the UI has transitioned to the expected state implied by the action's intent. This is the most critical part of your analysis.
-**Process**:
-1. **Deconstruct Intent**: What is the explicit goal of the action? (e.g., "Navigate to the login page," "Open the user profile dialog," "Apply a filter to the search results.")
-2. **Identify Success Indicators**: Based on the intent, what specific UI elements or state changes MUST be present on the new page? (e.g., For a login page, success indicators are the presence of 'username'/'password' input fields and a 'submit' button. For a search filter, it's the updated results list.)
-3. **Scan for Indicators**: Actively scan the provided `Page Structure` and `Screenshot` for these specific success indicators.
-4. **Compare and Conclude**:
- - If the key success indicators ARE PRESENT, the action was successful, even if other, unrelated warnings or elements are also on the page. Conclude **NO ERROR**.
- - If the key success indicators ARE MISSING, the action has failed to achieve its intent, even if no explicit error message is visible. This is a critical failure. Conclude **ERROR DETECTED**.
-
-**Example Analysis**:
-- Action Intent: "Navigate to the login page by clicking the '登录' button"
-- Post-Action State: The page remains unchanged. The `Page Structure` does not contain any `<input>` fields or a login form.
-- Analysis: The primary success indicators for navigating to a login page (username/password fields) are absent. The UI has not transitioned to the expected state.
-- **Conclusion**: ERROR DETECTED. The click action had no effect.
-- **Remediation Suggestion**: "The '登录' button was clicked, but the login page/modal did not appear. The UI did not change as expected. Verify the button's functionality or if another action is required first."
-
-## Quality Assurance Testing Scenarios
-
-### Scenario A: Form Input Validation
-**Context**: User submitting data to a web form with validation rules
-**Critical Errors**: Field-specific validation messages related to the submitted data
-**Non-Critical**: Generic form instructions, placeholder text, unrelated field warnings
-
-### Scenario B: Dropdown Selection
-**Context**: User selecting an option from a dropdown menu
-**Critical Errors**: "Option not found" errors, dropdown functionality failures
-**Non-Critical**: Dropdown opening successfully but showing different options than expected
-
-### Scenario C: Navigation Actions
-**Context**: User attempting to navigate to a different page or section
-**Critical Errors**: Access denied messages, broken links, page load failures
-**Non-Critical**: Page loading successfully but containing unrelated content warnings
-
-### Scenario D: Dynamic Content Loading
-**Context**: User triggering content updates or asynchronous operations
-**Critical Errors**: Load failures, timeout errors, data retrieval problems
-**Non-Critical**: Loading states, progress indicators, partial content updates
-
-## Decision-Making Examples
-
-### Example 1: Input Validation Error Detection
-**Input Analysis**:
-- Action Intent: "Enter organization name for account setup"
-- Action: `type` on `Organization Name field` with `value='Test@Org#123'`
-- Page State: Error message "Organization name can only contain letters, numbers, spaces and symbols _-"
-- Analysis: The submitted value contains '@' and '#' characters which violate the stated validation rule
-**Decision**: ERROR DETECTED - Direct validation failure requiring data correction
-
-### Example 2: Stale Error Identification
-**Input Analysis**:
-- Action Intent: "Enter valid email address"
-- Action: `type` on `Email field` with `value='user@company.com'`
-- Page State: Error message "Please enter a valid email address" still visible
-- Analysis: The current input follows proper email format; error message doesn't apply to this value
-**Decision**: NO ERROR DETECTED - Stale validation message from previous attempt
-
-### Example 3: Intent-Based Success Recognition
-**Input Analysis**:
-- Action Intent: "Open registration form for new account creation"
-- Action: `click` on `Sign Up button`
-- Page State: Registration form displayed with "Required field" indicators on empty inputs
-- Analysis: The form opening objective was achieved; empty field indicators are normal initial state
-**Decision**: NO ERROR DETECTED - Intent successfully fulfilled
-
-### Example 4: System Error Detection
-**Input Analysis**:
-- Action Intent: "Submit completed application form"
-- Action: `click` on `Submit button`
-- Page State: "Server error: Unable to process request. Please try again later."
-- Analysis: The submission failed due to system-level error preventing objective completion
-**Decision**: ERROR DETECTED - System failure requiring retry or escalation
-
-## Output Format Specification
-
-You must return a strictly formatted JSON object with complete analysis:
-
-```json
-{
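- "error_detected": <true|false>,
- "error_message": "<concise description of the detected error, or null>",
- "reasoning": "<detailed analysis of the decision and the evidence considered>",
- "error_category": "<error type such as Input_Validation, System_Error, Authentication, or null>",
- "remediation_suggestion": "<specific guidance for resolving the error, or null>"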
-}
-```
-
-### Field Specifications:
-- **error_detected**: `true` if a critical error requiring immediate action is identified, `false` otherwise
-- **error_message**: Concise, actionable description of the detected error (null if no error)
-- **reasoning**: Detailed analysis explaining the decision-making process and evidence considered
-- **error_category**: Classification of error type (e.g., "Input_Validation", "System_Error", "Authentication") or null
-- **remediation_suggestion**: Specific guidance for error resolution (null if no error detected)
-
-## Response Examples
-
-### Critical Error Response:
-```json
-{
- "error_detected": true,
- "error_message": "Password must be at least 8 characters long with uppercase, lowercase, and numeric characters",
- "reasoning": "The submitted password 'test123' does not meet the complexity requirements displayed in the validation message. The error directly corresponds to the current input value and prevents successful form submission.",
- "error_category": "Input_Validation",
- "remediation_suggestion": "Modify password to include uppercase letters and ensure minimum 8 character length"
-}
-```
-
-### No Error Response:
-```json
-{
- "error_detected": false,
- "error_message": null,
- "reasoning": "The action successfully achieved its intended objective. The login form opened as expected and no validation errors were triggered by the current action. Visible placeholder text and field labels are standard UI elements, not error conditions.",
- "error_category": null,
- "remediation_suggestion": null
-}
-```
-
-## Quality Standards
-- **Precision**: Only identify errors that directly impact the current action's objective
-- **Actionability**: All detected errors must provide clear remediation guidance
-- **Context Awareness**: Consider the full testing context and user intent
-- **Consistency**: Apply uniform analysis criteria across all evaluations
-- **Completeness**: Provide thorough reasoning for all decisions made
-"""
- return prompt
diff --git a/webqa_agent/testers/case_gen/tools/element_action_tool.py b/webqa_agent/testers/case_gen/tools/element_action_tool.py
index 86853bf..4cf7428 100644
--- a/webqa_agent/testers/case_gen/tools/element_action_tool.py
+++ b/webqa_agent/testers/case_gen/tools/element_action_tool.py
@@ -58,7 +58,7 @@ class UIActionSchema(BaseModel):
"'KeyboardPress' action (key name like 'Enter', 'Tab', 'Escape', etc.), "
"'Upload' action (file path), "
"'Sleep' action (duration in milliseconds), "
- "'Mouse' action (operation type: 'move' for cursor positioning or 'wheel' for scrolling). "
+ "'Mouse' action (operation specification in format 'move:x,y' for cursor positioning to coordinates (x,y) or 'wheel:deltaX,deltaY' for scrolling by delta values. Examples: 'move:100,200' moves cursor to (100,200), 'wheel:0,100' scrolls down by 100 pixels). "
"Optional for 'Drag' action (target position description), "
"'GetNewPage' action (tab/window identifier). "
"Optional for other actions."
@@ -179,12 +179,14 @@ async def _arun(
if value:
action_phrase += f" {value}"
elif action == "Mouse":
- if value and 'move' in value.lower():
- action_phrase = f"Move mouse cursor to {target}"
- elif value and 'wheel' in value.lower():
- action_phrase = f"Scroll mouse wheel on {target}"
+ if value and 'move:' in value.lower():
+ # Extract coordinates from 'move:x,y' format
+ action_phrase = f"Move mouse cursor to coordinates {value.split(':', 1)[1]} (specified as {target})"
+ elif value and 'wheel:' in value.lower():
+ # Extract delta values from 'wheel:deltaX,deltaY' format
+ action_phrase = f"Scroll mouse wheel by {value.split(':', 1)[1]} (on {target})"
else:
- action_phrase = f"Perform mouse action on {target}"
+ action_phrase = f"Perform mouse action on {target} with value '{value}'"
else:
action_phrase = f"{action} on {target}"
if value:
@@ -212,6 +214,117 @@ async def _arun(
# First, check for a hard failure from the action executor
if not result.get("success"):
+ # Check for enriched error details
+ error_details = result.get("error_details", {})
+
+ if error_details and error_details.get("error_type"):
+ # Format structured error message based on error type
+ error_type = error_details.get("error_type")
+ error_reason = error_details.get("error_reason", "Unknown reason")
+
+ if error_type == "scroll_failed":
+ error_message = f"""[FAILURE] Action '{action}' on '{target}' failed.
+
+**Root Cause**: Element viewport positioning failed
+**Details**: {error_reason}
+**Strategies Attempted**: {', '.join(error_details.get('attempted_strategies', []))}
+
+**Recovery Actions**:
+1. Use Sleep action (2-3 seconds) to allow lazy-loaded content to appear
+2. Try manual Scroll action to navigate the page closer to the element
+3. Verify element ID is correct from current page state
+4. Check if element is in a collapsed section that needs to be opened first"""
+
+ elif error_type == "scroll_timeout_lazy_loading":
+ error_message = f"""[FAILURE] Action '{action}' on '{target}' failed.
+
+**Root Cause**: Page content unstable after scrolling (likely lazy-loading or infinite scroll)
+**Details**: {error_reason}
+
+**Recovery Actions**:
+1. Use Sleep action with longer duration (3-5 seconds) to allow content to stabilize
+2. Try the action again - content may have loaded by now
+3. Use manual Scroll action to trigger additional content loading
+4. Verify the element ID from the current page state in case it changed"""
+
+ elif error_type == "element_not_found":
+ error_message = f"""[FAILURE] Action '{action}' on '{target}' failed.
+
+**Root Cause**: Element does not exist on current page
+**Element ID**: {error_details.get('element_info', {}).get('element_id', target)}
+
+**Recovery Actions**:
+1. Review current page structure - element may have a different ID now
+2. Check if navigation to the correct page is needed
+3. Verify element is not hidden behind authentication or modal dialog
+4. Use Sleep action if element loads dynamically after page interaction"""
+
+ elif error_type == "element_not_clickable":
+ error_message = f"""[FAILURE] Action '{action}' on '{target}' failed.
+
+**Root Cause**: Element exists but cannot be clicked
+**Details**: {error_reason}
+
+**Recovery Actions**:
+1. Check if element is obscured by modal/overlay - close it first using Tap action
+2. Try Hover action over the element before clicking
+3. Check if element is disabled - may need to enable it through other actions
+4. Verify correct element ID - similar but different elements may exist"""
+
+ elif error_type == "element_not_typeable":
+ error_message = f"""[FAILURE] Action '{action}' on '{target}' failed.
+
+**Root Cause**: Element cannot accept text input
+**Details**: {error_reason}
+
+**Recovery Actions**:
+1. Verify the element is actually an input field or contenteditable element
+2. Try Clear action first, then Input action
+3. Check if element is disabled or read-only
+4. Use Tap action to focus the element before typing"""
+
+ elif error_type == "file_upload_failed":
+ error_message = f"""[FAILURE] Action '{action}' on '{target}' failed.
+
+**Root Cause**: File upload operation failed
+**Details**: {error_reason}
+
+**Recovery Actions**:
+1. Verify the file path exists and is accessible
+2. Check if the file format is accepted by the input element
+3. Ensure the file size is within acceptable limits
+4. Verify file input element is present and enabled on the page
+5. Check file permissions and ensure the file is readable"""
+
+ elif error_type == "playwright_error":
+ error_message = f"""[FAILURE] Action '{action}' on '{target}' failed.
+
+**Root Cause**: Browser interaction error
+**Technical Details**: {error_details.get('playwright_error', 'Unknown error')}
+
+**Recovery Actions**:
+1. Retry the action after a short Sleep (1-2 seconds)
+2. Check if page has navigated unexpectedly
+3. Verify element still exists on current page
+4. Check browser console for JavaScript errors that might interfere"""
+
+ else:
+ # Unknown error type, use generic format
+ error_message = f"""[FAILURE] Action '{action}' on '{target}' failed.
+
+**Error Type**: {error_type}
+**Details**: {error_reason}
+
+**Recovery Actions**:
+1. Review the error details carefully
+2. Check current page state
+3. Try alternative action strategies
+4. Use Sleep action to allow page to stabilize"""
+
+ logging.warning(f"Action failed with structured error: {error_type}")
+ return error_message
+
+ # Fallback: Use existing error handling for errors without enriched details
error_message = (
f"Action '{action}' on '{target}' failed. Reason: {result.get('message', 'No details provided.')}"
)
diff --git a/webqa_agent/testers/case_gen/utils/message_converter.py b/webqa_agent/testers/case_gen/utils/message_converter.py
index 247901d..3a873c2 100644
--- a/webqa_agent/testers/case_gen/utils/message_converter.py
+++ b/webqa_agent/testers/case_gen/utils/message_converter.py
@@ -91,36 +91,3 @@ def convert_intermediate_steps_to_messages(
continue
return messages
-
-
-def merge_messages_with_intermediate_steps(
- existing_messages: List[BaseMessage],
- intermediate_steps: List[Tuple[Any, str]],
- include_intermediate: bool = True
-) -> List[BaseMessage]:
- """Merge existing messages with converted intermediate steps.
-
- Args:
- existing_messages: Current message history
- intermediate_steps: New intermediate steps to add
- include_intermediate: Whether to include intermediate steps in messages
-
- Returns:
- Merged list of messages
- """
- if not include_intermediate or not intermediate_steps:
- return existing_messages
-
- # Convert intermediate steps to messages
- intermediate_messages = convert_intermediate_steps_to_messages(intermediate_steps)
-
- # Merge with existing messages
- # Note: We append intermediate messages to maintain chronological order
- merged_messages = existing_messages + intermediate_messages
-
- logging.debug(
- f"Merged {len(existing_messages)} existing messages with "
- f"{len(intermediate_messages)} intermediate messages"
- )
-
- return merged_messages
\ No newline at end of file
diff --git a/webqa_agent/testers/case_gen/utils/prompt_utils.py b/webqa_agent/testers/case_gen/utils/prompt_utils.py
deleted file mode 100644
index 5e09748..0000000
--- a/webqa_agent/testers/case_gen/utils/prompt_utils.py
+++ /dev/null
@@ -1,39 +0,0 @@
-"""This module provides utility functions for generating parts of agent
-prompts."""
-
-
-def check_repetition(case: dict) -> str:
- """Checks for repeated actions in the test case history and returns a
- warning string."""
- if not case.get("test_context"):
- return ""
-
- warnings = []
- test_context = case["test_context"]
-
- # Check for repeated element interactions
- for element, data in test_context.get("tested_elements", {}).items():
- if data.get("test_count", 0) >= 2:
- recent_failures = [r for r in data.get("results", [])[-2:] if not r.get("success")]
- if len(recent_failures) >= 2:
- warnings.append(
- f"⚠️ REPETITION WARNING: Element '{element}' has failed multiple times recently. AVOID interacting with it again."
- )
- elif data.get("test_count", 0) >= 3:
- warnings.append(
- f"⚠️ REPETITION WARNING: Element '{element}' has been tested multiple times. Consider a different element or action."
- )
-
- # Check for repeated action paths
- test_path = test_context.get("test_path", [])
- if len(test_path) >= 3:
- recent_path = test_path[-3:]
- if len(set(recent_path)) == 1:
- warnings.append(
- f"⚠️ REPETITION WARNING: You are repeating the exact same action '{recent_path[0]}' three times in a row. You MUST choose a different action."
- )
-
- if warnings:
- return "=== REPETITION WARNINGS ===\n" + "\n".join(warnings) + "\n"
-
- return "No repetition detected. Proceed with the next logical step."
diff --git a/webqa_agent/testers/function_tester.py b/webqa_agent/testers/function_tester.py
index cb031fe..7f72ed1 100644
--- a/webqa_agent/testers/function_tester.py
+++ b/webqa_agent/testers/function_tester.py
@@ -92,12 +92,12 @@ async def action(self, test_step: str, file_path: str = None) -> Tuple[Dict[str,
# Crawl current page state
dp = DeepCrawler(self.page)
- prev = await dp.crawl(highlight=True, viewport_only=True, cache_dom=True)
+ prev = await dp.crawl(highlight=True, viewport_only=False, cache_dom=True)
await self._actions.update_element_buffer(prev.raw_dict())
logging.debug(f"previous dom before action : {prev.to_llm_json()}")
# Take screenshot
- marker_screenshot = await self._actions.b64_page_screenshot(file_name="marker")
+ marker_screenshot = await self._actions.b64_page_screenshot(file_name="marker", full_page=True)
# Remove marker
await dp.remove_marker()
@@ -123,7 +123,7 @@ async def action(self, test_step: str, file_path: str = None) -> Tuple[Dict[str,
end_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- curr = await dp.crawl(highlight=True, viewport_only=True, cache_dom=True)
+ curr = await dp.crawl(highlight=True, viewport_only=False, cache_dom=True)
diff_elems = curr.diff_dict([str(ElementKey.TAG_NAME), str(ElementKey.INNER_TEXT), str(ElementKey.ATTRIBUTES), str(ElementKey.CENTER_X), str(ElementKey.CENTER_Y)])
if diff_elems:
logging.debug(f"Diff element map after action: {diff_elems}")
@@ -203,15 +203,15 @@ async def verify(self, assertion: str) -> tuple[Dict[str, Any], Dict[str, Any]]:
# Crawl current page
dp = DeepCrawler(self.page)
- await dp.crawl(highlight=True, filter_text=True, viewport_only=True)
+ await dp.crawl(highlight=True, filter_text=True, viewport_only=False)
- marker_screenshot = await self._actions.b64_page_screenshot(file_name="marker")
+ marker_screenshot = await self._actions.b64_page_screenshot(file_name="marker", full_page=True)
await dp.remove_marker()
- screenshot = await self._actions.b64_page_screenshot(file_name="assert")
+ screenshot = await self._actions.b64_page_screenshot(file_name="assert", full_page=True)
# Get page structure
- await dp.crawl(highlight=False, filter_text=True, viewport_only=True)
+ await dp.crawl(highlight=False, filter_text=True, viewport_only=False)
page_structure = dp.get_text()
# Prepare LLM input