diff --git a/OS_images/main.py b/OS_images/main.py index a5b207cef..154b66f32 100644 --- a/OS_images/main.py +++ b/OS_images/main.py @@ -346,16 +346,33 @@ def get_cursor(): pyautogui.moveTo(current_x, current_y) # ===================================== - cursor_obj = Xcursor() - imgarray = cursor_obj.getCursorImageArrayFast() - cursor_img = Image.fromarray(imgarray) - - # Taking screenshot after the wake-up - screenshot = pyautogui.screenshot() - - cursor_x, cursor_y = pyautogui.position() - screenshot.paste(cursor_img, (cursor_x, cursor_y), cursor_img) - screenshot.save(file_path) + max_screenshot_attempts = 3 + for _screenshot_attempt in range(max_screenshot_attempts): + try: + cursor_obj = Xcursor() + imgarray = cursor_obj.getCursorImageArrayFast() + cursor_img = Image.fromarray(imgarray) + + # Taking screenshot after the wake-up + screenshot = pyautogui.screenshot() + + cursor_x, cursor_y = pyautogui.position() + screenshot.paste(cursor_img, (cursor_x, cursor_y), cursor_img) + screenshot.save(file_path) + break # Success + except Exception as e: + logger.warning(f"Screenshot attempt {_screenshot_attempt + 1}/{max_screenshot_attempts} failed: {e}") + # Clean up stale temp files that may cause PIL errors + import glob + for tmp_png in glob.glob("/tmp/tmp*.png"): + try: + os.remove(tmp_png) + except OSError: + pass + if _screenshot_attempt == max_screenshot_attempts - 1: + logger.error(f"All {max_screenshot_attempts} screenshot attempts failed, returning error") + return jsonify({"status": "error", "message": f"Screenshot failed: {e}"}), 503 + time.sleep(0.5) elif user_platform == "Darwin": # (Mac OS) # Use the screencapture utility to capture the screen with the cursor subprocess.run(["screencapture", "-C", file_path]) @@ -3773,4 +3790,4 @@ def run_bash_script(): pass if __name__ == '__main__': - app.run(debug=True, host="0.0.0.0") + app.run(debug=False, host="0.0.0.0") diff --git a/cua/README.md b/cua/README.md index 8b337c5ec..d7ab238f4 100644 --- 
a/cua/README.md +++ b/cua/README.md @@ -1,51 +1,150 @@ # CUA Data Collection -## Running on Interactive Session -### 1. Boot Up vLLM Servers -We use two vLLM servers - 1 for Qwen3-VL-235B (goal generation policy, aka `planner model` in the code) and 1 for -UI-TARS-1.5-7B (action generation policy, aka `actor model` in the code). +Runs a **planner** model (Qwen3-VL-235B, tp=8) on a non-reserved GPU node and one or more **actor** nodes (UI-TARS-1.5-7B, tp=4) on reserved GPU nodes with KVM-accelerated Linux VMs for data collection. All nodes use the combined `cua-vllm-0.13.0.sqsh` container image (vLLM/CUDA + QEMU/KVM). + +**Why `enroot exec`?** The srun enroot container loses `/dev/kvm` write access. Running `enroot exec` from outside the container retains it. Actor nodes use the SSH+enroot holder-job pattern for this reason. + +## Automated Full Run -Run both servers on 2 nodes by ```bash cd scripts -sbatch run_model.sbatch +bash run.sh ``` -The logs will be shown at `scripts/logs/planner.out` and `scripts/logs/actor.out`. +This: +1. Submits a planner sbatch job (Qwen3-VL-235B, 8 GPUs, non-reserved) +2. Waits for the planner to start and write its hostname to a coordination file +3. Launches `NUM_ACTORS` actor instances, each submitting its own holder job on a reserved node +4. Each actor starts UI-TARS-1.5-7B vLLM + data collection VMs inside its container +5. Waits for all actors to finish, then cancels the planner + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `NUM_ACTORS` | 1 | Number of actor nodes to launch | +| `MAX_PARALLEL` | 16 | Parallel VMs per actor node | +| `MAX_TRAJECTORIES` | 10000 | Trajectories to collect per actor | -### 2. Run CPU node for data collection, with /dev/kvm write permission -We now run CPU interactive session, in which we boot up the linux virtual machine (VM) and call the vLLM servers to -collect the trajectories. 
Here, it's important we have a write access to `/dev/kvm`, as it allows us to accelerate VM. +Each actor independently collects up to `MAX_TRAJECTORIES`. With `NUM_ACTORS=3, MAX_TRAJECTORIES=500`, you get up to 1500 total. + +### Examples + +```bash +# Single actor, defaults +bash run.sh + +# 3 actors, 4 parallel VMs each, 500 trajectories each +NUM_ACTORS=3 MAX_PARALLEL=4 MAX_TRAJECTORIES=500 bash run.sh +``` + +### Logs + +All logs go to `scripts/logs_multi_thread/`: + +| File | Contents | +|------|----------| +| `planner-.out` | Planner vLLM server output | +| `actor_launcher_.log` | Actor N lifecycle (job submission, container polling, SSH exec) | +| `actor_-.out` | Actor N vLLM + data collection output | +| `vllm_actor_.log` | Actor N vLLM server detailed logs | + +### SLURM Configuration + +| Component | Account | Partition | Reservation | +|-----------|---------|-----------|-------------| +| Planner | `nvr_lacr_llm` | `interactive` | none | +| Actor | `llmservice_fm_vision` | `interactive` | `sla_res_osworld_agent_vlm` | + +## Interactive Debugging + +### 1. Start the Planner vLLM Server + +In a separate terminal, submit the planner on its own 8-GPU node: + +```bash +IMAGE="/lustre/fsw/portfolios/nvr/users/bcui/images/cua-vllm-0.13.0.sqsh" +PLANNER_MODEL="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/models/Qwen3-VL-235B-A22B-Thinking" + +srun --job-name=planner \ + --account=nvr_lacr_llm \ + --partition=interactive \ + --gpus-per-node=8 \ + --nodes=1 --ntasks-per-node=1 \ + --time=04:00:00 --exclusive \ + --container-image=$IMAGE \ + --container-mounts=/lustre:/lustre \ + bash -c "vllm serve $PLANNER_MODEL \ + --api-key gen \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --limit-mm-per-prompt.video 0 \ + --limit-mm-per-prompt.image 3 \ + --async-scheduling \ + --max-model-len 65536 \ + --gpu-memory-utilization 0.9" +``` + +Note which node it lands on via `squeue -u $USER` — you'll need this as `$PLANNER_NODE` later (e.g. 
`pool0-2838`). + +### 2. Get an Interactive Shell with KVM -Run the bash script to spin up the CPU interactive node by ```bash cd scripts bash debug_interactive.sh ``` -***What does `debug_interactive.sh` do?*** \ -(1) We first allocate 1 CPU interactive node (with `sleep infinity &` as the command). We assign one of the reserved nodes -for `/dev/kvm` access. \ -(2) When the node is ready with enroot container running, we ssh into the node, and fetch the enroot container ID. \ -(3) We run `enroot exec $CONTAINER_ID bash` in order to access a bash shell inside that enroot container. +This allocates a GPU interactive node (8 GPUs) with the combined container image. The script: +1. Submits a background `sleep infinity` job to reserve the node +2. Waits for the enroot container to be ready +3. SSHs into the node and runs `enroot exec` to enter the container + +When you exit the shell (`exit`), the script automatically cancels the SLURM job via a cleanup trap. -***Wait, why aren't we just directly using the CPU interactive node in step (1)?*** \ -This is a very finicky detail, but in step (1), the enroot container environment loses `/dev/kvm` access that the -CPU node originally had. The only way to retain `/dev/kvm` access inside enroot is to first boot up the container, -then running `enroot exec $CONTAINER_ID bash` from outside. +### 3. Start the Actor vLLM Server +Inside the container shell, start UI-TARS-1.5-7B in the background: -### 3-A. Run Data Collection Script for Debugging ```bash -python debug_collect_trajectories.py --planner_node $PLANNER_NODE --actor_node $ACTOR_NODE +vllm serve ByteDance-Seed/UI-TARS-1.5-7B \ + --api-key gen \ + --tensor-parallel-size 4 \ + --limit-mm-per-prompt.image 5 \ + --limit-mm-per-prompt.video 0 \ + --max-model-len 65536 & ``` -`$PLANNER_NODE` and `$ACTOR_NODE` should be manually set by the user (e.g., pool0-2838). -### 3-B. 
Run Data Collection Script for Parallel Processing +Wait for it to be healthy: ```bash -python parallel_collect_trajectories.py --planner_node $PLANNER_NODE --actor_node $ACTOR_NODE +curl http://localhost:8000/health ``` -## Running with SBATCH -TBD (we just need SBATCH script to run `parallel_collect_trajectories.py`) +### 4. Run Debug Data Collection + +Still inside the container: + +```bash +cd /lustre/fsw/portfolios/nvr/users/bcui/ProRL-Agent-Server/cua +source cua_env_reqs/bin/activate +export PYTHONPATH=/lustre/fsw/portfolios/nvr/users/bcui/ProRL-Agent-Server:$PYTHONPATH + +python debug_collect_trajectories.py \ + --planner_node $PLANNER_NODE \ + --actor_node localhost +``` + +`$PLANNER_NODE` is the node from step 1. The actor is `localhost` since it's running on the same node. + +## Scripts Reference +| Script | Purpose | +|--------|---------| +| `run.sh` | Multi-actor launcher (1 planner + N actors) | +| `run_planner.sbatch` | Planner vLLM server sbatch job | +| `run_actor_and_vm.sh` | Single actor launcher (SSH+enroot pattern) | +| `run_all.sbatch` | Legacy consolidated 2-node sbatch (kept for reference) | +| `debug_interactive.sh` | Interactive GPU shell with KVM | +| `debug_check_kvm.sbatch` | Verify KVM works on GPU nodes (inside container) | +| `check_kvm_cpu.sbatch` | Verify KVM on CPU nodes (legacy) | +| `check_kvm_bash.sh` | Quick KVM write-permission test | +| `run_models.sbatch` | Start both model servers on 2 GPU nodes (standalone) | diff --git a/cua/cleanup_nvcf.py b/cua/cleanup_nvcf.py new file mode 100644 index 000000000..447298c9a --- /dev/null +++ b/cua/cleanup_nvcf.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +"""List and clean up NVCF functions. 
+ +Usage: + # List all functions in the org: + python cleanup_nvcf.py --list + + # Undeploy and delete only YOUR pool functions (nvcf-pool-*): + python cleanup_nvcf.py --cleanup + + # Undeploy and delete ALL functions (careful — includes other users'): + python cleanup_nvcf.py --cleanup --all + + # Undeploy and delete a specific function: + python cleanup_nvcf.py --delete FUNCTION_ID VERSION_ID +""" +import argparse +import os +import sys + +sys.path.insert(0, "/lustre/fsw/portfolios/nvr/users/bcui/ProRL-Agent-Server") + +from openhands.nvidia.os_world.nvcf import OSWorldDeployer + +POOL_NAME_PREFIX = "nvcf-pool-" + + +def main(): + parser = argparse.ArgumentParser(description="NVCF function cleanup utility") + parser.add_argument("--list", action="store_true", help="List all functions in the org") + parser.add_argument("--cleanup", action="store_true", + help="Undeploy and delete pool functions (nvcf-pool-* only, unless --all)") + parser.add_argument("--all", action="store_true", + help="With --cleanup: delete ALL functions, not just pool ones") + parser.add_argument("--delete", nargs=2, metavar=("FUNC_ID", "VER_ID"), + help="Delete a specific function") + args = parser.parse_args() + + api_key = os.environ.get("NGC_API_KEY") + org = os.environ.get("NGC_ORG") + if not api_key or not org: + print("ERROR: Set NGC_API_KEY and NGC_ORG environment variables") + sys.exit(1) + + deployer = OSWorldDeployer(api_key=api_key, org_name=org) + + if args.list or (not args.cleanup and not args.delete): + print("Listing all private NVCF functions in org...\n") + result = deployer.list_functions() + functions = result.get("functions", []) + if not functions: + print("No functions found.") + return + for fn in functions: + fn_id = fn.get("id", "?") + name = fn.get("name", "?") + status = fn.get("status", "?") + ver_id = fn.get("versionId", "?") + mine = " <-- pool" if name.startswith(POOL_NAME_PREFIX) else "" + print(f" {name:40s} status={status:10s} fn={fn_id} ver={ver_id}{mine}") 
+ print(f"\nTotal: {len(functions)} functions") + + if args.delete: + fn_id, ver_id = args.delete + print(f"Undeploying {fn_id}...") + try: + deployer.undeploy(fn_id, ver_id, graceful=True) + print("Undeployed. Deleting...") + except Exception as e: + print(f"Undeploy failed (may already be undeployed): {e}") + try: + deployer.delete_function(fn_id, ver_id) + print("Deleted.") + except Exception as e: + print(f"Delete failed: {e}") + + if args.cleanup: + result = deployer.list_functions() + functions = result.get("functions", []) + + if not args.all: + # Only clean up pool functions + functions = [f for f in functions if f.get("name", "").startswith(POOL_NAME_PREFIX)] + print(f"Cleaning up {len(functions)} pool functions (nvcf-pool-*)...\n") + else: + print(f"Cleaning up ALL {len(functions)} functions...\n") + + if not functions: + print("Nothing to clean up.") + return + + for fn in functions: + fn_id = fn.get("id", "?") + ver_id = fn.get("versionId", "?") + name = fn.get("name", "?") + status = fn.get("status", "?") + print(f" Cleaning up: {name} ({fn_id}) status={status}") + try: + deployer.undeploy(fn_id, ver_id, graceful=True) + print(f" Undeployed") + except Exception as e: + print(f" Undeploy skipped: {e}") + try: + deployer.delete_function(fn_id, ver_id) + print(f" Deleted") + except Exception as e: + print(f" Delete failed: {e}") + print(f"\nDone. 
Cleaned up {len(functions)} functions.") + + +if __name__ == "__main__": + main() diff --git a/cua/modules/debug_env_controller.py b/cua/modules/debug_env_controller.py index 71c113fb4..5bf9ab85e 100644 --- a/cua/modules/debug_env_controller.py +++ b/cua/modules/debug_env_controller.py @@ -1,123 +1,203 @@ import logging +import os +import sys import re -from typing import Dict, Tuple - -import ipdb - -from examples.setup import SetupController -from openhands.core.config import OpenHandsConfig -from openhands.events import EventStream -from openhands.events.action.os import OSWorldInteractiveAction -from openhands.events.observation import ErrorObservation -from openhands.runtime.impl.singularity.osworld_singularity_runtime import OSWorldSingularityRuntime -from openhands.storage import get_file_store +import uuid +import time +import threading +import requests +from typing import Dict, List, Optional, Tuple + +# Ensure OSWorld is importable +_osworld_path = "/lustre/fsw/portfolios/nvr/users/bcui/OSWorld" +if _osworld_path not in sys.path: + sys.path.insert(0, _osworld_path) + +from desktop_env.desktop_env import DesktopEnv from openhands.core.logger import openhands_logger # Create a child logger logger = openhands_logger.getChild('env_controller') logger.setLevel(logging.DEBUG) +# Semaphore to limit concurrent downloads (shared with setup.py via env var) +_DOWNLOAD_SEMAPHORE = threading.Semaphore(int(os.environ.get('OSWORLD_MAX_CONCURRENT_DOWNLOADS', '3'))) + class EnvController: """ - Static Wrapper class that interfaces with OSWorldSingularityRuntime. + Static wrapper class that interfaces with OSWorld's DesktopEnv. + Replaces the previous OpenHands runtime-based approach with OSWorld's + native DesktopEnv + NVCFProvider for NVCF deployments. 
""" + @staticmethod - async def initialize_runtime(job_id: str, vm_image_path: str, os_type: str, - osworld_setup: Dict) -> OSWorldSingularityRuntime: + def pre_download_setup_files(osworld_setup: Dict, cache_dir: str = "/tmp/osworld_cache") -> bool: """ - Initialize OSWorldSingularityRuntime. - Used by DataCollector._init_worker to boot up the VM. + Pre-download all setup files to local cache BEFORE deploying NVCF. + This avoids wasting NVCF resources if downloads fail (e.g., HF 429 errors). + + Returns True if all downloads succeeded, False otherwise. """ - config = OpenHandsConfig() - config.runtime = "osworld" - config.sandbox.base_container_image = "ubuntu:24.04" - config.sandbox.run_as_fakeroot = False - config.sandbox.runtime_container_image = None # Trigger auto-build - - # Unique event stream per trajectory - file_store = get_file_store('local', f'/tmp/synthetic_data_gen_{job_id}') - event_stream = EventStream(sid=job_id, file_store=file_store) - - logger.debug(f"[initialize_runtime] Creating runtime for {job_id}") - logger.debug(f"[initialize_runtime] VM image: {vm_image_path}") - logger.debug(f"[initialize_runtime] Base image: {config.sandbox.base_container_image}") - - runtime = OSWorldSingularityRuntime( - config=config, - event_stream=event_stream, - sid=job_id, - os_type=os_type, - vm_image_path=vm_image_path, - attach_to_existing=False, - ) + config_list = osworld_setup.get("config", []) + if not config_list: + return True + + os.makedirs(cache_dir, exist_ok=True) + + # Build headers with HF token if available + dl_headers = {} + hf_token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGING_FACE_HUB_TOKEN') + if hf_token: + dl_headers['Authorization'] = f'Bearer {hf_token}' + + for cfg in config_list: + if cfg.get("type") != "download": + continue + + files = cfg.get("parameters", {}).get("files", []) + for f in files: + url = f.get("url", "") + path = f.get("path", "") + if not url or not path: + continue + + cache_path = os.path.join(cache_dir, 
"{:}_{:}".format( + uuid.uuid5(uuid.NAMESPACE_URL, url), + os.path.basename(path))) + + if os.path.exists(cache_path): + logger.info(f"[pre_download] Cache hit: {cache_path}") + continue + + logger.info(f"[pre_download] Downloading {url} to cache...") + max_retries = 8 + downloaded = False + last_error = None + + with _DOWNLOAD_SEMAPHORE: + for i in range(max_retries): + try: + backoff = min(2 ** i + 1, 60) + if i > 0: + logger.info(f"[pre_download] Waiting {backoff}s before retry {i+1}/{max_retries}") + time.sleep(backoff) + + response = requests.get(url, stream=True, timeout=300, headers=dl_headers) + response.raise_for_status() + + downloaded_size = 0 + with open(cache_path, 'wb') as fh: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + fh.write(chunk) + downloaded_size += len(chunk) + + logger.info(f"[pre_download] Downloaded {downloaded_size / (1024*1024):.2f} MB to {cache_path}") + downloaded = True + break + + except requests.RequestException as e: + last_error = e + logger.warning(f"[pre_download] Failed {url}: {e} ({max_retries - i - 1} retries left)") + if os.path.exists(cache_path): + os.remove(cache_path) + + if not downloaded: + logger.error(f"[pre_download] All retries exhausted for {url}. 
Last error: {last_error}") + return False + + return True - logger.debug(f"[initialize_runtime] Runtime object created, connecting to VM...") - - await runtime.connect() - logger.debug(f"[initialize_runtime] ✓ Runtime initialized and connected for {job_id}") - logger.debug(f"[initialize_runtime] VM URL: {runtime.osworld_vm_url if hasattr(runtime, 'osworld_vm_url') else 'N/A'}") - - if osworld_setup and os_type == "linux": - logger.debug(f"[initialize_runtime] Setting up OSWorld...") - logger.debug(f"[initialize_runtime OSWorld Setup: {osworld_setup}") - setup_controller = SetupController( - vm_ip="127.0.0.1", - server_port=runtime._vm_server_port, - chromium_port=runtime._chromium_port, - cache_dir="/tmp/osworld_example", # might need to be changed to a unique directory for each job - client_password="password", - runtime=runtime - ) - await setup_controller.setup(osworld_setup['config']) - logger.debug(f"[initialize_runtime] ✓ OSWorld setup completed") - else: - logger.debug(f"[initialize_runtime] No OSWorld setup provided") + @staticmethod + async def initialize_runtime( + job_id: str, + vm_image_path: str, + os_type: str, + osworld_setup: Dict, + runtime_type: str = "singularity", + nvcf_function_id: Optional[str] = None, + nvcf_version_id: Optional[str] = None, + nvcf_api_key: Optional[str] = None, + nvcf_org: Optional[str] = None, + ): + """ + Initialize runtime using OSWorld's DesktopEnv. - return runtime + For NVCF runtime: creates DesktopEnv(provider_name='nvcf') which + auto-deploys an NVCF function and starts a local proxy. - @staticmethod - def execute_pyautogui_command(runtime: OSWorldSingularityRuntime, pyautogui_command: str): - pyautogui_action = OSWorldInteractiveAction( - method="execute_python_command", - params={ - "command": pyautogui_command, - } + For singularity runtime: creates DesktopEnv(provider_name='singularity') + which uses the local KVM-based approach. 
+ """ + logger.debug(f"[initialize_runtime] Creating {runtime_type} DesktopEnv for {job_id}") + + if runtime_type == "nvcf": + # Set env vars that OSWorld's NVCFProvider reads + if nvcf_api_key: + os.environ.setdefault("NGC_API_KEY", nvcf_api_key) + if nvcf_org: + os.environ.setdefault("NGC_ORG", nvcf_org) + if nvcf_function_id: + os.environ["NVCF_FUNCTION_ID"] = nvcf_function_id + if nvcf_version_id: + os.environ["NVCF_VERSION_ID"] = nvcf_version_id + + provider_name = "nvcf" + else: + provider_name = "singularity" + + env = DesktopEnv( + provider_name=provider_name, + path_to_vm=vm_image_path if runtime_type != "nvcf" else "", + action_space="pyautogui", + headless=True, + os_type="Ubuntu" if os_type == "linux" else os_type, + require_a11y_tree=False, ) - result = runtime.run_action(pyautogui_action) - if not isinstance(result, ErrorObservation): - logger.debug("[execute_pyautogui_command] ✓ Action complete") - else: - logger.debug(f"[execute_pyautogui_command] Error in Action: {result}") + logger.debug(f"[initialize_runtime] DesktopEnv created, resetting with OSWorld setup...") - @staticmethod - def get_screen_size(runtime: OSWorldSingularityRuntime) -> Tuple[int, int]: - observation = runtime.run_action(OSWorldInteractiveAction( - method="get_vm_screen_size", - params={}, - thought="" - )) + # DesktopEnv.reset() handles: start emulator, NVCF deploy, proxy, snapshot revert, setup + env.reset(task_config=osworld_setup) - assert hasattr(observation, "content"), "get_screen_size failed." 
+ logger.debug(f"[initialize_runtime] DesktopEnv reset complete for {job_id}") - match = re.search(r"Width: (\d+), Height: (\d+)", observation.content) - width, height = int(match.group(1)), int(match.group(2)) + return env + + @staticmethod + def execute_pyautogui_command(env, pyautogui_command: str): + """Execute a pyautogui command on the remote VM via OSWorld's PythonController.""" + try: + env.controller.execute_python_command(pyautogui_command) + logger.debug("[execute_pyautogui_command] Action complete") + except Exception as e: + logger.debug(f"[execute_pyautogui_command] Error in Action: {e}") - return width, height + @staticmethod + def get_screen_size(env) -> Tuple[int, int]: + """Get the screen size of the remote VM.""" + try: + size = env.controller.get_vm_screen_size() + if isinstance(size, tuple) and len(size) == 2: + return size + # Fallback: parse from string if needed + if isinstance(size, str): + match = re.search(r"(\d+)\D+(\d+)", size) + if match: + return int(match.group(1)), int(match.group(2)) + except Exception as e: + logger.warning(f"[get_screen_size] Failed: {e}, using defaults") + + return env.screen_width, env.screen_height @staticmethod - def get_screenshot(runtime: OSWorldSingularityRuntime) -> bytes: + def get_screenshot(env) -> bytes: """ - Returns the current screenshot from the runtime, in base64 format. - If screenshot_path is set, save the screenshot as png. + Returns the current screenshot from the DesktopEnv as bytes. 
""" - screenshot = runtime.get_vm_screenshot() + screenshot = env.controller.get_screenshot() if not screenshot: - logger.debug("✗ Failed to get screenshot from runtime.") - raise RuntimeError("Failed to get screenshot from runtime.") - + logger.debug("Failed to get screenshot from DesktopEnv.") + raise RuntimeError("Failed to get screenshot from DesktopEnv.") return screenshot - - - diff --git a/cua/modules/module_data_collector.py b/cua/modules/module_data_collector.py index eb9233493..2c3d75772 100644 --- a/cua/modules/module_data_collector.py +++ b/cua/modules/module_data_collector.py @@ -12,8 +12,6 @@ from pathlib import Path from typing import Optional, Dict, Any, Tuple -import ipdb - from modules.actors.debug_uitars_actor import UITarsActor from modules.debug_planner import Planner from modules.debug_env_controller import EnvController @@ -42,6 +40,13 @@ def __init__(self, args: Namespace): self.vm_image_path = args.vm_image_path self.os_type = 'linux' if 'Ubuntu' in self.vm_image_path else 'windows' + # Runtime type: "singularity" (local KVM) or "nvcf" (NVCF via OSWorld DesktopEnv) + self.runtime_type = getattr(args, 'runtime', 'singularity') + + # NVCF credentials (passed via env vars to OSWorld's NVCFProvider) + self.nvcf_api_key = getattr(args, 'nvcf_api_key', None) + self.nvcf_org = getattr(args, 'nvcf_org', None) + self.max_steps_per_trajectory = args.max_steps_per_trajectory self.max_steps_per_goal = args.max_steps_per_goal @@ -117,10 +122,15 @@ def save_trajectory(trajectory: Dict, trajectory_save_dir: Path): logger.debug(f"✓ [save_trajectory] Saved to {str(trajectory_save_dir / 'trajectory.json')}") - async def init_runtime_for_job(self, trajectory_idx: int) -> Tuple: + async def init_runtime_for_job(self, trajectory_idx: int, + nvcf_function_id: str = None, + nvcf_version_id: str = None) -> Tuple: """ Stage 1: Initialize the VM and OSWorld setup. 
- Returns: (runtime, trajectory, trajectory_save_dir, trajectory_id, osworld_setup) + Returns: (env, trajectory, trajectory_save_dir, trajectory_id, osworld_setup) + + Uses OSWorld's DesktopEnv which handles NVCF deploy, local proxy, + and environment setup internally. """ # Create unique IDs job_id = f"job_{trajectory_idx:04d}" @@ -140,13 +150,35 @@ async def init_runtime_for_job(self, trajectory_idx: int) -> Tuple: else: osworld_setup_ready = True - # Initialize Runtime (Async) - runtime = await EnvController.initialize_runtime( - job_id, self.vm_image_path, self.os_type, osworld_setup + logger.info(f"[job {trajectory_idx:04d}] Sampled OSWorld config: id={osworld_setup.get('id', 'unknown')}, " + f"snapshot={osworld_setup.get('snapshot', 'unknown')}, " + f"apps={osworld_setup.get('related_apps', [])}, " + f"instruction={osworld_setup.get('instruction', '')[:80]}") + + # Pre-download setup files to local cache BEFORE deploying NVCF. + # This avoids wasting expensive NVCF GPU resources if downloads fail. + if self.runtime_type == "nvcf": + logger.info(f"[job {trajectory_idx:04d}] Pre-downloading setup files before NVCF deploy...") + download_ok = EnvController.pre_download_setup_files(osworld_setup) + if not download_ok: + raise RuntimeError( + f"[job {trajectory_idx:04d}] Setup file pre-download failed. " + f"Skipping NVCF deploy to avoid wasting resources." 
+ ) + logger.info(f"[job {trajectory_idx:04d}] Pre-download complete, proceeding with NVCF deploy.") + + # Initialize DesktopEnv (handles NVCF deploy + proxy + setup internally) + env = await EnvController.initialize_runtime( + job_id, self.vm_image_path, self.os_type, osworld_setup, + runtime_type=self.runtime_type, + nvcf_function_id=nvcf_function_id, + nvcf_version_id=nvcf_version_id, + nvcf_api_key=self.nvcf_api_key, + nvcf_org=self.nvcf_org, ) # Get screen size - width, height = EnvController.get_screen_size(runtime) + width, height = EnvController.get_screen_size(env) # Prepare Metadata trajectory = { @@ -160,25 +192,24 @@ async def init_runtime_for_job(self, trajectory_idx: int) -> Tuple: 'steps': [], } - return runtime, trajectory, trajectory_save_dir, trajectory_id, osworld_setup + return env, trajectory, trajectory_save_dir, trajectory_id, osworld_setup - async def collect_trajectory(self, runtime, trajectory: Dict, trajectory_save_dir: Path, osworld_setup: Dict): + async def collect_trajectory(self, env, trajectory: Dict, trajectory_save_dir: Path, osworld_setup: Dict): """ Stage 2: Run the Agent Loop (Goal Generation -> Action Execution). + `env` is an OSWorld DesktopEnv instance. """ # Wait for UI initialization time.sleep(3.0) # Initial Screenshot - screenshot_bytes = EnvController.get_screenshot(runtime) + screenshot_bytes = EnvController.get_screenshot(env) image_filename = trajectory_save_dir / f"0-0.png" save_image(screenshot_bytes, image_filename, logger) # --- 1. Generate High Level Goal --- # - # todo implement the verification mechanism for goal achievability using requirements - # generate goal in a separate loop - prev_requirements = [] # will be a list of tuple [("condition 1", "verdict 1"), ...] 
- example_goals = random.sample(self.example_instructions, 1) # for now, we sample 1 example goal + prev_requirements = [] + example_goals = random.sample(self.example_instructions, 1) goal, requirements = self.planner.generate_goal_with_long_horizon( screenshot_bytes, osworld_setup["config"], example_goals, prev_requirements, ) @@ -231,21 +262,19 @@ async def collect_trajectory(self, runtime, trajectory: Dict, trajectory_save_di ) if action_result is None: - # UI-TARS action generation failed (failed to meet the requirement) - # in this case, save only up to the current trajectory break pyautogui_command = action_result["pyautogui_command"] action_generation = action_result["action_generation"] # Execute - EnvController.execute_pyautogui_command(runtime, pyautogui_command) + EnvController.execute_pyautogui_command(env, pyautogui_command) # Wait & Observe time.sleep(3.0) # Capture new state - screenshot_bytes = EnvController.get_screenshot(runtime) + screenshot_bytes = EnvController.get_screenshot(env) # Save step info action_idx = len(step_for_this_subgoal['actions']) @@ -276,11 +305,11 @@ async def single_trajectory_job(self, trajectory_idx: int): Simply chains the two stages sequentially in the main thread. """ # 1. Init - runtime, trajectory_data, save_dir, t_id, setup = await self.init_runtime_for_job(trajectory_idx) + env, trajectory_data, save_dir, t_id, setup = await self.init_runtime_for_job(trajectory_idx) try: # 2. 
Collect - await self.collect_trajectory(runtime, trajectory_data, save_dir, setup) + await self.collect_trajectory(env, trajectory_data, save_dir, setup) finally: - # Cleanup for debug mode - runtime.close() + # Cleanup + env.close() diff --git a/cua/modules/nvcf_pool.py b/cua/modules/nvcf_pool.py new file mode 100644 index 000000000..4f4cc7167 --- /dev/null +++ b/cua/modules/nvcf_pool.py @@ -0,0 +1,230 @@ +"""NVCF Function Pool: manages a warm pool of pre-deployed NVCF functions for parallel data collection.""" + +import logging +import math +import queue +import threading +from concurrent.futures import ThreadPoolExecutor +from typing import List, Optional, Tuple + +from openhands.core.logger import openhands_logger +from openhands.nvidia.os_world.nvcf import ( + OSWorldDeployer, + OSWorldDeploymentConfig, + OSWorldFunctionConfig, +) + +logger = openhands_logger.getChild('nvcf_pool') +logger.setLevel(logging.INFO) + + +class NVCFPool: + """Thread-safe pool of pre-deployed NVCF functions. + + Deploys NVCF functions at startup and provides acquire/release semantics + so workers can check out a warm VM, use it for a trajectory, and return it. + + When num_vms_per_instance > 1, fewer functions are deployed, each with + multiple VM instances on the same machine, reducing resource overhead. 
+ """ + + def __init__( + self, + pool_size: int, + num_vms_per_instance: int = 1, + nvcf_api_key: Optional[str] = None, + nvcf_org: Optional[str] = None, + ): + self.pool_size = pool_size + self.num_vms_per_instance = num_vms_per_instance + self._deployer = OSWorldDeployer(api_key=nvcf_api_key, org_name=nvcf_org) + self._nvcf_api_key = nvcf_api_key + self._nvcf_org = nvcf_org + + # Number of NVCF functions to deploy + self._num_functions = math.ceil(pool_size / num_vms_per_instance) + + # Each entry is (function_id, version_id) + self._all_functions: List[Tuple[str, str]] = [] + self._available: queue.Queue[Tuple[str, str]] = queue.Queue() + self._lock = threading.Lock() + + def _deploy_one(self, index: int) -> Tuple[str, str]: + """Deploy a single NVCF function and wait for it to become ACTIVE.""" + func_config = OSWorldFunctionConfig( + name=f"nvcf-pool-{index}", + description=f"Warm pool function {index} ({self.num_vms_per_instance} VMs)", + ) + deploy_config = OSWorldDeploymentConfig( + gpu="L40S", + min_instances=self.num_vms_per_instance, + max_instances=self.num_vms_per_instance, + ) + + logger.info(f"[pool-{index}] Creating function ({self.num_vms_per_instance} VMs)...") + result = self._deployer.create_function(func_config) + function = result.get("function", {}) + function_id = function.get("id") + version_id = function.get("versionId") + if not function_id or not version_id: + raise RuntimeError(f"[pool-{index}] create_function failed: {result}") + + logger.info(f"[pool-{index}] Deploying {function_id}...") + self._deployer.deploy(function_id, version_id, deploy_config) + + logger.info(f"[pool-{index}] Waiting for ACTIVE...") + self._deployer.wait_for_active( + function_id, version_id, timeout=1800, poll_interval=30 + ) + logger.info(f"[pool-{index}] ACTIVE: {function_id} with {self.num_vms_per_instance} instances") + return function_id, version_id + + def _undeploy_one(self, function_id: str, version_id: str) -> None: + """Undeploy and delete a single 
NVCF function.""" + try: + logger.info(f"Undeploying {function_id}...") + self._deployer.undeploy(function_id, version_id, graceful=True) + except Exception as e: + logger.warning(f"Failed to undeploy {function_id}: {e}") + try: + self._deployer.delete_function(function_id, version_id) + except Exception as e: + logger.warning(f"Failed to delete function {function_id}: {e}") + + def deploy_all(self, max_workers: int = 8) -> None: + """Deploy NVCF functions in parallel and wait for all to become ACTIVE. + + With num_vms_per_instance > 1, deploys fewer functions (each with + multiple instances) to reach the desired pool_size. + """ + logger.info( + f"Deploying {self._num_functions} NVCF function(s) " + f"x {self.num_vms_per_instance} VMs each = {self.pool_size} total slots..." + ) + + workers = min(max_workers, self._num_functions) + with ThreadPoolExecutor(max_workers=workers) as executor: + futures = [executor.submit(self._deploy_one, i) for i in range(self._num_functions)] + for future in futures: + fn_id, ver_id = future.result() # raises if deploy failed + self._all_functions.append((fn_id, ver_id)) + # Add one entry per VM instance so acquire/release works correctly + for _ in range(self.num_vms_per_instance): + self._available.put((fn_id, ver_id)) + + logger.info( + f"All {self._num_functions} function(s) deployed. " + f"{self._available.qsize()} VM slots ready." 
+ ) + + def deploy_all_from_ids(self, function_ids: List[Tuple[str, str]], vms_per_function: int = 1) -> None: + """Use pre-existing function IDs instead of deploying new ones.""" + for fn_id, ver_id in function_ids: + self._all_functions.append((fn_id, ver_id)) + for _ in range(vms_per_function): + self._available.put((fn_id, ver_id)) + self.pool_size = len(function_ids) * vms_per_function + self.num_vms_per_instance = vms_per_function + logger.info(f"Pool initialized with {len(function_ids)} pre-existing function(s), {self.pool_size} total slots.") + + def acquire(self, timeout: Optional[float] = None) -> Tuple[str, str]: + """Acquire a function from the pool. Blocks until one is available. + + Returns: + (function_id, version_id) tuple + """ + try: + return self._available.get(block=True, timeout=timeout) + except queue.Empty: + raise TimeoutError(f"No NVCF function available within {timeout}s") + + def release(self, function_id: str, version_id: str) -> None: + """Return a function to the pool for reuse.""" + self._available.put((function_id, version_id)) + + def health_check(self, function_id: str) -> bool: + """Check if an NVCF function is still healthy by pinging /platform.""" + try: + import requests + headers = { + "Authorization": f"Bearer {self._nvcf_api_key}", + "Function-ID": function_id, + } + r = requests.get( + "https://grpc.nvcf.nvidia.com/api/platform", + headers=headers, + timeout=10.0, + ) + return r.status_code == 200 + except Exception: + return False + + def _is_function_gone(self, function_id: str, version_id: str) -> bool: + """Check if a function has been completely deleted/evicted (404).""" + try: + self._deployer.get_function_info(function_id, version_id) + return False + except Exception as e: + if '404' in str(e) or 'Not found' in str(e): + return True + return False + + def release_or_replace(self, function_id: str, version_id: str) -> None: + """Release a function back to pool, replacing it if unhealthy. 
+ + For multi-instance functions, individual instance failures are handled + by NVCF internally (it maintains min_instances). We only deploy a + full replacement if the entire function is gone (404). + """ + if self.health_check(function_id): + self._available.put((function_id, version_id)) + return + + # For multi-instance functions: check if the function itself is gone + # vs just a transient instance failure that NVCF will self-heal. + if self.num_vms_per_instance > 1 and not self._is_function_gone(function_id, version_id): + logger.warning( + f"Function {function_id} health check failed but function still exists. " + f"NVCF should self-heal the instance. Releasing slot back to pool." + ) + self._available.put((function_id, version_id)) + return + + logger.warning(f"Function {function_id} is gone (404), deploying replacement...") + # Undeploy broken function in background (best-effort cleanup) + threading.Thread( + target=self._undeploy_one, args=(function_id, version_id), daemon=True + ).start() + # Deploy replacement + try: + new_fn_id, new_ver_id = self._deploy_one(len(self._all_functions)) + with self._lock: + self._all_functions.append((new_fn_id, new_ver_id)) + # Add slots for all VMs on the replacement function + for _ in range(self.num_vms_per_instance): + self._available.put((new_fn_id, new_ver_id)) + logger.info(f"Replacement function {new_fn_id} deployed with {self.num_vms_per_instance} VM slots.") + except Exception as e: + logger.error(f"Failed to deploy replacement: {e}. 
Pool size reduced.") + + def undeploy_all(self) -> None: + """Undeploy and delete all functions in the pool.""" + logger.info(f"Undeploying {len(self._all_functions)} NVCF function(s)...") + for fn_id, ver_id in self._all_functions: + self._undeploy_one(fn_id, ver_id) + self._all_functions.clear() + # Drain the queue + while not self._available.empty(): + try: + self._available.get_nowait() + except queue.Empty: + break + logger.info("All NVCF functions undeployed.") + + @property + def nvcf_api_key(self) -> Optional[str]: + return self._nvcf_api_key + + @property + def nvcf_org(self) -> Optional[str]: + return self._nvcf_org diff --git a/cua/parallel_collect_trajectories.py b/cua/parallel_collect_trajectories.py index 53f91753f..da5a0de87 100644 --- a/cua/parallel_collect_trajectories.py +++ b/cua/parallel_collect_trajectories.py @@ -11,7 +11,7 @@ from openhands.core.logger import openhands_logger # Configure logging -openhands_logger.setLevel(logging.WARNING) +openhands_logger.setLevel(logging.DEBUG) logger = openhands_logger.getChild('parallel_collector') logger.setLevel(logging.INFO) @@ -35,7 +35,7 @@ def __init__(self, index: int): self.error: Optional[str] = None # Runtime objects (populated during execution) - self.runtime: Any = None + self.env: Any = None # OSWorld DesktopEnv instance self.trajectory_data: Optional[Dict] = None self.save_dir: Any = None self.osworld_setup: Any = None @@ -46,6 +46,7 @@ def __init__(self, args, data_collector: DataCollector): self.data_collector = data_collector self.max_parallel = args.max_parallel self.max_trajectories = args.max_trajectories + self.runtime_type = getattr(args, 'runtime', 'singularity') # Queues self.init_queue: queue.Queue = queue.Queue() @@ -65,9 +66,10 @@ def __init__(self, args, data_collector: DataCollector): self._server_running = False # For sequential VM start-ups to mitigate boot storm + # NVCF uses pre-deployed VMs so no boot storm delay needed self._launch_lock = threading.Lock() 
self._last_launch_time = 0 - self._launch_delay_seconds = 15.0 # Wait 15s between starts + self._launch_delay_seconds = 0.0 if self.runtime_type == "nvcf" else 15.0 def start_workers(self): self._server_running = True @@ -112,23 +114,16 @@ async def _init_worker(self, worker_id: int): job_details = self.jobs[job_idx] # Wait for available runtime slot - # logger.debug(f"[init-{worker_id}] Waiting for slot for job {job_idx}") await asyncio.to_thread(self._runtime_semaphore.acquire) # Rate Limit Logic: Prevent Boot Storm wait_time = 0.0 with self._launch_lock: now = time.time() - # The earliest this worker can start is either NOW, - # or 15s after the last scheduled launch. target_start_time = max(now, self._last_launch_time + self._launch_delay_seconds) - wait_time = target_start_time - now - - # Reserve this slot by updating the global timestamp immediately self._last_launch_time = target_start_time - # Perform the wait asynchronously (outside the lock) if wait_time > 0: if wait_time > 1.0: logger.info(f"[init-{worker_id}] Delayed boot-up: waiting {wait_time:.1f}s...") @@ -141,14 +136,14 @@ async def _init_worker(self, worker_id: int): try: # --- call init_runtime_for_job --- # - # This creates the runtime and runs setup - runtime, traj_data, save_dir, traj_id, setup = \ + # This creates the DesktopEnv, deploys NVCF (if needed), and runs setup + env, traj_data, save_dir, traj_id, setup = \ await self.data_collector.init_runtime_for_job(job_idx) # Store details in the pre-allocated object - job_details.job_id = traj_id # Using traj_id as primary ID + job_details.job_id = traj_id job_details.trajectory_id = traj_id - job_details.runtime = runtime + job_details.env = env job_details.trajectory_data = traj_data job_details.save_dir = save_dir job_details.osworld_setup = setup @@ -159,8 +154,15 @@ async def _init_worker(self, worker_id: int): except Exception as e: logger.error(f"[init-{worker_id}] Failed setup for job {job_idx}: {e}") job_details.error = str(e) - 
job_details.completed = False # Failed - job_details.event.set() # Signal main thread we are done (failed) + job_details.completed = False + job_details.event.set() + + # Close DesktopEnv if it was created + if job_details.env: + try: + job_details.env.close() + except Exception: + pass # Release semaphore immediately on failure self._runtime_semaphore.release() @@ -184,7 +186,7 @@ async def _collect_worker(self, worker_id: int): # --- call collect_trajectory --- # This runs the Planner/Actor loop await self.data_collector.collect_trajectory( - job_details.runtime, + job_details.env, job_details.trajectory_data, job_details.save_dir, job_details.osworld_setup @@ -196,10 +198,9 @@ async def _collect_worker(self, worker_id: int): logger.error(f"[collect-{worker_id}] Error in {traj_id}: {e}") job_details.error = str(e) finally: - # Cleanup Runtime - if job_details.runtime: - # Run close in background thread to not block loop - threading.Thread(target=job_details.runtime.close, daemon=True).start() + # Cleanup DesktopEnv (closes NVCF proxy, undeploys function) + if job_details.env: + threading.Thread(target=job_details.env.close, daemon=True).start() # Release semaphore (allows new Init worker to proceed) self._runtime_semaphore.release() @@ -269,6 +270,10 @@ def parse_args(): parser.add_argument("--planner_node", type=str, required=True) parser.add_argument("--actor_node", type=str, required=True) + # Runtime selection + parser.add_argument("--runtime", type=str, choices=["singularity", "nvcf"], default="singularity", + help="Runtime backend: 'singularity' (local KVM) or 'nvcf' (NVCF via OSWorld DesktopEnv)") + # Environment & Setup parser.add_argument("--vm_image_path", type=str, default="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/cua/prorl-agent-server/OS_images/Ubuntu.qcow2") @@ -295,8 +300,7 @@ def parse_args(): # Parallel specific args parser.add_argument("--max_parallel", type=int, default=24, help="Max concurrent VMs") parser.add_argument( - 
"--max_trajectories", type=int, default=10000, help="Total trajectories to generate" - ) + "--max_trajectories", type=int, default=10000, help="Total trajectories to generate") return parser.parse_args() @@ -309,6 +313,9 @@ async def main(): logger.info("DataCollector initialized (datasets loaded)") # 2. Start Parallel Generator + # Each worker's DesktopEnv manages its own NVCF function lifecycle + # (deploy, local proxy, health monitoring, undeploy on close) + # No centralized NVCFPool needed - OSWorld's NVCFProvider handles everything. generator = ParallelTrajectoryGenerator(args, data_collector) await generator.run() diff --git a/cua/scripts/collect_trajectories_sbatch.sh b/cua/scripts/collect_trajectories_sbatch.sh new file mode 100755 index 000000000..4aeaecf9c --- /dev/null +++ b/cua/scripts/collect_trajectories_sbatch.sh @@ -0,0 +1,251 @@ +#!/bin/bash +# ============================================================================ +# 2-Node Batch Trajectory Collection +# ============================================================================ +# Node 0 (Planner): Runs Qwen3-VL-235B vLLM server (tp=8, all 8 GPUs) +# Node 1 (Actor): 2x UI-TARS-1.5-7B vLLM servers (tp=4 each, GPUs 0-3 + 4-7) +# + round-robin load balancer on port 8000 +# + trajectory collection +# +# Usage: sbatch collect_trajectories_sbatch.sh +# ============================================================================ + +#SBATCH --job-name=traj_collect +#SBATCH --account=nvr_lacr_llm +#SBATCH --partition=batch_block1 +#SBATCH --gpus-per-node=8 +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=1 +#SBATCH --mem=0 +#SBATCH --time=04:00:00 +#SBATCH --exclusive +#SBATCH --output=/lustre/fsw/portfolios/nvr/users/bcui/ProRL-Agent-Server/cua/scripts/logs/traj_collect_%j.out +#SBATCH --error=/lustre/fsw/portfolios/nvr/users/bcui/ProRL-Agent-Server/cua/scripts/logs/traj_collect_%j.err + +set -euo pipefail + +# ============================================================================ +# 
Configuration +# ============================================================================ +IMAGE="/lustre/fsw/portfolios/nvr/users/bcui/images/cua-vllm-0.13.0.sqsh" +PROJECT_ROOT="/lustre/fsw/portfolios/nvr/users/bcui/ProRL-Agent-Server" +PROJECT_DIR="$PROJECT_ROOT/cua" + +PLANNER_MODEL="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/models/Qwen3-VL-235B-A22B-Thinking" +ACTOR_MODEL="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/bcui/huggingface_models/UI-TARS-1.5-7B" + +PLANNER_PORT=8000 +ACTOR_PORT=8000 # Load balancer port (what the code talks to) +ACTOR_PORT_1=8001 # Actor replica 1 (GPUs 0-3) +ACTOR_PORT_2=8002 # Actor replica 2 (GPUs 4-7) + +# Trajectory collection settings +MAX_PARALLEL=16 +MAX_TRAJECTORIES=1024 + +# Log file name (timestamped) +TIMESTAMP=$(date +%m-%d-%H%M) +LOG_FILE="$PROJECT_DIR/${TIMESTAMP}-logs.log" + +# ============================================================================ +# Resolve node assignments +# ============================================================================ +ALL_NODES=$(scontrol show hostnames "$SLURM_JOB_NODELIST") +PLANNER_NODE=$(echo "$ALL_NODES" | head -n 1) +ACTOR_NODE=$(echo "$ALL_NODES" | tail -n 1) + +echo "[sbatch] Job ID: $SLURM_JOB_ID" +echo "[sbatch] Planner Node: $PLANNER_NODE" +echo "[sbatch] Actor Node: $ACTOR_NODE" +echo "[sbatch] Log file: $LOG_FILE" + +mkdir -p "$PROJECT_DIR/scripts/logs" + +# ============================================================================ +# Launch Planner vLLM server on Node 0 +# ============================================================================ +echo "[sbatch] Starting Planner vLLM server on $PLANNER_NODE..." 
+srun --nodes=1 --ntasks=1 --nodelist="$PLANNER_NODE" \ + --container-image="$IMAGE" \ + --container-mounts=/lustre:/lustre \ + --container-writable \ + bash -c " + vllm serve $PLANNER_MODEL \ + --api-key gen \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --limit-mm-per-prompt.video 0 \ + --limit-mm-per-prompt.image 3 \ + --async-scheduling \ + --max-model-len 65536 \ + --gpu-memory-utilization 0.9 \ + > $PROJECT_DIR/scripts/logs/planner_${SLURM_JOB_ID}.log 2>&1 + " & +PLANNER_SRUN_PID=$! + +# ============================================================================ +# Launch Actor vLLM server + trajectory collection on Node 1 +# ============================================================================ +echo "[sbatch] Starting 2x Actor vLLM servers + collection on $ACTOR_NODE..." +srun --nodes=1 --ntasks=1 --nodelist="$ACTOR_NODE" \ + --container-image="$IMAGE" \ + --container-mounts=/lustre:/lustre \ + --container-writable \ + bash -c " + # --- Start Actor vLLM replica 1 (GPUs 0-3) --- + CUDA_VISIBLE_DEVICES=0,1,2,3 vllm serve $ACTOR_MODEL \ + --served-model-name ByteDance-Seed/UI-TARS-1.5-7B \ + --api-key gen \ + --port $ACTOR_PORT_1 \ + --tensor-parallel-size 4 \ + --limit-mm-per-prompt.image 5 \ + --limit-mm-per-prompt.video 0 \ + --max-model-len 65536 \ + --disable-log-requests \ + --disable-log-stats \ + > $PROJECT_DIR/scripts/logs/actor1_${SLURM_JOB_ID}.log 2>&1 & + ACTOR1_PID=\$! + + # --- Start Actor vLLM replica 2 (GPUs 4-7) --- + CUDA_VISIBLE_DEVICES=4,5,6,7 vllm serve $ACTOR_MODEL \ + --served-model-name ByteDance-Seed/UI-TARS-1.5-7B \ + --api-key gen \ + --port $ACTOR_PORT_2 \ + --tensor-parallel-size 4 \ + --limit-mm-per-prompt.image 5 \ + --limit-mm-per-prompt.video 0 \ + --max-model-len 65536 \ + --disable-log-requests \ + --disable-log-stats \ + > $PROJECT_DIR/scripts/logs/actor2_${SLURM_JOB_ID}.log 2>&1 & + ACTOR2_PID=\$! 
+ + # --- Start round-robin load balancer on port $ACTOR_PORT --- + python3 -c ' +import http.server, http.client, threading, sys, io + +backends = [(\"localhost\", $ACTOR_PORT_1), (\"localhost\", $ACTOR_PORT_2)] +counter = 0 +lock = threading.Lock() + +class LBHandler(http.server.BaseHTTPRequestHandler): + def do_ANY(self, method): + global counter + with lock: + host, port = backends[counter % len(backends)] + counter += 1 + + content_length = int(self.headers.get(\"Content-Length\", 0)) + body = self.rfile.read(content_length) if content_length > 0 else None + + try: + conn = http.client.HTTPConnection(host, port, timeout=300) + conn.request(method, self.path, body=body, headers=dict(self.headers)) + resp = conn.getresponse() + resp_body = resp.read() + + self.send_response(resp.status) + for k, v in resp.getheaders(): + if k.lower() not in (\"transfer-encoding\",): + self.send_header(k, v) + self.end_headers() + self.wfile.write(resp_body) + conn.close() + except Exception as e: + self.send_response(502) + self.end_headers() + self.wfile.write(f\"LB error: {e}\".encode()) + + def do_GET(self): self.do_ANY(\"GET\") + def do_POST(self): self.do_ANY(\"POST\") + def do_PUT(self): self.do_ANY(\"PUT\") + def do_DELETE(self): self.do_ANY(\"DELETE\") + def log_message(self, format, *args): pass # silence logs + +server = http.server.ThreadingHTTPServer((\"0.0.0.0\", $ACTOR_PORT), LBHandler) +print(f\"[LB] Round-robin load balancer on port $ACTOR_PORT -> {backends}\", flush=True) +server.serve_forever() +' > $PROJECT_DIR/scripts/logs/actor_lb_${SLURM_JOB_ID}.log 2>&1 & + LB_PID=\$! + + # --- Wait for all servers to be healthy --- + echo '[actor-node] Waiting for vLLM servers to become healthy...' 
+ + wait_for_server() { + local host=\$1 + local port=\$2 + local name=\$3 + local max_wait=600 + local elapsed=0 + + while [ \$elapsed -lt \$max_wait ]; do + if curl -sf http://\${host}:\${port}/health > /dev/null 2>&1; then + echo \"[actor-node] \$name server healthy (\${elapsed}s)\" + return 0 + fi + sleep 10 + elapsed=\$((elapsed + 10)) + if [ \$((elapsed % 60)) -eq 0 ]; then + echo \"[actor-node] Still waiting for \$name (\${elapsed}s)...\" + fi + done + echo \"[actor-node] ERROR: \$name server did not start within \${max_wait}s\" + return 1 + } + + # Wait for both actor replicas and planner + wait_for_server localhost $ACTOR_PORT_1 'Actor-1 (GPU 0-3)' + ACTOR1_OK=\$? + + wait_for_server localhost $ACTOR_PORT_2 'Actor-2 (GPU 4-7)' + ACTOR2_OK=\$? + + wait_for_server $PLANNER_NODE $PLANNER_PORT Planner + PLANNER_OK=\$? + + if [ \$ACTOR1_OK -ne 0 ] || [ \$ACTOR2_OK -ne 0 ] || [ \$PLANNER_OK -ne 0 ]; then + echo '[actor-node] ERROR: One or more servers failed to start.' + echo 'Planner log:' && tail -20 $PROJECT_DIR/scripts/logs/planner_${SLURM_JOB_ID}.log 2>/dev/null + echo 'Actor-1 log:' && tail -20 $PROJECT_DIR/scripts/logs/actor1_${SLURM_JOB_ID}.log 2>/dev/null + echo 'Actor-2 log:' && tail -20 $PROJECT_DIR/scripts/logs/actor2_${SLURM_JOB_ID}.log 2>/dev/null + kill \$ACTOR1_PID \$ACTOR2_PID \$LB_PID 2>/dev/null + exit 1 + fi + + echo '[actor-node] All servers healthy. Starting trajectory collection...' + + # --- Run trajectory collection --- + cd $PROJECT_DIR + source cua_env_reqs/bin/activate + export PYTHONPATH=$PROJECT_ROOT:\$PYTHONPATH + + python parallel_collect_trajectories.py \ + --planner_node $PLANNER_NODE \ + --actor_node $ACTOR_NODE \ + --runtime nvcf \ + --max_parallel $MAX_PARALLEL \ + --max_trajectories $MAX_TRAJECTORIES \ + 2>&1 | tee $LOG_FILE + + COLLECT_EXIT=\$? 
+ + # Cleanup + kill \$ACTOR1_PID \$ACTOR2_PID \$LB_PID 2>/dev/null + echo \"[actor-node] Collection finished (exit code: \$COLLECT_EXIT)\" + exit \$COLLECT_EXIT + " & +ACTOR_SRUN_PID=$! + +# ============================================================================ +# Wait for completion +# ============================================================================ +# Wait for the actor srun (which runs collection). When it finishes, kill planner. +wait $ACTOR_SRUN_PID +COLLECT_EXIT=$? + +echo "[sbatch] Actor node finished (exit: $COLLECT_EXIT). Stopping planner..." +kill $PLANNER_SRUN_PID 2>/dev/null +wait $PLANNER_SRUN_PID 2>/dev/null + +echo "[sbatch] Done. Log: $LOG_FILE" +exit $COLLECT_EXIT diff --git a/cua/scripts/debug_check_kvm.sbatch b/cua/scripts/debug_check_kvm.sbatch index 27f2719da..650b8671e 100644 --- a/cua/scripts/debug_check_kvm.sbatch +++ b/cua/scripts/debug_check_kvm.sbatch @@ -1,29 +1,53 @@ #!/bin/bash -#SBATCH --array=0%1 # todo set the number of runs here -#SBATCH --partition=cpu_interactive -#SBATCH --reservation=sla_res_osworld_agent_vlm_cpu_only -#SBATCH --account=nvr_lpr_agentic -#SBATCH --job-name=cua-check_kvm +#SBATCH --array=0%1 +#SBATCH --partition=interactive +#SBATCH --account=llmservice_fm_vision +#SBATCH --reservation=sla_res_osworld_agent_vlm +#SBATCH --gpus-per-node=8 +#SBATCH --job-name=cua-check_kvm_gpu #SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 -#SBATCH --time=04:00:00 -#SBATCH --output=logs/slurm-%A.out -#SBATCH --error=logs/slurm-%A.out +#SBATCH --time=00:30:00 +#SBATCH --exclusive +#SBATCH --output=logs/slurm-kvm-gpu-%A.out +#SBATCH --error=logs/slurm-kvm-gpu-%A.out +# ============================================================================ +# Test KVM availability on GPU partition nodes +# This validates that /dev/kvm is accessible inside a container on GPU nodes, +# which is required for the consolidated 2-node setup. 
+# ============================================================================ -CONTAINER_IMAGE=/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/images/cua_cpu.sqsh +CONTAINER_IMAGE=/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/images/cua_vllm.sqsh # 1. Identify the Compute Node -# In case this script runs on a head node, we grab the assigned node name. TARGET_NODE=$(scontrol show hostnames "$SLURM_NODELIST" | head -n 1) -echo "[Script] Job assigned to node: $TARGET_NODE" +echo "[Script] Job assigned to GPU node: $TARGET_NODE" + +# 1.5. Check if /dev/kvm exists on the node (before container) +echo "[Script] Checking if /dev/kvm exists on $TARGET_NODE (host level)..." +ssh -q -o StrictHostKeyChecking=no "$TARGET_NODE" " + echo '--- Host-level KVM check ---' + if [ -e /dev/kvm ]; then + echo '/dev/kvm EXISTS on host' + ls -la /dev/kvm + stat /dev/kvm + getent group kvm 2>/dev/null || echo 'kvm group not found' + else + echo 'WARNING: /dev/kvm does NOT exist on this GPU node!' + echo 'KVM may not be enabled on GPU partition nodes.' + echo 'This is a HARD BLOCKER for the consolidated setup.' + fi + echo '--- End host-level check ---' +" -# 2. Launch Container on that Node +# 2. Launch Container on that Node (with /dev/kvm mount) echo "[Script] Launching container (sleep infinity) on $TARGET_NODE..." srun --nodelist="$TARGET_NODE" \ --container-image="$CONTAINER_IMAGE" \ - --container-mounts=/lustre:/lustre \ + --container-mounts=/lustre:/lustre,/dev/kvm:/dev/kvm \ + --container-writable \ sleep infinity & SRUN_PID=$! @@ -35,34 +59,54 @@ CONTAINER_PID="" while [ -z "$CONTAINER_PID" ]; do sleep 2 - # explanation of the command sent via ssh: - # 1. enroot list -f -> Lists processes - # 2. grep pyxis -> Filters for your container name - # 3. grep sleep -> Ensures the command column shows 'sleep' - # 4. 
awk print $2 -> Grabs the PID CONTAINER_PID=$(ssh -q -o StrictHostKeyChecking=no "$TARGET_NODE" \ "enroot list -f | grep 'pyxis' | grep 'sleep' | awk '{print \$2}' | head -n 1") if [ -z "$CONTAINER_PID" ]; then - echo "[Script] Container registered, but 'sleep' command not yet visible. Retrying..." + echo "[Script] Container not yet visible. Retrying..." fi done echo "[Script] Found PID: $CONTAINER_PID on $TARGET_NODE" -# 4. Execute the Test (via SSH -> Enroot Exec) -echo "[Script] Checking kvm write permission..." +# 4. Execute KVM checks inside container +echo "[Script] Checking KVM inside container on GPU node..." ssh -o StrictHostKeyChecking=no "$TARGET_NODE" "enroot exec $CONTAINER_PID bash -c ' - if touch /dev/kvm 2>/dev/null; then - echo \"SUCCESS: /dev/kvm is writable!\" + echo \"--- Container-level KVM check (GPU node) ---\" + echo \"Running as user: \$(whoami) (uid=\$(id -u), gid=\$(id -g))\" + echo \"User groups: \$(groups)\" + echo \"\" + + if [ -e /dev/kvm ]; then + echo \"/dev/kvm EXISTS in container\" + ls -la /dev/kvm + + if [ -r /dev/kvm ]; then + echo \"READ permission: YES\" + else + echo \"READ permission: NO\" + fi + + if [ -w /dev/kvm ]; then + echo \"WRITE permission: YES\" + echo \"\" + echo \"SUCCESS: /dev/kvm is writable on GPU node!\" + echo \"The consolidated 2-node setup should work.\" + else + echo \"WRITE permission: NO\" + echo \"\" + echo \"FAILURE: Cannot write to /dev/kvm on GPU node\" + echo \"Admin intervention may be needed.\" + exit 1 + fi else - echo \"FAILURE: Permission denied.\" + echo \"FAILURE: /dev/kvm does NOT exist in container\" + echo \"The /dev/kvm mount may have failed or KVM is not available on this GPU node.\" exit 1 fi -'" - - - - - + echo \"\" + echo \"--- GPU check ---\" + nvidia-smi -L 2>/dev/null || echo \"nvidia-smi not available (expected in non-CUDA container)\" + echo \"--- End checks ---\" +'" diff --git a/cua/scripts/debug_interactive.sh b/cua/scripts/debug_interactive.sh index 0f7299f0a..9d43e850d 
100644 --- a/cua/scripts/debug_interactive.sh +++ b/cua/scripts/debug_interactive.sh @@ -3,13 +3,14 @@ # --- 1. Submit the "Holder" Job --- echo "[Local] Submitting background job to reserve node..." -# for CPU reservation -IMAGE="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/images/cua_cpu.sqsh" +# GPU reservation (consolidated: runs VMs on GPU node) +# IMAGE="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/images/cua_vllm.sqsh" +IMAGE="/lustre/fsw/portfolios/nvr/users/bcui/images/cua-vllm-0.13.0.sqsh" JOB_ID=$(sbatch --parsable \ --job-name=kvm_interactive \ - --account=nvr_lpr_agentic \ - --partition=cpu_interactive \ - --reservation=sla_res_osworld_agent_vlm_cpu_only \ + --account=nvr_lacr_llm \ + --partition=interactive \ + --gpus-per-node=8 \ --nodes=1 \ --ntasks-per-node=1 \ --time=04:00:00 \ @@ -18,14 +19,15 @@ JOB_ID=$(sbatch --parsable \ --error=/dev/null \ --wrap="srun --container-image=$IMAGE --container-mounts=/lustre:/lustre sleep infinity") -# for GPU reservation - note: the container image for GPU node is not ready yet -#IMAGE="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/images/cua_vllm.sqsh" +# Old CPU-only reservation (kept for reference) +#IMAGE="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/images/cua_cpu.sqsh" #JOB_ID=$(sbatch --parsable \ # --job-name=kvm_interactive \ -# --account=llmservice_fm_vision \ -# --partition=interactive \ -# --gpus-per-node=8 \ -# --reservation=sla_res_osworld_agent_vlm \ +# --account=nvr_lpr_agentic \ +# --partition=cpu_interactive \ +# --reservation=sla_res_osworld_agent_vlm_cpu_only \ +# --nodes=1 \ +# --ntasks-per-node=1 \ # --time=04:00:00 \ # --exclusive \ # --output=/dev/null \ @@ -87,7 +89,10 @@ echo "[Local] Found Container PID: $CONTAINER_PID" # --- 5. 
Launch Interactive Session --- echo "==========================================================" -echo " KVM-enabled shell on $NODE " +echo " KVM-enabled GPU shell on $NODE " +echo "==========================================================" +echo " Container: $(basename $IMAGE)" +echo " /dev/kvm mounted for VM support" echo "==========================================================" # -t forces pseudo-terminal allocation so you get an interactive shell diff --git a/cua/scripts/debug_interactive_2node.sh b/cua/scripts/debug_interactive_2node.sh new file mode 100755 index 000000000..2af498c94 --- /dev/null +++ b/cua/scripts/debug_interactive_2node.sh @@ -0,0 +1,218 @@ +#!/bin/bash +# ============================================================================ +# 2-Node Interactive GPU Debug Script +# ============================================================================ +# Node 1 (Planner): Runs Qwen3-VL-235B vLLM server (tp=8, all 8 GPUs) +# Node 2 (Actor): Runs UI-TARS-1.5-7B vLLM server (tp=4) + KVM for VMs +# +# Both vLLM servers auto-start, then you get an interactive shell on the +# Actor node for manual debugging / data collection. +# ============================================================================ + +IMAGE="/lustre/fsw/portfolios/nvr/users/bcui/images/cua-vllm-0.13.0.sqsh" + +PROJECT_ROOT="/lustre/fsw/portfolios/nvr/users/bcui/ProRL-Agent-Server" +PROJECT_DIR="$PROJECT_ROOT/cua" + +PLANNER_MODEL="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/models/Qwen3-VL-235B-A22B-Thinking" +# ACTOR_MODEL="ByteDance-Seed/UI-TARS-1.5-7B" +ACTOR_MODEL="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/bcui/huggingface_models/UI-TARS-1.5-7B" + +PLANNER_PORT=8000 +ACTOR_PORT=8000 + +# --- 1. Submit Holder Job (2 GPU nodes) --- +echo "[Local] Submitting 2-node GPU job..." 
+ +JOB_ID=$(sbatch --parsable \ + --job-name=debug_2node \ + --account=llmservice_fm_vision \ + --partition=interactive \ + --reservation=sla_res_osworld_agent_vlm \ + --gpus-per-node=8 \ + --nodes=2 \ + --ntasks-per-node=1 \ + --mem=0 \ + --time=04:00:00 \ + --exclusive \ + --output=$PROJECT_DIR/scripts/logs/debug_2node_holder_%j.out \ + --error=$PROJECT_DIR/scripts/logs/debug_2node_holder_%j.err \ + --wrap="srun --container-image=$IMAGE --container-mounts=/lustre:/lustre --container-writable sleep infinity") + +if [ -z "$JOB_ID" ]; then + echo "Error: Job submission failed." + exit 1 +fi + +echo "[Local] Job submitted. ID: $JOB_ID" + +# --- 2. Cleanup Trap --- +cleanup() { + echo "" + echo "[Local] Cleaning up... Cancelling Job $JOB_ID" + scancel "$JOB_ID" +} +trap cleanup EXIT + +# --- 3. Wait for Job to Start --- +echo "[Local] Waiting for job to start..." +NODES="" +while [ -z "$NODES" ]; do + JOB_STATE=$(squeue -j "$JOB_ID" -h -o %T) + if [ "$JOB_STATE" == "RUNNING" ]; then + NODES=$(squeue -j "$JOB_ID" -h -o %N) + elif [ -z "$JOB_STATE" ]; then + echo "Error: Job disappeared from queue!" + exit 1 + fi + sleep 2 +done + +# Expand nodelist to individual hostnames +ALL_NODES=$(scontrol show hostnames "$NODES") +PLANNER_NODE=$(echo "$ALL_NODES" | head -n 1) +ACTOR_NODE=$(echo "$ALL_NODES" | tail -n 1) + +echo "[Local] Job is RUNNING" +echo "[Local] Planner Node: $PLANNER_NODE" +echo "[Local] Actor Node: $ACTOR_NODE" + +# --- 4. Wait for Containers on Both Nodes --- +wait_for_container() { + local node=$1 + local name=$2 + local pid="" + + echo "[Local] Waiting for container on $name ($node)..." >&2 + while [ -z "$pid" ]; do + sleep 2 + pid=$(ssh -q -o StrictHostKeyChecking=no "$node" \ + "enroot list -f | grep 'pyxis' | grep 'sleep' | awk '{print \$2}' | head -n 1") + if [ -z "$pid" ]; then + printf "." 
>&2 + fi + done + echo "" >&2 + echo "[Local] $name container ready (PID: $pid)" >&2 + echo "$pid" +} + +PLANNER_PID=$(wait_for_container "$PLANNER_NODE" "Planner") +ACTOR_PID=$(wait_for_container "$ACTOR_NODE" "Actor") + +# --- 5. Launch Planner vLLM Server (background) --- +echo "[Local] Starting Planner vLLM server on $PLANNER_NODE..." +ssh -q -o StrictHostKeyChecking=no "$PLANNER_NODE" \ + "enroot exec $PLANNER_PID bash -c ' + mkdir -p $PROJECT_DIR/scripts/logs + nohup vllm serve $PLANNER_MODEL \ + --api-key gen \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --limit-mm-per-prompt.video 0 \ + --limit-mm-per-prompt.image 3 \ + --async-scheduling \ + --max-model-len 65536 \ + --gpu-memory-utilization 0.9 \ + > $PROJECT_DIR/scripts/logs/planner_debug.log 2>&1 & + echo \"[Planner] vLLM server launched (PID: \$!)\" + '" & + +# --- 6. Launch Actor vLLM Server (background) --- +echo "[Local] Starting Actor vLLM server on $ACTOR_NODE..." +ssh -q -o StrictHostKeyChecking=no "$ACTOR_NODE" \ + "enroot exec $ACTOR_PID bash -c ' + mkdir -p $PROJECT_DIR/scripts/logs + nohup vllm serve $ACTOR_MODEL \ + --served-model-name ByteDance-Seed/UI-TARS-1.5-7B \ + --api-key gen \ + --tensor-parallel-size 4 \ + --limit-mm-per-prompt.image 5 \ + --limit-mm-per-prompt.video 0 \ + --max-model-len 65536 \ + --disable-log-requests \ + --disable-log-stats \ + > $PROJECT_DIR/scripts/logs/actor_debug.log 2>&1 & + echo \"[Actor] vLLM server launched (PID: \$!)\" + '" & + +wait # wait for both SSH commands to return + +# --- 7. Wait for Both Servers to Be Healthy --- +echo "[Local] Waiting for vLLM servers to become healthy..." 
+ +wait_for_server() { + local node=$1 + local container_pid=$2 + local port=$3 + local name=$4 + local max_wait=600 + local elapsed=0 + + while [ $elapsed -lt $max_wait ]; do + if ssh -q -o StrictHostKeyChecking=no "$node" \ + "enroot exec $container_pid curl -sf http://localhost:$port/health" > /dev/null 2>&1; then + echo "[Local] $name server healthy on $node:$port" + return 0 + fi + sleep 10 + elapsed=$((elapsed + 10)) + if [ $((elapsed % 60)) -eq 0 ]; then + echo "[Local] Still waiting for $name (${elapsed}s)..." + fi + done + + echo "[Local] ERROR: $name server did not start within ${max_wait}s" + return 1 +} + +wait_for_server "$PLANNER_NODE" "$PLANNER_PID" "$PLANNER_PORT" "Planner" & +WAIT_PLANNER_PID=$! + +wait_for_server "$ACTOR_NODE" "$ACTOR_PID" "$ACTOR_PORT" "Actor" & +WAIT_ACTOR_PID=$! + +wait $WAIT_PLANNER_PID +PLANNER_OK=$? + +wait $WAIT_ACTOR_PID +ACTOR_OK=$? + +if [ $PLANNER_OK -ne 0 ] || [ $ACTOR_OK -ne 0 ]; then + echo "[Local] ERROR: One or both servers failed to start." + echo "[Local] Check logs:" + echo " Planner: $PROJECT_DIR/scripts/logs/planner_debug.log" + echo " Actor: $PROJECT_DIR/scripts/logs/actor_debug.log" + exit 1 +fi + +# --- 8. 
Launch Interactive Shell on Actor Node --- +echo "" +echo "==========================================================" +echo " 2-Node Interactive Debug Session" +echo "==========================================================" +echo " Planner: $PLANNER_NODE (Qwen3-VL-235B, port $PLANNER_PORT)" +echo " Actor: $ACTOR_NODE (UI-TARS-1.5-7B, port $ACTOR_PORT)" +echo " KVM: available via reservation" +echo "" +echo " Planner API: http://$PLANNER_NODE:$PLANNER_PORT" +echo " Actor API: http://localhost:$ACTOR_PORT" +echo "" +echo " Logs:" +echo " tail -f $PROJECT_DIR/scripts/logs/planner_debug.log" +echo " tail -f $PROJECT_DIR/scripts/logs/actor_debug.log" +echo "==========================================================" + +ssh -t -q -o StrictHostKeyChecking=no "$ACTOR_NODE" \ + "enroot exec $ACTOR_PID bash -c ' + cd $PROJECT_DIR + source cua_env_reqs/bin/activate + export PYTHONPATH=$PROJECT_ROOT:\$PYTHONPATH + export PLANNER_NODE=$PLANNER_NODE + export PLANNER_PORT=$PLANNER_PORT + export ACTOR_PORT=$ACTOR_PORT + exec /bin/bash -l + '" + +# --- 9. End --- +echo "[Local] Session ended." diff --git a/cua/scripts/run.sh b/cua/scripts/run.sh new file mode 100755 index 000000000..98cd852a2 --- /dev/null +++ b/cua/scripts/run.sh @@ -0,0 +1,138 @@ +#!/bin/bash +# ============================================================================ +# CUA Data Collection - Multi-Actor Launcher +# ============================================================================ +# Orchestrates 1 planner + N actor nodes: +# 1. Submits planner sbatch job (Qwen3-VL-235B vLLM) +# 2. Waits for planner to write its hostname to a coordination file +# 3. Launches NUM_ACTORS instances of run_actor_and_vm.sh in parallel +# 4. Each actor submits its own holder job on a reserved node +# 5. 
Waits for all actors to finish, then cancels planner +# +# Usage: +# bash run.sh # 1 actor (default) +# NUM_ACTORS=3 MAX_PARALLEL=4 MAX_TRAJECTORIES=500 bash run.sh +# ============================================================================ + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +export LOG_DIR="${LOG_DIR:-$SCRIPT_DIR/logs_multi_thread}" + +# Configurable parameters +NUM_ACTORS="${NUM_ACTORS:-1}" +MAX_PARALLEL="${MAX_PARALLEL:-16}" +MAX_TRAJECTORIES="${MAX_TRAJECTORIES:-10000}" + +# Create logs directory +mkdir -p "$LOG_DIR" + +# Generate unique coordination file +COORD_ID="$(date +%Y%m%d_%H%M%S)_$$" +COORD_FILE="$LOG_DIR/.planner_host_${COORD_ID}" + +PLANNER_JOB_ID="" + +echo "============================================" +echo "CUA Data Collection - Multi-Actor Launcher" +echo "============================================" +echo "NUM_ACTORS: $NUM_ACTORS" +echo "MAX_PARALLEL: $MAX_PARALLEL (per actor)" +echo "MAX_TRAJECTORIES: $MAX_TRAJECTORIES (per actor)" +echo "Coordination file: $COORD_FILE" +echo "" + +# --- Cleanup: cancel planner on exit --- +cleanup() { + echo "" + echo "[run.sh] Cleaning up..." + if [ -n "$PLANNER_JOB_ID" ]; then + echo "[run.sh] Cancelling planner job $PLANNER_JOB_ID" + scancel "$PLANNER_JOB_ID" 2>/dev/null + fi + rm -f "$COORD_FILE" +} +trap cleanup EXIT + +# --- 1. Submit Planner Job --- +echo "[run.sh] Submitting planner job..." +PLANNER_JOB_ID=$(sbatch \ + --output="$LOG_DIR/planner-%j.out" \ + --export=ALL,COORD_FILE="$COORD_FILE" \ + --parsable \ + "$SCRIPT_DIR/run_planner.sbatch") +echo "[run.sh] Planner job submitted: $PLANNER_JOB_ID" + +# --- 2. Wait for planner hostname --- +echo "[run.sh] Waiting for planner to start and write hostname..." 
+PLANNER_NODE="" +COORD_ELAPSED=0 +MAX_COORD_WAIT=900 # 15 minutes + +while [ $COORD_ELAPSED -lt $MAX_COORD_WAIT ]; do + if [ -f "$COORD_FILE" ]; then + PLANNER_NODE=$(cat "$COORD_FILE" | tr -d '[:space:]') + if [ -n "$PLANNER_NODE" ]; then + echo "[run.sh] Planner node discovered: $PLANNER_NODE" + break + fi + fi + sleep 10 + COORD_ELAPSED=$((COORD_ELAPSED + 10)) + if [ $((COORD_ELAPSED % 60)) -eq 0 ]; then + echo "[run.sh] Still waiting for planner (${COORD_ELAPSED}s)..." + fi +done + +if [ -z "$PLANNER_NODE" ]; then + echo "[run.sh] ERROR: Planner did not start within ${MAX_COORD_WAIT}s." + exit 1 +fi + +# --- 3. Launch N Actor Instances --- +echo "[run.sh] Launching $NUM_ACTORS actor(s)..." +ACTOR_PIDS=() + +for i in $(seq 1 "$NUM_ACTORS"); do + echo "[run.sh] Starting actor $i..." + PLANNER_NODE="$PLANNER_NODE" \ + MAX_PARALLEL="$MAX_PARALLEL" \ + MAX_TRAJECTORIES="$MAX_TRAJECTORIES" \ + bash "$SCRIPT_DIR/run_actor_and_vm.sh" "$i" \ + > "$LOG_DIR/actor_launcher_${i}.log" 2>&1 & + ACTOR_PIDS+=($!) + echo "[run.sh] Actor $i launched (PID ${ACTOR_PIDS[-1]}, log: $LOG_DIR/actor_launcher_${i}.log)" +done + +# --- 4. Wait for all actors --- +echo "" +echo "[run.sh] All actors launched. Waiting for completion..." +echo "" +echo "Monitor with:" +echo " squeue -u \$USER" +echo " tail -f $LOG_DIR/planner-*.out" +for i in $(seq 1 "$NUM_ACTORS"); do + echo " tail -f $LOG_DIR/actor_launcher_${i}.log" +done +echo "" + +FAILED=0 +for i in "${!ACTOR_PIDS[@]}"; do + ACTOR_NUM=$((i + 1)) + wait "${ACTOR_PIDS[$i]}" 2>/dev/null + EXIT_CODE=$? + if [ $EXIT_CODE -eq 0 ]; then + echo "[run.sh] Actor $ACTOR_NUM finished successfully." + else + echo "[run.sh] Actor $ACTOR_NUM failed (exit code $EXIT_CODE)." + FAILED=$((FAILED + 1)) + fi +done + +echo "" +echo "============================================" +echo "[run.sh] All actors finished. $FAILED/$NUM_ACTORS failed." +echo "[run.sh] Planner will be cancelled by cleanup trap." 
+echo "============================================" + +if [ $FAILED -gt 0 ]; then + exit 1 +fi diff --git a/cua/scripts/run_actor_and_vm.sh b/cua/scripts/run_actor_and_vm.sh new file mode 100755 index 000000000..81b20a89a --- /dev/null +++ b/cua/scripts/run_actor_and_vm.sh @@ -0,0 +1,191 @@ +#!/bin/bash +# ============================================================================ +# Actor + VM Launcher (SSH+Enroot Pattern) +# ============================================================================ +# Submits a holder "sleep infinity" job on a reserved GPU node, waits for +# the container to be ready, then SSH+enroot execs into it to run: +# 1. UI-TARS-1.5-7B vLLM server (background, TP=4) +# 2. Data collection via parallel_collect_trajectories.py +# +# Required env vars: +# PLANNER_NODE - hostname of the planner vLLM server +# +# Optional env vars: +# MAX_PARALLEL - parallel VMs per actor (default: 1) +# MAX_TRAJECTORIES - trajectories to collect (default: 10000) +# +# Optional arg: +# $1 = actor index (for log naming, default: 0) +# +# Usage: +# PLANNER_NODE=pool0-12345 bash run_actor_and_vm.sh 1 +# ============================================================================ + +ACTOR_IDX="${1:-0}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LOG_DIR="${LOG_DIR:-$SCRIPT_DIR/logs_single}" + +# Project paths +PROJECT_ROOT="/lustre/fsw/portfolios/nvr/users/bcui/ProRL-Agent-Server" +PROJECT_DIR="$PROJECT_ROOT/cua" + +ACTOR_IMAGE="/lustre/fsw/portfolios/nvr/users/bcui/images/cua-vllm-0.13.0.sqsh" + +# Model +ACTOR_MODEL="ByteDance-Seed/UI-TARS-1.5-7B" + +# Data collection params +MAX_PARALLEL=${MAX_PARALLEL:-1} +MAX_TRAJECTORIES=${MAX_TRAJECTORIES:-10000} + +PLANNER_PORT=8000 +ACTOR_PORT=8000 + +# Validate +if [ -z "$PLANNER_NODE" ]; then + echo "[Actor $ACTOR_IDX] ERROR: PLANNER_NODE not set." 
+ exit 1 +fi + +mkdir -p "$LOG_DIR" + +echo "[Actor $ACTOR_IDX] PLANNER_NODE=$PLANNER_NODE" +echo "[Actor $ACTOR_IDX] MAX_PARALLEL=$MAX_PARALLEL MAX_TRAJECTORIES=$MAX_TRAJECTORIES" + +# --- 1. Submit holder job on reserved node --- +echo "[Actor $ACTOR_IDX] Submitting holder job..." +ACTOR_JOB_ID=$(sbatch --parsable \ + --job-name="cua_actor_${ACTOR_IDX}" \ + --account=llmservice_fm_vision \ + --partition=interactive \ + --reservation=sla_res_osworld_agent_vlm \ + --gpus-per-node=8 \ + --mem=0 \ + --time=04:00:00 \ + --exclusive \ + --output="$LOG_DIR/actor_holder_${ACTOR_IDX}-%j.out" \ + --wrap="srun --container-image=$ACTOR_IMAGE --container-mounts=/lustre:/lustre sleep infinity") + +if [ -z "$ACTOR_JOB_ID" ]; then + echo "[Actor $ACTOR_IDX] ERROR: Job submission failed." + exit 1 +fi +echo "[Actor $ACTOR_IDX] Holder job submitted: $ACTOR_JOB_ID" + +# --- 2. Cleanup trap --- +cleanup() { + echo "" + echo "[Actor $ACTOR_IDX] Cleaning up... Cancelling holder job $ACTOR_JOB_ID" + scancel "$ACTOR_JOB_ID" 2>/dev/null +} +trap cleanup EXIT + +# --- 3. Wait for job to start --- +echo "[Actor $ACTOR_IDX] Waiting for holder job to start..." +ACTOR_NODE="" +while [ -z "$ACTOR_NODE" ]; do + JOB_STATE=$(squeue -j "$ACTOR_JOB_ID" -h -o %T 2>/dev/null) + + if [ "$JOB_STATE" == "RUNNING" ]; then + ACTOR_NODE=$(squeue -j "$ACTOR_JOB_ID" -h -o %N) + elif [ -z "$JOB_STATE" ]; then + echo "[Actor $ACTOR_IDX] ERROR: Job $ACTOR_JOB_ID disappeared from queue!" + exit 1 + fi + sleep 2 +done +echo "[Actor $ACTOR_IDX] Job RUNNING on node: $ACTOR_NODE" + +# --- 4. Wait for container readiness --- +echo "[Actor $ACTOR_IDX] Polling for container readiness on $ACTOR_NODE..." +CONTAINER_PID="" +while [ -z "$CONTAINER_PID" ]; do + sleep 2 + CONTAINER_PID=$(ssh -q -o StrictHostKeyChecking=no "$ACTOR_NODE" \ + "enroot list -f | grep 'pyxis' | grep 'sleep' | awk '{print \$2}' | head -n 1" 2>/dev/null) + + if [ -z "$CONTAINER_PID" ]; then + printf "." 
+ fi +done +echo "" +echo "[Actor $ACTOR_IDX] Container ready, PID: $CONTAINER_PID" + +# --- 5. Wait for planner to be ready --- +echo "[Actor $ACTOR_IDX] Waiting for planner at $PLANNER_NODE:$PLANNER_PORT..." +PLANNER_WAIT=0 +PLANNER_MAX_WAIT=900 # 15 minutes +while ! nc -z "$PLANNER_NODE" "$PLANNER_PORT" 2>/dev/null; do + sleep 10 + PLANNER_WAIT=$((PLANNER_WAIT + 10)) + if [ $((PLANNER_WAIT % 60)) -eq 0 ]; then + echo "[Actor $ACTOR_IDX] Still waiting for planner (${PLANNER_WAIT}s)..." + fi + if [ $PLANNER_WAIT -ge $PLANNER_MAX_WAIT ]; then + echo "[Actor $ACTOR_IDX] ERROR: Planner not ready within ${PLANNER_MAX_WAIT}s." + exit 1 + fi +done +echo "[Actor $ACTOR_IDX] Planner is accepting connections!" + +# --- 6. SSH+enroot exec: launch actor vLLM + data collection --- +ACTOR_LOG_FILE="$LOG_DIR/actor_${ACTOR_IDX}-${ACTOR_JOB_ID}.out" +echo "==========================================================" +echo "[Actor $ACTOR_IDX] Executing on $ACTOR_NODE (job $ACTOR_JOB_ID)" +echo "[Actor $ACTOR_IDX] Logging to: $ACTOR_LOG_FILE" +echo "==========================================================" + +ssh -t -q -o StrictHostKeyChecking=no "$ACTOR_NODE" \ + "enroot exec $CONTAINER_PID /bin/bash -c ' + set -e + + echo \"[Actor $ACTOR_IDX] Launching UI-TARS-1.5-7B vLLM...\" + vllm serve $ACTOR_MODEL \ + --api-key gen \ + --tensor-parallel-size 4 \ + --limit-mm-per-prompt.image 5 \ + --limit-mm-per-prompt.video 0 \ + --max-model-len 65536 \ + --disable-log-requests \ + --disable-log-stats \ + > $LOG_DIR/vllm_actor_${ACTOR_IDX}.log 2>&1 & + VLLM_PID=\$! 
+ + # Wait for local actor vLLM to be healthy + echo \"[Actor $ACTOR_IDX] Waiting for actor vLLM health...\" + ELAPSED=0 + MAX_WAIT=600 + while [ \$ELAPSED -lt \$MAX_WAIT ]; do + if curl -sf http://localhost:$ACTOR_PORT/health > /dev/null 2>&1; then + echo \"[Actor $ACTOR_IDX] Actor vLLM healthy!\" + break + fi + sleep 10 + ELAPSED=\$((ELAPSED + 10)) + if [ \$((ELAPSED % 60)) -eq 0 ]; then + echo \"[Actor $ACTOR_IDX] Still waiting for actor vLLM (\${ELAPSED}s)...\" + fi + done + + if [ \$ELAPSED -ge \$MAX_WAIT ]; then + echo \"[Actor $ACTOR_IDX] ERROR: Actor vLLM did not start within \${MAX_WAIT}s\" + kill \$VLLM_PID 2>/dev/null + exit 1 + fi + + echo \"[Actor $ACTOR_IDX] Starting data collection...\" + cd $PROJECT_DIR + source cua_env_reqs/bin/activate + export PYTHONPATH=$PROJECT_ROOT:\$PYTHONPATH + + python parallel_collect_trajectories.py \ + --planner_node $PLANNER_NODE \ + --actor_node localhost \ + --max_parallel $MAX_PARALLEL \ + --max_trajectories $MAX_TRAJECTORIES + + COLLECT_EXIT=\$? 
+ echo \"[Actor $ACTOR_IDX] Data collection finished with exit code \$COLLECT_EXIT\" + kill \$VLLM_PID 2>/dev/null + exit \$COLLECT_EXIT + '" 2>&1 | tee "$ACTOR_LOG_FILE" diff --git a/cua/scripts/run_all.sbatch b/cua/scripts/run_all.sbatch new file mode 100644 index 000000000..baacd93b4 --- /dev/null +++ b/cua/scripts/run_all.sbatch @@ -0,0 +1,181 @@ +#!/bin/bash +#SBATCH --job-name=cua_all_single_thread +#SBATCH --account=nvr_lacr_llm +#SBATCH --partition=interactive +#SBATCH --nodes=2 +#SBATCH --ntasks-per-node=1 +#SBATCH --gpus-per-node=8 +#SBATCH --mem=0 +#SBATCH --time=04:00:00 +#SBATCH --exclusive +#SBATCH --output=logs_single/run_all-%j.out + +# ============================================================================ +# Consolidated 2-Node CUA Data Collection Script +# ============================================================================ +# Node 1 (Planner): Runs Qwen3-VL-235B vLLM server (tp=8, all 8 GPUs) +# Node 2 (Actor): Runs UI-TARS-1.5-7B vLLM server (tp=4) + data collection VMs +# +# This replaces the previous 3-node setup (2 GPU + 1 CPU) by co-locating +# VMs on the Actor node which has spare GPU/CPU capacity. 
+# ============================================================================

+# Combined container with vLLM/CUDA + QEMU/KVM dependencies
+# CONTAINER_IMAGE="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/images/cua_vllm.sqsh"
+# CONTAINER_IMAGE="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/images/vllm-0.13.0.sqsh"
+CONTAINER_IMAGE="/lustre/fsw/portfolios/nvr/users/bcui/images/cua-vllm-0.13.0.sqsh"
+
+# Project paths
+PROJECT_ROOT="/lustre/fsw/portfolios/nvr/users/bcui/ProRL-Agent-Server"
+PROJECT_DIR="$PROJECT_ROOT/cua"
+
+# Model paths
+PLANNER_MODEL="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/models/Qwen3-VL-235B-A22B-Thinking"
+# PLANNER_MODEL="/lustre/fsw/portfolios/nvr/users/bcui/models/qwen-3-vl-4b-thinking"
+ACTOR_MODEL="ByteDance-Seed/UI-TARS-1.5-7B"
+
+PLANNER_PORT=8000
+ACTOR_PORT=8000
+
+# Data collection parameters: parallel VM count and trajectory budget,
+# forwarded to parallel_collect_trajectories.py on the Actor node
+MAX_PARALLEL=${MAX_PARALLEL:-1}
+MAX_TRAJECTORIES=${MAX_TRAJECTORIES:-10000}
+
+# --- 1. Get Node List and Assign Roles ---
+ALL_NODES=$(scontrol show hostnames "$SLURM_JOB_NODELIST")
+PLANNER_NODE=$(echo "$ALL_NODES" | head -n 1)
+ACTOR_NODE=$(echo "$ALL_NODES" | head -n 2 | tail -n 1)
+
+echo "[run_all] Job ID: $SLURM_JOB_ID"
+echo "[run_all] Planner Node: $PLANNER_NODE"
+echo "[run_all] Actor Node: $ACTOR_NODE"
+echo "[run_all] Max Parallel: $MAX_PARALLEL"
+echo "[run_all] Max Trajs: $MAX_TRAJECTORIES"
+
+# TODO: add back --enable-expert-parallel \
+# --enable-expert-parallel \
+# --- 2. 
Launch Planner vLLM Server (Node 1, all 8 GPUs) --- +srun -w "$PLANNER_NODE" \ + --nodes=1 \ + --ntasks=1 \ + --container-image="$CONTAINER_IMAGE" \ + --container-mounts=/lustre:/lustre \ + --output=logs_single/planner.out \ + bash -c " + echo '[Planner] Launching Qwen3-VL-235B on $PLANNER_NODE' + vllm serve $PLANNER_MODEL \ + --api-key gen \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --limit-mm-per-prompt.video 0 \ + --limit-mm-per-prompt.image 3 \ + --async-scheduling \ + --max-model-len 65536 \ + --gpu-memory-utilization 0.9 + " & +PLANNER_SRUN_PID=$! + +# --- 3. Launch Actor vLLM + Data Collection (Node 2) --- +# The Actor node runs both the vLLM server AND the data collection VMs. +# /dev/kvm is mounted for QEMU/KVM hardware virtualization. +srun -w "$ACTOR_NODE" \ + --nodes=1 \ + --ntasks=1 \ + --container-image="$CONTAINER_IMAGE" \ + --container-mounts=/lustre:/lustre,/dev/kvm:/dev/kvm \ + --container-writable \ + --output=logs_single/actor_and_collect.out \ + bash -c " + set -e + + # Suppress vLLM noise: only show warnings and above + # export VLLM_LOGGING_LEVEL=WARNING + + echo '[Actor] Launching UI-TARS-1.5-7B on $ACTOR_NODE' + # Redirect vLLM output to separate log to keep collection logs_single clean + vllm serve $ACTOR_MODEL \ + --api-key gen \ + --tensor-parallel-size 4 \ + --limit-mm-per-prompt.image 5 \ + --limit-mm-per-prompt.video 0 \ + --max-model-len 65536 \ + --disable-log-requests \ + --disable-log-stats \ + > $PROJECT_DIR/scripts/logs_single/vllm_actor.log 2>&1 & + VLLM_PID=\$! + + # --- Health Check: Wait for both vLLM servers --- + echo '[Actor] Waiting for vLLM servers to become healthy...' 
+ + wait_for_server() { + local host=\$1 + local port=\$2 + local name=\$3 + local max_wait=600 # 10 minutes + local elapsed=0 + + while [ \$elapsed -lt \$max_wait ]; do + if curl -sf http://\${host}:\${port}/health > /dev/null 2>&1; then + echo \"[Actor] \$name server healthy at \${host}:\${port}\" + return 0 + fi + sleep 10 + elapsed=\$((elapsed + 10)) + if [ \$((elapsed % 60)) -eq 0 ]; then + echo \"[Actor] Still waiting for \$name (\${elapsed}s)...\" + fi + done + + echo \"[Actor] ERROR: \$name server did not start within \${max_wait}s\" + return 1 + } + + # Wait for local Actor server + wait_for_server localhost $ACTOR_PORT Actor + ACTOR_OK=\$? + + # Wait for remote Planner server + wait_for_server $PLANNER_NODE $PLANNER_PORT Planner + PLANNER_OK=\$? + + if [ \$ACTOR_OK -ne 0 ] || [ \$PLANNER_OK -ne 0 ]; then + echo '[Actor] ERROR: Server health check failed. Aborting.' + kill \$VLLM_PID 2>/dev/null + exit 1 + fi + + echo '[Actor] Both servers healthy. Starting data collection...' + + # --- Launch Data Collection --- + cd $PROJECT_DIR + source cua_env_reqs/bin/activate + export PYTHONPATH=$PROJECT_ROOT:\$PYTHONPATH + + python parallel_collect_trajectories.py \ + --planner_node $PLANNER_NODE \ + --actor_node localhost \ + --max_parallel $MAX_PARALLEL \ + --max_trajectories $MAX_TRAJECTORIES + + COLLECT_EXIT=\$? + echo \"[Actor] Data collection finished with exit code \$COLLECT_EXIT\" + + # Cleanup + kill \$VLLM_PID 2>/dev/null + exit \$COLLECT_EXIT + " & +ACTOR_SRUN_PID=$! + +# --- 4. Wait for completion --- +# The Actor srun will finish when data collection completes (or fails). +# The Planner srun runs indefinitely until we kill it. +echo "[run_all] Waiting for Actor node (data collection) to finish..." +wait $ACTOR_SRUN_PID +ACTOR_EXIT=$? + +echo "[run_all] Actor node exited with code $ACTOR_EXIT. Stopping Planner..." +kill $PLANNER_SRUN_PID 2>/dev/null +wait $PLANNER_SRUN_PID 2>/dev/null + +echo "[run_all] All done. 
Exit code: $ACTOR_EXIT" +exit $ACTOR_EXIT diff --git a/cua/scripts/run_planner.sbatch b/cua/scripts/run_planner.sbatch new file mode 100644 index 000000000..cc1202d12 --- /dev/null +++ b/cua/scripts/run_planner.sbatch @@ -0,0 +1,59 @@ +#!/bin/bash +#SBATCH --job-name=cua_planner +#SBATCH --account=nvr_lacr_llm +#SBATCH --partition=interactive +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --gpus-per-node=8 +#SBATCH --mem=0 +#SBATCH --time=04:00:00 +#SBATCH --exclusive +#SBATCH --output=logs_single/planner-%j.out + +# ============================================================================ +# Planner Node: Runs Qwen3-VL-235B vLLM server (tp=8, all 8 GPUs) +# ============================================================================ +# Requires COORD_FILE env var (set by run.sh wrapper) +# Writes hostname to COORD_FILE so the actor job can discover this node. +# ============================================================================ + +if [ -z "$COORD_FILE" ]; then + echo "[Planner] ERROR: COORD_FILE not set. Use run.sh or pass via --export." + exit 1 +fi + +CONTAINER_IMAGE="/lustre/fsw/portfolios/nvr/users/bcui/images/cua-vllm-0.13.0.sqsh" +PLANNER_MODEL="/lustre/fs1/portfolios/nvr/projects/nvr_lacr_llm/users/jaehunj/models/Qwen3-VL-235B-A22B-Thinking" + +# Write hostname for actor discovery +PLANNER_HOST=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -1) +echo "$PLANNER_HOST" > "$COORD_FILE" +echo "[Planner] Wrote hostname '$PLANNER_HOST' to $COORD_FILE" + +# Cleanup coordination file on exit +cleanup() { + echo "[Planner] Cleaning up coordination file..." 
+ rm -f "$COORD_FILE" +} +trap cleanup EXIT + +echo "[Planner] Job ID: $SLURM_JOB_ID" +echo "[Planner] Node: $PLANNER_HOST" + +# Launch Planner vLLM server (runs in foreground until cancelled or time limit) +srun --nodes=1 \ + --ntasks=1 \ + --container-image="$CONTAINER_IMAGE" \ + --container-mounts=/lustre:/lustre \ + bash -c " + echo '[Planner] Launching Qwen3-VL-235B on $(hostname)' + vllm serve $PLANNER_MODEL \ + --api-key gen \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --limit-mm-per-prompt.video 0 \ + --limit-mm-per-prompt.image 3 \ + --async-scheduling \ + --max-model-len 65536 \ + --gpu-memory-utilization 0.9 + " diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/osworld_nvcf_example.py b/examples/osworld_nvcf_example.py new file mode 100644 index 000000000..21f0e33b4 --- /dev/null +++ b/examples/osworld_nvcf_example.py @@ -0,0 +1,533 @@ +#!/usr/bin/env python3 +""" +Example script demonstrating OSWorld NVCF runtime usage with OSWorldInteractiveAction. + +This script mirrors examples/osworld_example.py (Singularity) so that the NVCF runtime +achieves exactly similar results: same sections (1–14), same actions and checks. +The runtime deploys an OSWorld function at connect() if NVCF_FUNCTION_ID is not set, +then runs the same flow as the Singularity example. + +Prerequisites: +- NGC_API_KEY and NGC_ORG in environment (e.g. ~/.bashrc) +- Optional: NVCF_FUNCTION_ID set to use an existing deployed function (no deploy on connect) +- Optional: pip install ngcsdk (required for deploy-on-connect) +- Container image is hardcoded in openhands.nvidia.os_world.nvcf.config (DEFAULT_CONTAINER_IMAGE) + +Run from project root: + PYTHONPATH=. 
python examples/osworld_nvcf_example.py +""" + +import asyncio +import sys +import os + +# Ensure openhands is importable +if __name__ == "__main__": + _root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + if _root not in sys.path: + sys.path.insert(0, _root) + +from openhands.core.config import OpenHandsConfig +from openhands.core.logger import openhands_logger as logger +from openhands.events import EventStream +from openhands.events.action.os import OSWorldInteractiveAction +from openhands.events.observation import ErrorObservation +from openhands.runtime.impl.nvcf import OSWorldNVCFRuntime +from openhands.storage import get_file_store + +# Use /tmp paths with nvcf prefix so we don't overwrite Singularity example outputs +PREFIX = "osworld_nvcf" + + +async def main(): + """Main example function: same flow as osworld_example.py.""" + print("=" * 80) + print("OSWorld NVCF Runtime Example") + print("=" * 80) + print() + + # 1. Create configuration + print("1. Creating configuration...") + config = OpenHandsConfig() + config.runtime = 'osworld_nvcf' + config.sandbox.base_container_image = 'ubuntu:24.04' + if os.environ.get("NVCF_FUNCTION_ID"): + print(f"✓ Using existing NVCF function (NVCF_FUNCTION_ID set)") + else: + print(f"✓ Will deploy OSWorld function on connect (NGC_ORG required)") + print(f" Runtime: {config.runtime}") + print() + + # 2. Create event stream + print("2. Creating event stream...") + file_store = get_file_store('local', f'/tmp/{PREFIX}_example') + event_stream = EventStream(sid=f'{PREFIX}-example', file_store=file_store) + print("✓ Event stream created") + print() + + # 3. Create OSWorld NVCF runtime (deploy on connect if no NVCF_FUNCTION_ID) + print("3. 
Creating OSWorld NVCF runtime...") + runtime = OSWorldNVCFRuntime( + config=config, + event_stream=event_stream, + sid=f'{PREFIX}-example', + os_type='linux', + nvcf_api_key=os.environ.get("NGC_API_KEY"), + nvcf_org=os.environ.get("NGC_ORG"), + undeploy_on_close=True, + #nvcf_function_id="", + #nvcf_version_id="" + ) + print("✓ Runtime created") + print(f" OS Type: {runtime.os_type}") + print() + + try: + # 4. Connect to runtime (deploys if needed, then verifies) + print("4. Connecting to runtime (this may take several minutes if deploying)...") + print(" - Deploying NVCF function (if not using existing)") + print(" - Waiting for function to become ACTIVE") + print(" - Verifying OSWorld server is reachable") + await runtime.connect() + print("✓ Runtime connected and VM is ready!") + print(f" VM Server URL: {runtime.osworld_vm_url}") + print(f" VNC URL: {runtime.vnc_url}") + print() + + # 5. Check if VM is alive + print("5. Checking VM health...") + runtime.check_if_alive() + print("✓ VM is alive and responding") + print() + + await asyncio.sleep(10) + + # 6. Get VM screenshot using OSWorldInteractiveAction + print("6. Taking VM screenshot...") + action = OSWorldInteractiveAction( + method='get_screenshot', + params={}, + thought='Taking initial screenshot of the VM desktop' + ) + observation = runtime.run_action(action) + print(f" Observation: {observation.content[:100]}..." if observation.content else " Observation: —") + + screenshot = runtime.get_vm_screenshot() + if screenshot: + screenshot_path = f'/tmp/{PREFIX}_screenshot.png' + with open(screenshot_path, 'wb') as f: + f.write(screenshot) + print(f"✓ Screenshot saved to {screenshot_path}") + print(f" Size: {len(screenshot)} bytes") + else: + print("✗ Failed to get screenshot") + print() + + # 7. Execute some actions using OSWorldInteractiveAction + print("7. Executing VM actions...") + print(" a. 
Clicking at position (10, 10)...") + action = OSWorldInteractiveAction( + method='execute_action', + params={ + 'action': { + 'action_type': 'CLICK', + 'parameters': {'x': 10, 'y': 10, 'button': 'left'} + } + }, + thought='Clicking at center-ish position on the screen' + ) + observation = runtime.run_action(action) + print(f" Result: {observation.content}") + print(f" Exit code: {observation.exit_code}") + await asyncio.sleep(1) + + print(" b. Typing 'Hello OSWorld'...") + action = OSWorldInteractiveAction( + method='execute_action', + params={ + 'action': { + 'action_type': 'TYPING', + 'parameters': {'text': 'Hello OSWorld'} + } + }, + thought='Typing a greeting message' + ) + observation = runtime.run_action(action) + print(f" Result: {observation.content}") + print(f" Exit code: {observation.exit_code}") + await asyncio.sleep(1) + + print(" c. Pressing Enter key...") + action = OSWorldInteractiveAction( + method='execute_action', + params={ + 'action': { + 'action_type': 'PRESS', + 'parameters': {'key': 'enter'} + } + }, + thought='Pressing Enter to confirm' + ) + observation = runtime.run_action(action) + print(f" Result: {observation.content}") + print(f" Exit code: {observation.exit_code}") + print("✓ Actions executed successfully") + print() + + # 8. Get VM information + print("8. Getting VM information...") + print(" a. Getting VM platform...") + action = OSWorldInteractiveAction( + method='get_vm_platform', + params={}, + thought='Getting the operating system platform' + ) + observation = runtime.run_action(action) + print(f" Platform: {observation.content}") + print(" b. Getting screen size...") + action = OSWorldInteractiveAction( + method='get_vm_screen_size', + params={}, + thought='Getting the screen dimensions' + ) + observation = runtime.run_action(action) + print(f" Screen size: {observation.content}") + print("✓ VM information retrieved") + print() + + # 9. Test advanced OSWorld methods + print("9. 
Testing advanced OSWorld methods...") + print(" a. Getting accessibility tree...") + action = OSWorldInteractiveAction( + method='get_accessibility_tree', + params={}, + thought='Getting UI accessibility tree for element inspection' + ) + observation = runtime.run_action(action) + axltree = observation.content[0] if isinstance(observation.content, (list, tuple)) and observation.content else observation.content + if axltree and len(axltree) > 0: + print(f" Accessibility tree retrieved ({len(axltree)} chars)") + print(f" Preview: {axltree[:200]}...") + with open(f'/tmp/{PREFIX}_accessibility_tree.xml', 'w') as f: + f.write(axltree) + print(f" Accessibility tree saved to /tmp/{PREFIX}_accessibility_tree.xml") + else: + print(" Note: Accessibility tree not available or empty") + await asyncio.sleep(1) + + print(" b. Getting terminal output...") + action = OSWorldInteractiveAction( + method='get_terminal_output', + params={}, + thought='Getting terminal output from the VM' + ) + observation = runtime.run_action(action) + if observation.content and len(observation.content) > 0: + print(f" Terminal output retrieved ({len(observation.content)} chars)") + print(f" Preview: {observation.content[:200]}...") + else: + print(" Note: No terminal output available") + await asyncio.sleep(1) + + print(" c. Executing Python command...") + action = OSWorldInteractiveAction( + method='execute_python_command', + params={ + 'command': "print('Hello from Python!'); import sys; print(f'Python version: {sys.version}')" + }, + thought='Running a simple Python command in the VM' + ) + observation = runtime.run_action(action) + print(f" Python output: {observation.content}") + print(f" Exit code: {observation.exit_code}") + await asyncio.sleep(1) + + print(" d. 
Running Python script...") + python_script = """ +import os +import platform + +print(f"Hostname: {platform.node()}") +print(f"Python: {platform.python_version()}") +print(f"OS: {platform.system()} {platform.release()}") +print(f"Current directory: {os.getcwd()}") +print(f"Home directory: {os.path.expanduser('~')}") +""" + action = OSWorldInteractiveAction( + method='run_python_script', + params={'script': python_script}, + thought='Running a multi-line Python script to get system info' + ) + observation = runtime.run_action(action) + print(f" Script output:") + for line in (observation.content or "").split('\n')[:10]: + if line.strip(): + print(f" {line}") + print(f" Exit code: {observation.exit_code}") + await asyncio.sleep(1) + + print(" e. Running bash script...") + bash_script = """echo "Hello from Bash!" +echo "Current user: $(whoami)" +echo "Current directory: $(pwd)" +echo "Date: $(date)" +""" + action = OSWorldInteractiveAction( + method='run_bash_script', + params={'script': bash_script, 'timeout': 30}, + thought='Running a simple bash script' + ) + observation = runtime.run_action(action) + if isinstance(observation, ErrorObservation): + print(f" ⚠ Bash script error: {observation.content}") + else: + print(f" Bash output:") + for line in (observation.content or "").split('\n'): + if line.strip(): + print(f" {line}") + if hasattr(observation, 'exit_code'): + print(f" Exit code: {observation.exit_code}") + print("✓ Advanced methods tested") + print() + + # 10. Test file download with get_file + print("10. Testing file download (get_file)...") + print(" a. 
Creating test file in VM...") + test_content = "Hello from OSWorld VM!\nThis is a test file.\nCreated at: $(date)" + action = OSWorldInteractiveAction( + method='run_bash_script', + params={ + 'script': f'echo "{test_content}" > /tmp/test_file.txt && cat /tmp/test_file.txt', + 'timeout': 10 + }, + thought='Creating a test file for download' + ) + observation = runtime.run_action(action) + if isinstance(observation, ErrorObservation): + print(f" ⚠ Could not create test file: {observation.content}") + else: + print(f" Test file created") + await asyncio.sleep(1) + print(" b. Downloading test file using get_file...") + action = OSWorldInteractiveAction( + method='get_file', + params={'file_path': '/tmp/test_file.txt'}, + thought='Downloading test file from VM' + ) + observation = runtime.run_action(action) + if isinstance(observation, ErrorObservation): + print(f" ⚠ Failed to download: {observation.content}") + else: + try: + import base64 + if observation.content.startswith('base64:'): + content_b64 = observation.content[7:] + file_data = base64.b64decode(content_b64) + download_path = f'/tmp/{PREFIX}_downloaded_file.txt' + with open(download_path, 'wb') as f: + f.write(file_data) + print(f" ✓ File downloaded to {download_path} ({len(file_data)} bytes)") + print(f" Content preview: {file_data.decode('utf-8')[:100]}") + else: + print(f" Unexpected format: {observation.content[:100]}") + except Exception as e: + print(f" Could not save file: {e}") + print("✓ File download tested") + print() + + # 11. Test VM information methods + print("11. Testing VM information methods...") + print(" a. 
Getting VM window size...") + action = OSWorldInteractiveAction( + method='get_vm_window_size', + params={'app_class_name': 'gnome-terminal-server'}, + thought='Getting window size for a specific application' + ) + observation = runtime.run_action(action) + if isinstance(observation, ErrorObservation): + print(f" Note: {observation.content}") + else: + print(f" Window size: {observation.content}") + await asyncio.sleep(1) + print(" b. Getting VM wallpaper...") + action = OSWorldInteractiveAction( + method='get_vm_wallpaper', + params={}, + thought='Getting the desktop wallpaper image' + ) + observation = runtime.run_action(action) + if isinstance(observation, ErrorObservation): + print(f" Note: {observation.content}") + else: + try: + import base64 + if observation.content.startswith('base64:'): + content_b64 = observation.content[7:] + wallpaper_data = base64.b64decode(content_b64) + wallpaper_path = f'/tmp/{PREFIX}_wallpaper.png' + with open(wallpaper_path, 'wb') as f: + f.write(wallpaper_data) + print(f" ✓ Wallpaper saved to {wallpaper_path} ({len(wallpaper_data)} bytes)") + else: + print(f" Unexpected format: {observation.content[:100]}") + except Exception as e: + print(f" Could not save wallpaper: {e}") + await asyncio.sleep(1) + print(" c. Getting VM desktop path...") + action = OSWorldInteractiveAction( + method='get_vm_desktop_path', + params={}, + thought='Getting the desktop directory path' + ) + observation = runtime.run_action(action) + desktop_path = observation.content + print(f" Desktop path: {desktop_path}") + await asyncio.sleep(1) + print(" d. 
Getting VM directory tree...") + action = OSWorldInteractiveAction( + method='get_vm_directory_tree', + params={'path': desktop_path if desktop_path else '/home'}, + thought='Listing directory contents' + ) + observation = runtime.run_action(action) + if isinstance(observation, ErrorObservation): + print(f" Error: {observation.content}") + else: + print(f" Directory tree:") + for i, line in enumerate((observation.content or "").split('\n')[:10]): + if line.strip(): + print(f" {line}") + lines = (observation.content or "").split('\n') + if len(lines) > 10: + print(f" ... ({len(lines) - 10} more lines)") + print("✓ VM information methods tested") + print() + + # 12. Test screen recording + print("12. Testing screen recording...") + print(" a. Starting screen recording...") + action = OSWorldInteractiveAction( + method='start_recording', + params={}, + thought='Starting to record the VM screen' + ) + observation = runtime.run_action(action) + if isinstance(observation, ErrorObservation): + print(f" ⚠ Recording not available: {observation.content}") + else: + print(f" Recording started: {observation.content}") + print(" b. Recording for 3 seconds...") + await asyncio.sleep(3) + print(" c. Performing actions while recording...") + action = OSWorldInteractiveAction( + method='execute_action', + params={ + 'action': { + 'action_type': 'MOVE_TO', + 'parameters': {'x': 100, 'y': 100} + } + }, + thought='Moving mouse to top-left during recording' + ) + runtime.run_action(action) + await asyncio.sleep(1) + print(" d. Moving to center...") + action = OSWorldInteractiveAction( + method='execute_action', + params={ + 'action': { + 'action_type': 'MOVE_TO', + 'parameters': {'x': 512, 'y': 384} + } + }, + thought='Moving mouse to center during recording' + ) + runtime.run_action(action) + await asyncio.sleep(1) + print(" e. 
Clicking at center...") + action = OSWorldInteractiveAction( + method='execute_action', + params={ + 'action': { + 'action_type': 'CLICK', + 'parameters': {'x': 512, 'y': 384} + } + }, + thought='Clicking at center during recording' + ) + runtime.run_action(action) + await asyncio.sleep(1) + print(" f. Stopping recording and downloading...") + action = OSWorldInteractiveAction( + method='end_recording', + params={}, + thought='Stopping the screen recording' + ) + observation = runtime.run_action(action) + if isinstance(observation, ErrorObservation): + print(f" ⚠ Failed to stop recording: {observation.content}") + else: + try: + import base64 + if observation.content.startswith('base64:'): + content_b64 = observation.content[7:] + video_data = base64.b64decode(content_b64) + video_path = f'/tmp/{PREFIX}_recording.mp4' + with open(video_path, 'wb') as f: + f.write(video_data) + print(f" ✓ Recording saved to {video_path} ({len(video_data)} bytes)") + else: + print(f" Unexpected format: {observation.content[:100]}") + except Exception as e: + print(f" Could not save recording: {e}") + print("✓ Screen recording tested") + print() + + # 13. Final screenshot + print("13. Taking final screenshot...") + action = OSWorldInteractiveAction( + method='get_screenshot', + params={}, + thought='Taking final screenshot after interactions' + ) + runtime.run_action(action) + screenshot = runtime.get_vm_screenshot() + if screenshot: + screenshot_path = f'/tmp/{PREFIX}_screenshot_after.png' + with open(screenshot_path, 'wb') as f: + f.write(screenshot) + print(f"✓ Final screenshot saved to {screenshot_path}") + await asyncio.sleep(2) + print() + print("=" * 80) + print("Example completed successfully!") + print("=" * 80) + print() + print("You can:") + print(f"1. View screenshots: open /tmp/{PREFIX}_screenshot*.png") + print(f"2. View wallpaper: open /tmp/{PREFIX}_wallpaper.png") + print(f"3. View recording: vlc /tmp/{PREFIX}_recording.mp4") + print(f"4. 
View downloaded file: cat /tmp/{PREFIX}_downloaded_file.txt") + print() + print("VM Service URLs (NVCF):") + print(f" • OSWorld API: {runtime.osworld_vm_url}") + print(f" • VNC: {runtime.vnc_url}") + print(f" • Chrome DevTools: {runtime.chromium_devtools_url}") + print(f" • VLC Web Interface: {runtime.vlc_url}") + print() + return 0 + + except Exception as e: + print(f"✗ Error: {e}") + logger.exception("Failed to run NVCF example") + return 1 + finally: + print("14. Cleaning up...") + runtime.close() + print("✓ Runtime closed") + print() + + +if __name__ == '__main__': + exit_code = asyncio.run(main()) + sys.exit(exit_code) diff --git a/examples/setup.py b/examples/setup.py index a93297530..1a23c0f20 100644 --- a/examples/setup.py +++ b/examples/setup.py @@ -35,7 +35,7 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__)) -MAX_RETRIES = 20 +MAX_RETRIES = 5 from openhands.nvidia.os_world import metrics, getters @@ -102,7 +102,7 @@ def normalize_url(url): return norm_url1 == norm_url2 class SetupController: - def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache", client_password: str = "", screen_width: int = 1920, screen_height: int = 1080, runtime=None): + def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache", client_password: str = "", screen_width: int = 1920, screen_height: int = 1080, runtime=None, http_client=None): self.vm_ip: str = vm_ip self.server_port: int = server_port self.chromium_port: int = chromium_port @@ -114,8 +114,72 @@ def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 922 self.screen_width: int = screen_width self.screen_height: int = screen_height self.runtime = runtime # Runtime object for interacting with the environment + self.http_client = http_client # Optional HTTP client (e.g. 
NVCFHttpClient) for routing VM requests self.additional_wait_time = 3 + def _vm_post(self, endpoint: str, **kwargs) -> requests.Response: + """POST to the VM server, routing through http_client if available.""" + if self.http_client: + return self.http_client.post(endpoint, **kwargs) + url = self.http_server + endpoint + return requests.post(url, **kwargs) + + def _vm_get(self, endpoint: str, **kwargs) -> requests.Response: + """GET from the VM server, routing through http_client if available.""" + if self.http_client: + return self.http_client.get(endpoint, **kwargs) + url = self.http_server + endpoint + return requests.get(url, **kwargs) + + def _get_cdp_url(self) -> str: + """Get the Chrome DevTools Protocol URL. + + Always uses the local address since: + - Singularity: vm_ip=127.0.0.1, chromium_port=actual Chrome port + - NVCF: vm_ip=127.0.0.1, chromium_port=local proxy port (proxy adds auth headers) + Playwright's connect_over_cdp() can't send custom auth headers on WebSocket, + so we must go through the local proxy for NVCF rather than direct to the NVCF WSS URL. + """ + return f"http://{self.vm_ip}:{self.chromium_port}" + + def _get_cdp_headers(self) -> Optional[Dict[str, str]]: + """Get CDP headers (for NVCF auth), or None for direct connections.""" + if self.http_client and hasattr(self.http_client, 'get_cdp_headers'): + return self.http_client.get_cdp_headers() + return None + + def _restart_vm_services(self, services: str = "chrome"): + """Restart crashed services inside the VM via bash script. + + Args: + services: Which services to restart. "chrome" restarts Chrome + socat. 
+ """ + if services == "chrome": + script = """ +# Restart Chrome and socat (CDP proxy) +pkill -9 -f socat || true +pkill -9 -f 'chrome' || true +sleep 2 + +# Re-launch socat to proxy Chrome DevTools (port 9222 -> NVCF /chrome path) +nohup socat TCP-LISTEN:9222,fork,reuseaddr TCP:127.0.0.1:9223 &>/dev/null & + +# Re-launch Chrome +nohup google-chrome-wrapper --remote-debugging-port=9223 --remote-debugging-address=127.0.0.1 --remote-allow-origins=* --no-first-run --no-default-browser-check --disable-infobars --disable-session-crashed-bubble --disable-features=TranslateUI --start-maximized &>/dev/null & +sleep 3 +""" + else: + return + + try: + r = self._vm_post("/run_bash_script", json={"script": script, "timeout": 30}, timeout=60) + if r.status_code == 200: + logger.info(f"VM service restart ({services}) completed successfully") + else: + logger.warning(f"VM service restart ({services}) returned HTTP {r.status_code}") + except Exception as e: + logger.warning(f"VM service restart ({services}) failed: {e}") + def reset_cache_dir(self, cache_dir: str): self.cache_dir = cache_dir @@ -323,8 +387,8 @@ def _upload_file_setup(self, files: List[Dict[str, str]]): logger.debug(form.content_type) # Explicit connect/read timeout to avoid hanging forever - response = requests.post( - self.http_server + "/setup" + "/upload", + response = self._vm_post( + "/setup/upload", headers=headers, data=form, timeout=(10, 600) @@ -366,7 +430,7 @@ def _change_wallpaper_setup(self, path: str): # send request to server to change wallpaper # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - response = requests.post(self.http_server + "/setup" + "/change_wallpaper", headers=headers, data=payload) + response = self._vm_post("/setup/change_wallpaper", headers=headers, data=payload) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: @@ -391,16 +455,28 @@ def _open_setup(self, path: str): # send request to server to 
open file # Note: This uses a custom /setup endpoint, not a standard OSWorld method - try: - # The server-side call is now blocking and can take time. - # We set a timeout that is slightly longer than the server's timeout (1800s). - response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload, timeout=1810) - response.raise_for_status() # This will raise an exception for 4xx and 5xx status codes - logger.info("Command executed successfully: %s", response.text) - time.sleep(self.additional_wait_time) - except requests.exceptions.RequestException as e: - logger.error(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") - raise Exception(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") from e + max_retries = 5 + for attempt in range(max_retries): + try: + # The server-side call is now blocking and can take time. + # We set a timeout that is slightly longer than the server's timeout (1800s). + response = self._vm_post("/setup/open_file", headers=headers, data=payload, timeout=1810) + response.raise_for_status() # This will raise an exception for 4xx and 5xx status codes + logger.info("Command executed successfully: %s", response.text) + time.sleep(self.additional_wait_time) + return # Success + except requests.exceptions.RequestException as e: + status = getattr(getattr(e, 'response', None), 'status_code', None) + if status in (502, 503, 504) and attempt < max_retries - 1: + wait_time = 20 * (attempt + 1) + logger.warning( + f"open_file attempt {attempt + 1}/{max_retries} failed for '{path}' " + f"(HTTP {status}). Retrying in {wait_time}s..." + ) + time.sleep(wait_time) + continue + logger.error(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") + raise Exception(f"Failed to open file '{path}'. 
An error occurred while trying to send the request or the server responded with an error: {e}") from e def _ensure_launch_command_finish(self, command): if isinstance(command, list): @@ -532,13 +608,18 @@ def _launch_setup(self, command: Union[str, List[str]], shell: bool = False): logger.warning("Command should be a list of strings. Now it is a string. Will split it by space.") command = command.split() + # For NVCF, rewrite launch commands (e.g. Chrome flags differ on cloud VMs) + if self.http_client and hasattr(self.http_client, 'update_launch_command'): + command = self.http_client.update_launch_command(command) + payload = json.dumps({"command": command, "shell": shell}) headers = {"Content-Type": "application/json"} # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - logger.info("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/launch") - response = requests.post(self.http_server + "/setup" + "/launch", headers=headers, data=payload) + target = "NVCF" if self.http_client else (self.http_server + "/setup/launch") + logger.info("REQUEST ADDRESS: %s", target) + response = self._vm_post("/setup/launch", headers=headers, data=payload, timeout=300) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: @@ -611,7 +692,7 @@ def replace_screen_env_in_command(command): # Execute using runtime while not terminates: try: - response = requests.post(self.http_server + "/setup" + "/execute", headers=headers, data=payload) + response = self._vm_post("/setup/execute", headers=headers, data=payload, timeout=300) if response.status_code == 200: results: Dict[str, str] = response.json() if stdout: @@ -683,7 +764,7 @@ def _execute_with_verification_setup( # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - response = requests.post(self.http_server + "/setup" + "/execute_with_verification", + response = self._vm_post("/setup/execute_with_verification", headers=headers, 
data=payload, timeout=max_wait_time + 10) if response.status_code == 200: result = response.json() @@ -734,7 +815,7 @@ def _activate_window_setup(self, window_name: str, strict: bool = False, by_clas # send request to server to open file # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - response = requests.post(self.http_server + "/setup" + "/activate_window", headers=headers, data=payload) + response = self._vm_post("/setup/activate_window", headers=headers, data=payload) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: @@ -758,7 +839,7 @@ def _close_window_setup(self, window_name: str, strict: bool = False, by_class: # send request to server to open file # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - response = requests.post(self.http_server + "/setup" + "/close_window", headers=headers, data=payload) + response = self._vm_post("/setup/close_window", headers=headers, data=payload) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: @@ -771,13 +852,45 @@ async def _chrome_open_tabs_setup(self, urls_to_open: List[str]): if not self.runtime: raise Exception("Runtime is required for SetupController. 
Please provide a runtime object.") - host = self.vm_ip - port = self.chromium_port # fixme: this port is hard-coded, need to be changed from config file + # Pre-validate: check if Chrome DevTools is reachable via NVCF before Playwright retries + if self.http_client and hasattr(self.http_client, 'get_cdp_headers'): + max_pre_checks = 8 + r = None + for pre_attempt in range(max_pre_checks): + try: + cdp_headers = self.http_client.get_cdp_headers() + r = requests.get( + "https://grpc.nvcf.nvidia.com/chrome/json/version", + headers=cdp_headers, + timeout=10.0, + ) + if r.status_code == 200: + logger.info("Chrome pre-check passed (HTTP 200)") + break + logger.warning(f"Chrome pre-check attempt {pre_attempt+1}/{max_pre_checks}: HTTP {r.status_code}") + except Exception as e: + logger.warning(f"Chrome pre-check attempt {pre_attempt+1}/{max_pre_checks}: {e}") + + # After 3 consecutive failures, try restarting Chrome/socat inside the VM + if pre_attempt == 2: + logger.warning("Chrome pre-check failed 3 times, attempting VM-side Chrome restart...") + self._restart_vm_services("chrome") - remote_debugging_url = f"http://{host}:{port}" + if pre_attempt < max_pre_checks - 1: + wait_time = 10 * (pre_attempt + 1) + logger.info(f"Waiting {wait_time}s before next Chrome pre-check...") + time.sleep(wait_time) + else: + raise Exception( + f"Chrome DevTools unreachable after {max_pre_checks} pre-check attempts " + f"(last status: {getattr(r, 'status_code', 'N/A')}). " + f"Chrome or socat likely crashed inside the VM." 
+ ) + + remote_debugging_url = self._get_cdp_url() logger.info("Connect to Chrome @: %s", remote_debugging_url) logger.debug("PLAYWRIGHT ENV: %s", repr(os.environ)) - for attempt in range(15): + for attempt in range(5): if attempt > 0: time.sleep(5) @@ -787,12 +900,11 @@ async def _chrome_open_tabs_setup(self, urls_to_open: List[str]): browser = await p.chromium.connect_over_cdp(remote_debugging_url) # break except Exception as e: - if attempt < 14: + if attempt < 4: logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}") - # time.sleep(10) continue else: - logger.error(f"Failed to connect after multiple attempts: {e}") + logger.error(f"Failed to connect after 5 attempts: {e}") raise e if not browser: @@ -825,22 +937,19 @@ async def _chrome_close_tabs_setup(self, urls_to_close: List[str]): time.sleep(5) # Wait for Chrome to finish launching - host = self.vm_ip - port = self.chromium_port # fixme: this port is hard-coded, need to be changed from config file - - remote_debugging_url = f"http://{host}:{port}" + remote_debugging_url = self._get_cdp_url() async with async_playwright() as p: browser = None - for attempt in range(15): + for attempt in range(5): try: browser = await p.chromium.connect_over_cdp(remote_debugging_url) break except Exception as e: - if attempt < 14: + if attempt < 4: logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}") time.sleep(5) else: - logger.error(f"Failed to connect after multiple attempts: {e}") + logger.error(f"Failed to connect after 5 attempts: {e}") raise e if not browser: @@ -962,22 +1071,19 @@ async def _login_setup(self, **config): if not self.runtime: raise Exception("Runtime is required for SetupController. 
Please provide a runtime object.") - host = self.vm_ip - port = self.chromium_port - - remote_debugging_url = f"http://{host}:{port}" + remote_debugging_url = self._get_cdp_url() async with async_playwright() as p: browser = None - for attempt in range(15): + for attempt in range(5): try: browser = await p.chromium.connect_over_cdp(remote_debugging_url) break except Exception as e: - if attempt < 14: + if attempt < 4: logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}") time.sleep(5) else: - logger.error(f"Failed to connect after multiple attempts: {e}") + logger.error(f"Failed to connect after 5 attempts: {e}") raise e if not browser: return @@ -1025,7 +1131,7 @@ def execute_python_command(self, command: str): for _ in range(3): try: - response = requests.post(self.http_server + "/execute", headers={'Content-Type': 'application/json'}, + response = self._vm_post("/execute", headers={'Content-Type': 'application/json'}, data=payload, timeout=90) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) @@ -1139,7 +1245,7 @@ def _update_browse_history_setup(self, **config): # send request to server to upload file try: logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/upload") - response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form) + response = self._vm_post("/setup/upload", headers=headers, data=form) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: diff --git a/openhands/agenthub/gui_agent/function_calling.py b/openhands/agenthub/gui_agent/function_calling.py index 028092901..d3d2312bd 100644 --- a/openhands/agenthub/gui_agent/function_calling.py +++ b/openhands/agenthub/gui_agent/function_calling.py @@ -70,6 +70,7 @@ def response_to_actions( mcp_tool_names: list[str] | None = None, timeout: float | None = None, ) -> Action: + actions: list[Action] = [] assert len(response.choices) == 1, 
'Only one choice is supported for now' choice = response.choices[0] assistant_msg = choice.message @@ -84,247 +85,250 @@ def response_to_actions( thought += msg['text'] # Process each tool call to OpenHands action - tool_call = assistant_msg.tool_calls[0] - action: Action - logger.debug(f'Tool call in function_calling.py: {tool_call}') - try: - arguments = json.loads(tool_call.function.arguments) - except json.decoder.JSONDecodeError as e: - raise FunctionCallValidationError( - f'Failed to parse tool call arguments: {tool_call.function.arguments}' - ) from e + for tool_call in assistant_msg.tool_calls: + action: Action + logger.debug(f'Tool call in function_calling.py: {tool_call}') + try: + arguments = json.loads(tool_call.function.arguments) + except json.decoder.JSONDecodeError as e: + raise FunctionCallValidationError( + f'Failed to parse tool call arguments: {tool_call.function.arguments}' + ) from e - # ================================================ - # ClickTool - # ================================================ + # ================================================ + # ClickTool + # ================================================ - if tool_call.function.name == ClickTool['function']['name']: - arguments = validate_arguments(arguments, tool_call.function.name) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'CLICK', - 'parameters': arguments, - } - }, - ) - # ================================================ - # RightClickTool - # ================================================ - elif tool_call.function.name == RightClickTool['function']['name']: - arguments = validate_arguments(arguments, tool_call.function.name) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'RIGHT_CLICK', - 'parameters': arguments, - } - }, - ) - # ================================================ - # MiddleClickTool - # 
================================================ - elif tool_call.function.name == MiddleClickTool['function']['name']: - arguments = validate_arguments(arguments, tool_call.function.name) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'MIDDLE_CLICK', - 'parameters': arguments, - } - }, - ) - # ================================================ - # DoubleClickTool - # ================================================ - elif tool_call.function.name == DoubleClickTool['function']['name']: - arguments = validate_arguments(arguments, tool_call.function.name) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'DOUBLE_CLICK', - 'parameters': arguments, - } - }, - ) - # ================================================ - # TripleClickTool - # ================================================ - elif tool_call.function.name == TripleClickTool['function']['name']: - arguments = validate_arguments(arguments, tool_call.function.name) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'TRIPLE_CLICK', - 'parameters': arguments, - } - }, - ) - # ================================================ - # MoveToTool - # ================================================ - elif tool_call.function.name == MoveToTool['function']['name']: - arguments = validate_arguments(arguments, tool_call.function.name) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'MOVE_TO', - 'parameters': arguments, - } - }, - ) - # ================================================ - # DragToTool - # ================================================ - elif tool_call.function.name == DragToTool['function']['name']: - arguments = validate_arguments(arguments, tool_call.function.name) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 
'action': { - 'action_type': 'DRAG_TO', - 'parameters': arguments, - } - }, - ) - # ================================================ - # ScrollTool - # ================================================ - elif tool_call.function.name == ScrollTool['function']['name']: - if 'amount' not in arguments: - raise FunctionCallValidationError( - f'Missing required argument "amount" in tool call {tool_call.function.name}' + if tool_call.function.name == ClickTool['function']['name']: + arguments = validate_arguments(arguments, tool_call.function.name) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'CLICK', + 'parameters': arguments, + } + }, ) - # Map vertical scroll amount to dy; dx defaults to 0 - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'SCROLL', - 'parameters': arguments, - } - }, - ) - # ================================================ - # HorizontalScrollTool - # ================================================ - elif tool_call.function.name == HorizontalScrollTool['function']['name']: - if 'amount' not in arguments: - raise FunctionCallValidationError( - f'Missing required argument "amount" in tool call {tool_call.function.name}' + # ================================================ + # RightClickTool + # ================================================ + elif tool_call.function.name == RightClickTool['function']['name']: + arguments = validate_arguments(arguments, tool_call.function.name) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'RIGHT_CLICK', + 'parameters': arguments, + } + }, ) - # Map vertical scroll amount to dy; dx defaults to 0 - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'SCROLL', - 'parameters': arguments, - } - }, - ) - # ================================================ - # WriteTool 
- # ================================================ - elif tool_call.function.name == WriteTool['function']['name']: - if 'text' not in arguments: - raise FunctionCallValidationError( - f'Missing required argument "text" in tool call {tool_call.function.name}' + # ================================================ + # MiddleClickTool + # ================================================ + elif tool_call.function.name == MiddleClickTool['function']['name']: + arguments = validate_arguments(arguments, tool_call.function.name) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'MIDDLE_CLICK', + 'parameters': arguments, + } + }, ) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'TYPING', - 'parameters': arguments, - } - }, - ) - # ================================================ - # PressTool - # ================================================ - elif tool_call.function.name == PressTool['function']['name']: - if 'key' not in arguments: - raise FunctionCallValidationError( - f'Missing required argument "key" in tool call {tool_call.function.name}' + # ================================================ + # DoubleClickTool + # ================================================ + elif tool_call.function.name == DoubleClickTool['function']['name']: + arguments = validate_arguments(arguments, tool_call.function.name) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'DOUBLE_CLICK', + 'parameters': arguments, + } + }, ) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'PRESS', - 'parameters': arguments, - } - }, - ) - # ================================================ - # HotkeyTool - # ================================================ - elif tool_call.function.name == HotkeyTool['function']['name']: - if 'keys' not in 
arguments: - raise FunctionCallValidationError( - f'Missing required argument "keys" in tool call {tool_call.function.name}' + # ================================================ + # TripleClickTool + # ================================================ + elif tool_call.function.name == TripleClickTool['function']['name']: + arguments = validate_arguments(arguments, tool_call.function.name) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'TRIPLE_CLICK', + 'parameters': arguments, + } + }, ) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'HOTKEY', - 'parameters': arguments, - } - }, - ) - # ================================================ - # FailTool - # ================================================ - elif tool_call.function.name == FailTool['function']['name']: - action = AgentFinishAction( - task_completed='false', - ) - # ================================================ - # FinishTool - # ================================================ - elif tool_call.function.name == FinishTool['function']['name']: - action = AgentFinishAction( - task_completed='true', - ) - # ================================================ - # WaitTool - # ================================================ - elif tool_call.function.name == WaitTool['function']['name']: - if 'seconds' not in arguments: - raise FunctionCallValidationError( - f'Missing required argument "seconds" in tool call {tool_call.function.name}' + # ================================================ + # MoveToTool + # ================================================ + elif tool_call.function.name == MoveToTool['function']['name']: + arguments = validate_arguments(arguments, tool_call.function.name) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'MOVE_TO', + 'parameters': arguments, + } + }, + ) + # 
================================================ + # DragToTool + # ================================================ + elif tool_call.function.name == DragToTool['function']['name']: + arguments = validate_arguments(arguments, tool_call.function.name) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'DRAG_TO', + 'parameters': arguments, + } + }, + ) + # ================================================ + # ScrollTool + # ================================================ + elif tool_call.function.name == ScrollTool['function']['name']: + if 'amount' not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "amount" in tool call {tool_call.function.name}' + ) + # Map vertical scroll amount to dy; dx defaults to 0 + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'SCROLL', + 'parameters': arguments, + } + }, + ) + # ================================================ + # HorizontalScrollTool + # ================================================ + elif tool_call.function.name == HorizontalScrollTool['function']['name']: + if 'amount' not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "amount" in tool call {tool_call.function.name}' + ) + # Map vertical scroll amount to dy; dx defaults to 0 + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'SCROLL', + 'parameters': arguments, + } + }, + ) + # ================================================ + # WriteTool + # ================================================ + elif tool_call.function.name == WriteTool['function']['name']: + if 'text' not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "text" in tool call {tool_call.function.name}' + ) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 
'TYPING', + 'parameters': arguments, + } + }, + ) + # ================================================ + # PressTool + # ================================================ + elif tool_call.function.name == PressTool['function']['name']: + if 'key' not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "key" in tool call {tool_call.function.name}' + ) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'PRESS', + 'parameters': arguments, + } + }, + ) + # ================================================ + # HotkeyTool + # ================================================ + elif tool_call.function.name == HotkeyTool['function']['name']: + if 'keys' not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "keys" in tool call {tool_call.function.name}' + ) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'HOTKEY', + 'parameters': arguments, + } + }, + ) + # ================================================ + # FailTool + # ================================================ + elif tool_call.function.name == FailTool['function']['name']: + action = AgentFinishAction( + task_completed='false', + ) + # ================================================ + # FinishTool + # ================================================ + elif tool_call.function.name == FinishTool['function']['name']: + action = AgentFinishAction( + task_completed='true', + ) + # ================================================ + # WaitTool + # ================================================ + elif tool_call.function.name == WaitTool['function']['name']: + if 'seconds' not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "seconds" in tool call {tool_call.function.name}' + ) + action = OSWorldInteractiveAction( + method='execute_agentic_action', + params={ + 'action': { + 'action_type': 'WAIT', + 
'parameters': arguments, + } + }, + ) + else: + raise FunctionCallNotExistsError( + f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.' ) - action = OSWorldInteractiveAction( - method='execute_agentic_action', - params={ - 'action': { - 'action_type': 'WAIT', - 'parameters': arguments, - } - }, - ) - else: - raise FunctionCallNotExistsError( - f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.' - ) - - action = combine_thought(action, thought) - # Add metadata for tool calling - action.tool_call_metadata = ToolCallMetadata( - tool_call_id=tool_call.id, - function_name=tool_call.function.name, - model_response=response, - total_calls_in_response=len(assistant_msg.tool_calls), - ) + + action = combine_thought(action, thought) + # Add metadata for tool calling + action.tool_call_metadata = ToolCallMetadata( + tool_call_id=tool_call.id, + function_name=tool_call.function.name, + model_response=response, + total_calls_in_response=len(assistant_msg.tool_calls), + ) + if timeout: + action.set_hard_timeout(timeout) + actions.append(action) else: action = MessageAction( content=str(assistant_msg.content) if assistant_msg.content else '', @@ -336,12 +340,14 @@ def response_to_actions( model_response=response, total_calls_in_response=0, ) + actions.append(action) # Add response id to actions # This will ensure we can match both actions without tool calls (e.g. MessageAction) # and actions with tool calls (e.g. CmdRunAction, IPythonRunCellAction, etc.) 
# with the token usage data - action.response_id = response.id - if timeout: - action.set_hard_timeout(timeout) - return action + for action in actions: + action.response_id = response.id + + assert len(actions) >= 1 + return actions diff --git a/openhands/agenthub/gui_agent/osworld_agent.py b/openhands/agenthub/gui_agent/osworld_agent.py index 6d474f0b8..3c1fddb28 100644 --- a/openhands/agenthub/gui_agent/osworld_agent.py +++ b/openhands/agenthub/gui_agent/osworld_agent.py @@ -1,5 +1,6 @@ import os from jinja2 import Template +from collections import deque from openhands.agenthub.gui_agent.tools import OSWORLD_TOOLS @@ -210,6 +211,8 @@ def __init__( self.llm.model_info['supports_vision'] = True else: self.llm.model_info = {'supports_vision': True} + + self.pending_actions: deque['Action'] = deque() self.reset() @@ -218,6 +221,7 @@ def reset(self) -> None: super().reset() self.cost_accumulator = 0 self.error_accumulator = 0 + self.pending_actions.clear() def _get_initial_user_message(self, history: list[Event]) -> MessageAction: """Get the initial user message from the conversation history. 
@@ -272,6 +276,9 @@ def _get_messages( include_a11y_tree = self.config.enable_a11y_tree total_screenshot_count = 0 + llm_response_ids_action = set() + llm_response_ids_observation = set() + # Build history prompts (alternating assistant/user messages) in reverse order for event in reversed(events): include_screenshot = self.config.enable_vision @@ -279,16 +286,24 @@ def _get_messages( include_screenshot = False if isinstance(event, OSWorldInteractiveAction): + llm_response_id = event.tool_call_metadata.model_response.id + if llm_response_id in llm_response_ids_action: + continue + llm_response_ids_action.add(llm_response_id) messages.append(convert_action_to_message(event)) elif isinstance(event, MessageAction): messages.append(convert_message_action_to_message( event, include_a11y_tree=include_a11y_tree, include_screenshot=include_screenshot)) total_screenshot_count += 1 elif isinstance(event, OSWorldOutputObservation) or isinstance(event, ErrorObservation): + llm_response_id = event.tool_call_metadata.model_response.id + if llm_response_id in llm_response_ids_observation: + continue msg = convert_observation_to_message( event, instruction, include_a11y_tree=include_a11y_tree, include_screenshot=include_screenshot) messages.append(msg) total_screenshot_count += 1 + llm_response_ids_observation.add(llm_response_id) # System message messages.append(Message(role='system', content=[TextContent(text=self.system_prompt)])) @@ -308,6 +323,9 @@ def step(self, state: State) -> Action: - MessageAction(content) - Message action to run (e.g. 
ask for clarification) - AgentFinishAction() - end the interaction """ + # Continue with pending actions if any + if self.pending_actions: + return self.pending_actions.popleft() format_error = state.get_last_agent_format_error() if format_error and isinstance(format_error, str): @@ -332,14 +350,20 @@ def step(self, state: State) -> Action: } params['tools'] = self.tools params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)} + for msg in params['messages']: + if msg.get('role') == 'tool': + msg['role'] = 'user' response = self.llm.completion(**params) logger.debug(f'Response from LLM: {response}') - action = codeact_function_calling.response_to_actions(response, timeout=self.config.action_timeout) - if self.pause_time > 0.5: - logger.info(f'Setting pause time to {self.pause_time} seconds for agentic action') - action.pause_time = self.pause_time - logger.debug(f'Actions after response_to_actions: {action}') - return action + actions = codeact_function_calling.response_to_actions(response, timeout=self.config.action_timeout) + logger.debug(f'Actions after response_to_actions: {actions}') + for action in actions: + if self.pause_time > 0.5: + logger.info(f'Setting pause time to {self.pause_time} seconds for agentic action') + action.pause_time = self.pause_time + self.pending_actions.append(action) + + return self.pending_actions.popleft() def _get_messages_from_agent_state( self, events: list[Event], initial_user_message: MessageAction, @@ -356,6 +380,7 @@ def _get_messages_from_agent_state( Returns: list[dict]: A list of formatted messages ready for LLM consumption """ + """ messages: list[Message] = [] # System message @@ -376,7 +401,42 @@ def _get_messages_from_agent_state( msg = convert_observation_to_message_full_state( event, instruction, include_a11y_tree=include_a11y_tree) messages.append(msg) + """ + messages: list[Message] = [] + + # Get instruction from initial user message + # User message is a MessageAction with content and 
image_urls, will be processed in events + instruction = get_instruction(initial_user_message) + include_a11y_tree = self.config.enable_a11y_tree + + llm_response_ids_action = set() + llm_response_ids_observation = set() + + # Build history prompts (alternating assistant/user messages) in reverse order + for event in reversed(events): + if isinstance(event, AgentFinishAction): + messages.append(convert_action_to_message(event)) + elif isinstance(event, OSWorldInteractiveAction): + llm_response_id = event.tool_call_metadata.model_response.id + if llm_response_id in llm_response_ids_action: + continue + llm_response_ids_action.add(llm_response_id) + messages.append(convert_action_to_message(event)) + elif isinstance(event, MessageAction): + messages.append(convert_message_action_to_message_full_state(event, include_a11y_tree=include_a11y_tree)) + elif isinstance(event, OSWorldOutputObservation) or isinstance(event, ErrorObservation): + llm_response_id = event.tool_call_metadata.model_response.id + if llm_response_id in llm_response_ids_observation: + continue + msg = convert_observation_to_message_full_state( + event, instruction, include_a11y_tree=include_a11y_tree) + messages.append(msg) + llm_response_ids_observation.add(llm_response_id) + + # System message + messages.append(Message(role='system', content=[TextContent(text=self.system_prompt)])) + messages = messages[::-1] # set flags to know how to serialize the messages for message in messages: message.cache_enabled = False diff --git a/openhands/agenthub/gui_agent/prompts/osworld.py b/openhands/agenthub/gui_agent/prompts/osworld.py index d738c6250..5cc3018ce 100644 --- a/openhands/agenthub/gui_agent/prompts/osworld.py +++ b/openhands/agenthub/gui_agent/prompts/osworld.py @@ -1,14 +1,10 @@ OSWORLD_OBSERVATION_FEEDBACK_PROMPT = """Action executed. Please generate the next move according to the UI screenshot and instruction. 
Instruction: {instruction} - -First describe the screenshot in detail, think step by step, then generate the next move. You need to at least make a tool call. """ ERROR_OBSERVATION_FEEDBACK_PROMPT = """Action failed. Please continue working on the task according to the instruction. Error message: {error_message} Instruction: {instruction} - -First describe the screenshot in detail, think step by step, then generate the next move. You need to at least make a tool call. """ \ No newline at end of file diff --git a/openhands/agenthub/gui_agent/prompts/system_prompt_osworld.j2 b/openhands/agenthub/gui_agent/prompts/system_prompt_osworld.j2 index 4a9feaa3d..ffa37d92a 100644 --- a/openhands/agenthub/gui_agent/prompts/system_prompt_osworld.j2 +++ b/openhands/agenthub/gui_agent/prompts/system_prompt_osworld.j2 @@ -40,57 +40,7 @@ In such cases, reassess and choose a different action. ### 4. Sudo Password -My computer’s password is **`{CLIENT_PASSWORD}`**. +My computer's password is **`{CLIENT_PASSWORD}`**. Use it whenever `sudo` rights are required. --- - -## Response Structure Requirements - -Each response **must** follow this structure: - ---- - -### Observation: -Provide a detailed description of the screenshot, including: -- Visible UI elements -- Buttons, menus, icons, windows, dialogs -- Pop-ups, notifications, warnings, errors -- Loading indicators or anything affecting progress - -Include **all relevant details**. - ---- - -### Thought: -Provide detailed reasoning before choosing an action. - -#### Step-by-Step Progress Assessment -- Summarize what has been accomplished so far. -- Identify unexpected outcomes or errors. -- If the previous action seems incorrect, explain how to recover. - -#### Next Action Analysis -- List possible next actions based on the current screen. -- Evaluate them considering the current state and previous actions. -- Select the **most logical** action. -- Anticipate the consequence of that action. 
- ---- - -### Action: -Output exactly **one** action using the allowed tools: -- Mouse actions -- Keyboard actions -- `wait()` -- `fail()` -- `finish()` - -Only **one** tool call per step. - ---- - -## Final Notes - -- Your decisions must be grounded **strictly in the screenshot**. -- Do not assume any UI that is not visible. \ No newline at end of file diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index dc541f64f..aa8f142ad 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -204,6 +204,7 @@ def __init__( kwargs: dict[str, Any] = { 'temperature': self.config.temperature, 'max_completion_tokens': self.config.max_output_tokens, + 'skip_special_tokens': False, } if self.config.top_k is not None: # openai doesn't expose top_k diff --git a/openhands/nvidia/async_server_osworld.py b/openhands/nvidia/async_server_osworld.py index 420148045..ebdf53e60 100644 --- a/openhands/nvidia/async_server_osworld.py +++ b/openhands/nvidia/async_server_osworld.py @@ -88,6 +88,11 @@ def __init__( 'No reward server IP provided. Evaluations would only work for swebench problems.' ) + # For sequential VM start-ups to mitigate boot storm + self._launch_lock = threading.Lock() + self._last_launch_time = 0 + self._launch_delay_seconds = 15.0 # Wait 15s between starts + def get_unique_id(self, instance, max_retries=10): base = f'{get_instance_id(instance)}_{instance["trajectory_id"]}' base_hash = hashlib.sha256(base.encode('utf-8')).hexdigest()[:16] @@ -396,6 +401,25 @@ async def _process_init(self, job_id: str, job_details, dataset_type: str, wid: if job_details.timer is None: raise RuntimeError('Timer is not initialized') + # Rate Limit Logic: Prevent Boot Storm + wait_time = 0.0 + with self._launch_lock: + now = time.time() + # The earliest this worker can start is either NOW, + # or 15s after the last scheduled launch. 
+ target_start_time = max(now, self._last_launch_time + self._launch_delay_seconds) + + wait_time = target_start_time - now + + # Reserve this slot by updating the global timestamp immediately + self._last_launch_time = target_start_time + + # Perform the wait asynchronously (outside the lock) + if wait_time > 0: + if wait_time > 1.0: + logger.info(f"Delayed boot-up: waiting {wait_time:.1f}s...") + await asyncio.sleep(wait_time) + with phase_context(job_details.timer, 'init'): init_coro = func(job_details=job_details, sid=job_id) runtime, metadata, config = await run_with_timeout_awareness( diff --git a/openhands/nvidia/os_world/controllers/python.py b/openhands/nvidia/os_world/controllers/python.py index 7844d7307..292dfb441 100644 --- a/openhands/nvidia/os_world/controllers/python.py +++ b/openhands/nvidia/os_world/controllers/python.py @@ -5,18 +5,34 @@ import traceback import requests from openhands.core.logger import openhands_logger as logger +from openhands.runtime.utils.osworld_http_client import OSWorldHttpClient KEYBOARD_KEYS = ['\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 'accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace', 'browserback', 'browserfavorites', 'browserforward', 'browserhome', 'browserrefresh', 'browsersearch', 'browserstop', 'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete', 'divide', 'down', 'end', 'enter', 'esc', 'escape', 'execute', 'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f2', 'f20', 'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert', 'junja', 
'kana', 'kanji', 'launchapp1', 'launchapp2', 'launchmail', 'launchmediaselect', 'left', 'modechange', 'multiply', 'nexttrack', 'nonconvert', 'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9', 'numlock', 'pagedown', 'pageup', 'pause', 'pgdn', 'pgup', 'playpause', 'prevtrack', 'print', 'printscreen', 'prntscrn', 'prtsc', 'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator', 'shift', 'shiftleft', 'shiftright', 'sleep', 'stop', 'subtract', 'tab', 'up', 'volumedown', 'volumemute', 'volumeup', 'win', 'winleft', 'winright', 'yen', 'command', 'option', 'optionleft', 'optionright'] class PythonController: def __init__(self, vm_ip: str, server_port: int, - pkgs_prefix: str = "import pyautogui; import time; pyautogui.FAILSAFE = False; {command}"): + pkgs_prefix: str = "import pyautogui; import time; pyautogui.FAILSAFE = False; {command}", + http_client: OSWorldHttpClient = None): self.vm_ip = vm_ip self.http_server = f"http://{vm_ip}:{server_port}" self.pkgs_prefix = pkgs_prefix # fixme: this is a hacky way to execute python commands. 
fix it and combine it with installation of packages self.retry_times = 3 self.retry_interval = 5 + # Use http_client if provided, otherwise fall back to direct requests + self.client = http_client + + def _get(self, endpoint: str, **kwargs) -> requests.Response: + """Make a GET request using client or direct requests.""" + if self.client: + return self.client.get(endpoint, **kwargs) + return requests.get(self.http_server + endpoint, **kwargs) + + def _post(self, endpoint: str, **kwargs) -> requests.Response: + """Make a POST request using client or direct requests.""" + if self.client: + return self.client.post(endpoint, **kwargs) + return requests.post(self.http_server + endpoint, **kwargs) @staticmethod def _is_valid_image_response(content_type: str, data: Optional[bytes]) -> bool: @@ -43,7 +59,7 @@ def get_screenshot(self) -> Optional[bytes]: for attempt_idx in range(self.retry_times): try: - response = requests.get(self.http_server + "/screenshot", timeout=10) + response = self._get("/screenshot", timeout=10) if response.status_code == 200: content_type = response.headers.get("Content-Type", "") content = response.content @@ -71,7 +87,7 @@ def get_accessibility_tree(self) -> Optional[str]: for _ in range(self.retry_times): try: - response: requests.Response = requests.get(self.http_server + "/accessibility") + response: requests.Response = self._get("/accessibility") if response.status_code == 200: logger.info("Got accessibility tree successfully") return response.json()["AT"] @@ -93,7 +109,7 @@ def get_terminal_output(self) -> Optional[str]: for _ in range(self.retry_times): try: - response = requests.get(self.http_server + "/terminal") + response = self._get("/terminal") if response.status_code == 200: logger.info("Got terminal output successfully") return response.json()["output"] @@ -115,7 +131,7 @@ def get_file(self, file_path: str) -> Optional[bytes]: for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/file", 
data={"file_path": file_path}) + response = self._post("/file", data={"file_path": file_path}) if response.status_code == 200: logger.info("File downloaded successfully") return response.content @@ -141,8 +157,8 @@ def execute_python_command(self, command: str) -> None: for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/execute", headers={'Content-Type': 'application/json'}, - data=payload, timeout=90) + response = self._post("/execute", headers={'Content-Type': 'application/json'}, + data=payload, timeout=90) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) return response.json() @@ -167,8 +183,8 @@ def run_python_script(self, script: str) -> Optional[Dict[str, Any]]: for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/run_python", headers={'Content-Type': 'application/json'}, - data=payload, timeout=90) + response = self._post("/run_python", headers={'Content-Type': 'application/json'}, + data=payload, timeout=90) if response.status_code == 200: return response.json() else: @@ -200,8 +216,8 @@ def run_bash_script(self, script: str, timeout: int = 30, working_dir: Optional[ for _ in range(self.retry_times): try: - response = requests.post( - self.http_server + "/run_bash_script", + response = self._post( + "/run_bash_script", headers={'Content-Type': 'application/json'}, data=payload, timeout=timeout + 100 # Add buffer to HTTP timeout @@ -416,7 +432,7 @@ def start_recording(self): for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/start_recording") + response = self._post("/start_recording") if response.status_code == 200: logger.info("Recording started successfully") return @@ -437,7 +453,7 @@ def end_recording(self, dest: str): for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/end_recording") + response = self._post("/end_recording") if response.status_code == 200: 
logger.info("Recording stopped successfully") with open(dest, 'wb') as f: @@ -469,7 +485,7 @@ def get_vm_screen_size(self): for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/screen_size") + response = self._post("/screen_size") if response.status_code == 200: logger.info("Got screen size successfully") return response.json() @@ -491,7 +507,7 @@ def get_vm_window_size(self, app_class_name: str): for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/window_size", data={"app_class_name": app_class_name}) + response = self._post("/window_size", data={"app_class_name": app_class_name}) if response.status_code == 200: logger.info("Got window size successfully") return response.json() @@ -513,7 +529,7 @@ def get_vm_wallpaper(self): for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/wallpaper") + response = self._post("/wallpaper") if response.status_code == 200: logger.info("Got wallpaper successfully") return response.content @@ -535,7 +551,7 @@ def get_vm_desktop_path(self) -> Optional[str]: for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/desktop_path") + response = self._post("/desktop_path") if response.status_code == 200: logger.info("Got desktop path successfully") return response.json()["desktop_path"] @@ -558,7 +574,7 @@ def get_vm_directory_tree(self, path) -> Optional[Dict[str, Any]]: for _ in range(self.retry_times): try: - response = requests.post(self.http_server + "/list_directory", headers={'Content-Type': 'application/json'}, data=payload) + response = self._post("/list_directory", headers={'Content-Type': 'application/json'}, data=payload) if response.status_code == 200: logger.info("Got directory tree successfully") return response.json()["directory_tree"] diff --git a/openhands/nvidia/os_world/controllers/setup.py b/openhands/nvidia/os_world/controllers/setup.py index 1cd5353a4..ab85b2b75 100644 --- 
a/openhands/nvidia/os_world/controllers/setup.py +++ b/openhands/nvidia/os_world/controllers/setup.py @@ -21,13 +21,14 @@ from openhands.events.action.os import OSWorldInteractiveAction from openhands.core.logger import openhands_logger as logger from openhands.nvidia.os_world.metrics.utils import compare_urls +from openhands.runtime.utils.osworld_http_client import OSWorldHttpClient FILE_PATH = os.path.dirname(os.path.abspath(__file__)) MAX_RETRIES = 20 class SetupController: - def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache", client_password: str = "", screen_width: int = 1920, screen_height: int = 1080, runtime=None): + def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache", client_password: str = "", screen_width: int = 1920, screen_height: int = 1080, runtime=None, http_client: OSWorldHttpClient = None): self.vm_ip: str = vm_ip self.server_port: int = server_port self.chromium_port: int = chromium_port @@ -40,6 +41,20 @@ def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 922 self.screen_height: int = screen_height self.runtime = runtime # Runtime object for interacting with the environment self.additional_wait_time = 3 + # Use http_client if provided, otherwise fall back to direct requests + self.client = http_client + + def _get(self, endpoint: str, **kwargs) -> requests.Response: + """Make a GET request using client or direct requests.""" + if self.client: + return self.client.get(endpoint, **kwargs) + return requests.get(self.http_server + endpoint, **kwargs) + + def _post(self, endpoint: str, **kwargs) -> requests.Response: + """Make a POST request using client or direct requests.""" + if self.client: + return self.client.post(endpoint, **kwargs) + return requests.post(self.http_server + endpoint, **kwargs) def reset_cache_dir(self, cache_dir: str): self.cache_dir = cache_dir 
@@ -227,8 +242,8 @@ def _upload_file_setup(self, files: List[Dict[str, str]]): logger.debug(form.content_type) # Explicit connect/read timeout to avoid hanging forever - response = requests.post( - self.http_server + "/setup" + "/upload", + response = self._post( + "/setup/upload", headers=headers, data=form, timeout=(10, 600) @@ -270,7 +285,7 @@ def _change_wallpaper_setup(self, path: str): # send request to server to change wallpaper # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - response = requests.post(self.http_server + "/setup" + "/change_wallpaper", headers=headers, data=payload) + response = self._post("/setup/change_wallpaper", headers=headers, data=payload) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: @@ -295,16 +310,30 @@ def _open_setup(self, path: str): # send request to server to open file # Note: This uses a custom /setup endpoint, not a standard OSWorld method - try: - # The server-side call is now blocking and can take time. - # We set a timeout that is slightly longer than the server's timeout (1800s). - response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload, timeout=1810) - response.raise_for_status() # This will raise an exception for 4xx and 5xx status codes - logger.info("Command executed successfully: %s", response.text) - time.sleep(self.additional_wait_time) - except requests.exceptions.RequestException as e: - logger.error(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") - raise Exception(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") from e + max_retries = 3 + last_error = None + for attempt in range(max_retries): + try: + # The server-side call is now blocking and can take time. 
+ # We set a timeout that is slightly longer than the server's timeout (1800s). + response = self._post("/setup/open_file", headers=headers, data=payload, timeout=1810) + response.raise_for_status() # This will raise an exception for 4xx and 5xx status codes + logger.info("Command executed successfully: %s", response.text) + time.sleep(self.additional_wait_time) + return # Success + except requests.exceptions.RequestException as e: + last_error = e + status = getattr(getattr(e, 'response', None), 'status_code', None) + if status in (502, 503, 504) and attempt < max_retries - 1: + wait_time = 10 * (attempt + 1) + logger.warning( + f"open_file attempt {attempt + 1}/{max_retries} failed for '{path}' " + f"(HTTP {status}). Retrying in {wait_time}s..." + ) + time.sleep(wait_time) + continue + logger.error(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") + raise Exception(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") from e def _ensure_launch_command_finish(self, command): if isinstance(command, list): @@ -436,13 +465,16 @@ def _launch_setup(self, command: Union[str, List[str]], shell: bool = False): logger.warning("Command should be a list of strings. Now it is a string. 
Will split it by space.") command = command.split() + if self.client: + command = self.client.update_launch_command(command) + payload = json.dumps({"command": command, "shell": shell}) headers = {"Content-Type": "application/json"} # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - logger.info("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/launch") - response = requests.post(self.http_server + "/setup" + "/launch", headers=headers, data=payload) + logger.info("REQUEST ADDRESS: %s", self.http_server + "/setup/launch") + response = self._post("/setup/launch", headers=headers, data=payload) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: @@ -516,7 +548,7 @@ def replace_screen_env_in_command(command): # Execute using runtime while not terminates: try: - response = requests.post(self.http_server + "/setup" + "/execute", headers=headers, data=payload) + response = self._post("/setup/execute", headers=headers, data=payload) if response.status_code == 200: results: Dict[str, str] = response.json() if stdout: @@ -588,8 +620,8 @@ def _execute_with_verification_setup( # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - response = requests.post(self.http_server + "/setup" + "/execute_with_verification", - headers=headers, data=payload, timeout=max_wait_time + 10) + response = self._post("/setup/execute_with_verification", + headers=headers, data=payload, timeout=max_wait_time + 10) if response.status_code == 200: result = response.json() logger.info("Command executed and verified successfully: %s -> %s" @@ -639,7 +671,7 @@ def _activate_window_setup(self, window_name: str, strict: bool = False, by_clas # send request to server to open file # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - response = requests.post(self.http_server + "/setup" + "/activate_window", headers=headers, data=payload) + response = 
self._post("/setup/activate_window", headers=headers, data=payload) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: @@ -663,7 +695,7 @@ def _close_window_setup(self, window_name: str, strict: bool = False, by_class: # send request to server to open file # Note: This uses a custom /setup endpoint, not a standard OSWorld method try: - response = requests.post(self.http_server + "/setup" + "/close_window", headers=headers, data=payload) + response = self._post("/setup/close_window", headers=headers, data=payload) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: @@ -676,10 +708,16 @@ async def _chrome_open_tabs_setup(self, urls_to_open: List[str]): if not self.runtime: raise Exception("Runtime is required for SetupController. Please provide a runtime object.") - host = self.vm_ip - port = self.chromium_port # fixme: this port is hard-coded, need to be changed from config file + # Get CDP URL and headers from client if available + if self.client: + remote_debugging_url = self.client.get_cdp_url() + cdp_headers = self.client.get_cdp_headers() + else: + host = self.vm_ip + port = self.chromium_port + remote_debugging_url = f"http://{host}:{port}" + cdp_headers = None - remote_debugging_url = f"http://{host}:{port}" logger.info("Connect to Chrome @: %s", remote_debugging_url) logger.debug("PLAYWRIGHT ENV: %s", repr(os.environ)) for attempt in range(15): @@ -689,7 +727,10 @@ async def _chrome_open_tabs_setup(self, urls_to_open: List[str]): browser = None async with async_playwright() as p: try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp( + remote_debugging_url, + headers=cdp_headers or {} + ) # break except Exception as e: if attempt < 14: @@ -730,15 +771,24 @@ async def _chrome_close_tabs_setup(self, urls_to_close: List[str]): time.sleep(5) # Wait for Chrome to finish launching - host = self.vm_ip 
- port = self.chromium_port # fixme: this port is hard-coded, need to be changed from config file + # Get CDP URL and headers from client if available + if self.client: + remote_debugging_url = self.client.get_cdp_url() + cdp_headers = self.client.get_cdp_headers() + else: + host = self.vm_ip + port = self.chromium_port + remote_debugging_url = f"http://{host}:{port}" + cdp_headers = None - remote_debugging_url = f"http://{host}:{port}" async with async_playwright() as p: browser = None for attempt in range(15): try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp( + remote_debugging_url, + headers=cdp_headers or {} + ) break except Exception as e: if attempt < 14: @@ -867,15 +917,24 @@ async def _login_setup(self, **config): if not self.runtime: raise Exception("Runtime is required for SetupController. Please provide a runtime object.") - host = self.vm_ip - port = self.chromium_port + # Get CDP URL and headers from client if available + if self.client: + remote_debugging_url = self.client.get_cdp_url() + cdp_headers = self.client.get_cdp_headers() + else: + host = self.vm_ip + port = self.chromium_port + remote_debugging_url = f"http://{host}:{port}" + cdp_headers = None - remote_debugging_url = f"http://{host}:{port}" async with async_playwright() as p: browser = None for attempt in range(15): try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp( + remote_debugging_url, + headers=cdp_headers or {} + ) break except Exception as e: if attempt < 14: @@ -930,8 +989,8 @@ def execute_python_command(self, command: str): for _ in range(3): try: - response = requests.post(self.http_server + "/execute", headers={'Content-Type': 'application/json'}, - data=payload, timeout=90) + response = self._post("/execute", headers={'Content-Type': 'application/json'}, + data=payload, timeout=90) if response.status_code == 200: logger.info("Command 
executed successfully: %s", response.text) return response.json() @@ -1042,8 +1101,8 @@ def _update_browse_history_setup(self, **config): # send request to server to upload file try: - logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/upload") - response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form) + logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup/upload") + response = self._post("/setup/upload", headers=headers, data=form) if response.status_code == 200: logger.info("Command executed successfully: %s", response.text) else: diff --git a/openhands/nvidia/os_world/evaluate.py b/openhands/nvidia/os_world/evaluate.py index cd3478021..f87bde184 100644 --- a/openhands/nvidia/os_world/evaluate.py +++ b/openhands/nvidia/os_world/evaluate.py @@ -25,12 +25,18 @@ def __init__(self, task_config: Dict[str, Any], controller): self.client_password = controller.client_password self.screen_width = controller.screen_width self.screen_height = controller.screen_height + + # Get http_client from controller for runtime-agnostic HTTP calls + self.client = getattr(controller, 'client', None) # Assume Linux platform for OS World VMs # TODO: get from runtime/controller, mismatch initial letter is lowercase self.vm_platform = 'Linux' + + # Current proxy setting (for chrome getters) + self.current_use_proxy = False - self.controller = PythonController(self.vm_ip, self.server_port) + self.controller = PythonController(self.vm_ip, self.server_port, http_client=self.client) self._set_evaluator_info(task_config) def _set_evaluator_info(self, task_config: Dict[str, Any]): diff --git a/openhands/nvidia/os_world/getters/chrome.py b/openhands/nvidia/os_world/getters/chrome.py index cd2acd906..40a707888 100644 --- a/openhands/nvidia/os_world/getters/chrome.py +++ b/openhands/nvidia/os_world/getters/chrome.py @@ -18,6 +18,28 @@ from openhands.core.logger import openhands_logger as logger + +def _get_cdp_connection_info(env): + 
"""Get CDP URL and headers from env, using http_client if available.""" + if hasattr(env, 'client') and env.client: + return env.client.get_cdp_url(), env.client.get_cdp_headers() + else: + host = env.vm_ip + port = env.chromium_port + return f"http://{host}:{port}", None + + +def _make_post_request(env, endpoint: str, **kwargs): + """Make a POST request using env.client if available, otherwise direct.""" + if hasattr(env, 'client') and env.client: + return env.client.post(endpoint, **kwargs) + else: + host = env.vm_ip + port = env.server_port + url = f"http://{host}:{port}{endpoint}" + return requests.post(url, **kwargs) + + _accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org", "attr": "uri:deskat:attributes.at-spi.gnome.org", @@ -58,11 +80,7 @@ async def get_info_from_website(env, config: Dict[Any, Any]) -> Any: logger.debug(f"[INFO_FROM_WEBSITE] Full config: {config}") try: - host = env.vm_ip - port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file - server_port = env.server_port - remote_debugging_url = f"http://{host}:{port}" - backend_url = f"http://{host}:{server_port}" + remote_debugging_url, cdp_headers = _get_cdp_connection_info(env) use_proxy = env.current_use_proxy logger.info(f"[INFO_FROM_WEBSITE] Connecting to Chrome at {remote_debugging_url}") @@ -70,7 +88,7 @@ async def get_info_from_website(env, config: Dict[Any, Any]) -> Any: async with async_playwright() as p: # connect to remote Chrome instance try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[INFO_FROM_WEBSITE] Successfully connected to existing Chrome instance") except Exception as e: logger.warning(f"[INFO_FROM_WEBSITE] Failed to connect to existing Chrome instance: {e}") @@ -89,10 +107,9 @@ async def get_info_from_website(env, config: Dict[Any, Any]) -> Any: logger.info(f"[INFO_FROM_WEBSITE] Starting 
browser with command: {' '.join(command)}") payload = json.dumps({"command": command, "shell": False}) headers = {"Content-Type": "application/json"} - #requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) - requests.post(backend_url + "/setup" + "/launch", headers=headers, data=payload) + _make_post_request(env, "/setup/launch", headers=headers, data=payload) await asyncio.sleep(5) - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[INFO_FROM_WEBSITE] Successfully connected to new Chrome instance") page = await browser.new_page() @@ -536,12 +553,8 @@ def get_extensions_installed_from_shop(env, config: Dict[str, str]): # port info to allow remote debugging, see README.md for details async def get_page_info(env, config: Dict[str, str]): - host = env.vm_ip - port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file - server_port = env.server_port + remote_debugging_url, cdp_headers = _get_cdp_connection_info(env) url = config["url"] - - remote_debugging_url = f"http://{host}:{port}" # Configuration for retry and timeout max_retries = 2 @@ -554,12 +567,11 @@ async def get_page_info(env, config: Dict[str, str]): async with async_playwright() as p: # connect to remote Chrome instance try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[PAGE_INFO] Successfully connected to existing Chrome instance") except Exception as e: logger.warning(f"[PAGE_INFO] Failed to connect to existing Chrome instance: {e}") # If the connection fails, start a new browser instance - platform.machine() if "arm" in platform.machine(): # start a new browser instance if the connection fails payload = json.dumps({"command": [ @@ -573,9 +585,9 @@ async def 
get_page_info(env, config: Dict[str, str]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) + _make_post_request(env, "/setup/launch", headers=headers, data=payload) await asyncio.sleep(5) - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[PAGE_INFO] Successfully connected to new Chrome instance") page = await browser.new_page() @@ -621,11 +633,7 @@ async def get_page_info(env, config: Dict[str, str]): async def get_open_tabs_info(env, config: Dict[str, str]): - host = env.vm_ip - port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file - server_port = env.server_port - - remote_debugging_url = f"http://{host}:{port}" + remote_debugging_url, cdp_headers = _get_cdp_connection_info(env) # Configuration for retry and timeout max_retries = 2 @@ -638,12 +646,11 @@ async def get_open_tabs_info(env, config: Dict[str, str]): async with async_playwright() as p: # connect to remote Chrome instance try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[OPEN_TABS_INFO] Successfully connected to existing Chrome instance") except Exception as e: logger.warning(f"[OPEN_TABS_INFO] Failed to connect to existing Chrome instance: {e}") # If the connection fails, start a new browser instance - platform.machine() if "arm" in platform.machine(): # start a new browser instance if the connection fails payload = json.dumps({"command": [ @@ -657,10 +664,10 @@ async def get_open_tabs_info(env, config: Dict[str, str]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post(f"http://{host}:{server_port}/setup/launch", headers=headers, data=payload) + 
_make_post_request(env, "/setup/launch", headers=headers, data=payload) await asyncio.sleep(5) try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[OPEN_TABS_INFO] Successfully connected to new Chrome instance") except Exception as e: logger.error(f"[OPEN_TABS_INFO] Failed to connect to new Chrome instance: {e}") @@ -796,10 +803,7 @@ async def get_active_tab_info(env, config: Dict[str, str]): logger.info(f"[ACTIVE_TAB_INFO] Active tab URL: {active_tab_url}") - host = env.vm_ip - port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file - - remote_debugging_url = f"http://{host}:{port}" + remote_debugging_url, cdp_headers = _get_cdp_connection_info(env) # Configuration for retry and timeout max_retries = 2 @@ -812,7 +816,7 @@ async def get_active_tab_info(env, config: Dict[str, str]): async with async_playwright() as p: # connect to remote Chrome instance, since it is supposed to be the active one, we won't start a new one if failed try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[ACTIVE_TAB_INFO] Successfully connected to Chrome instance") except Exception as e: logger.error(f"[ACTIVE_TAB_INFO] Failed to connect to Chrome instance: {e}") @@ -879,11 +883,7 @@ async def get_pdf_from_url(env, config: Dict[str, str]) -> str: logger.info(f"[PDF_FROM_URL] Starting PDF download from URL: {_url}") logger.info(f"[PDF_FROM_URL] Target path: {_path}") - host = env.vm_ip - port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file - server_port = env.server_port - - remote_debugging_url = f"http://{host}:{port}" + remote_debugging_url, cdp_headers = _get_cdp_connection_info(env) # Configuration for retry and timeout max_retries = 3 @@ -895,14 
+895,13 @@ async def get_pdf_from_url(env, config: Dict[str, str]) -> str: async with async_playwright() as p: try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[PDF_FROM_URL] Successfully connected to existing Chrome instance") except Exception as e: logger.warning(f"[PDF_FROM_URL] Failed to connect to existing Chrome instance: {e}") logger.info(f"[PDF_FROM_URL] Starting new Chrome instance...") # If the connection fails, start a new browser instance - platform.machine() if "arm" in platform.machine(): # start a new browser instance if the connection fails payload = json.dumps({"command": [ @@ -916,9 +915,9 @@ async def get_pdf_from_url(env, config: Dict[str, str]) -> str: ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) + _make_post_request(env, "/setup/launch", headers=headers, data=payload) await asyncio.sleep(5) - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[PDF_FROM_URL] Successfully connected to new Chrome instance") page = await browser.new_page() @@ -983,11 +982,7 @@ async def get_pdf_from_url(env, config: Dict[str, str]) -> str: # fixme: needs to be changed (maybe through post-processing) since it's not working async def get_chrome_saved_address(env, config: Dict[str, str]): - host = env.vm_ip - port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file - server_port = env.server_port - - remote_debugging_url = f"http://{host}:{port}" + remote_debugging_url, cdp_headers = _get_cdp_connection_info(env) # Configuration for retry and timeout max_retries = 2 @@ -1000,12 +995,11 @@ async def get_chrome_saved_address(env, config: Dict[str, 
str]): async with async_playwright() as p: # connect to remote Chrome instance try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[CHROME_SAVED_ADDRESS] Successfully connected to existing Chrome instance") except Exception as e: logger.warning(f"[CHROME_SAVED_ADDRESS] Failed to connect to existing Chrome instance: {e}") # If the connection fails, start a new browser instance - platform.machine() if "arm" in platform.machine(): # start a new browser instance if the connection fails payload = json.dumps({"command": [ @@ -1019,9 +1013,9 @@ async def get_chrome_saved_address(env, config: Dict[str, str]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) + _make_post_request(env, "/setup/launch", headers=headers, data=payload) await asyncio.sleep(5) - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[CHROME_SAVED_ADDRESS] Successfully connected to new Chrome instance") page = await browser.new_page() @@ -1097,11 +1091,7 @@ def get_shortcuts_on_desktop(env, config: Dict[str, str]): async def get_number_of_search_results(env, config: Dict[str, str]): # todo: move into the config file url, result_selector = "https://google.com/search?q=query", '.search-result' - host = env.vm_ip - port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file - server_port = env.server_port - - remote_debugging_url = f"http://{host}:{port}" + remote_debugging_url, cdp_headers = _get_cdp_connection_info(env) # Configuration for retry and timeout max_retries = 2 @@ -1113,12 +1103,11 @@ async def get_number_of_search_results(env, config: Dict[str, str]): async with async_playwright() as 
p: try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[SEARCH_RESULTS] Successfully connected to existing Chrome instance") except Exception as e: logger.warning(f"[SEARCH_RESULTS] Failed to connect to existing Chrome instance: {e}") # If the connection fails, start a new browser instance - platform.machine() if "arm" in platform.machine(): # start a new browser instance if the connection fails payload = json.dumps({"command": [ @@ -1132,9 +1121,9 @@ async def get_number_of_search_results(env, config: Dict[str, str]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":" + server_port + "/setup" + "/launch", headers=headers, data=payload) + _make_post_request(env, "/setup/launch", headers=headers, data=payload) await asyncio.sleep(5) - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[SEARCH_RESULTS] Successfully connected to new Chrome instance") page = await browser.new_page() @@ -1520,11 +1509,8 @@ async def get_active_tab_html_parse(env, config: Dict[str, Any]): if not isinstance(active_tab_url, str): logger.error(f"[DEBUG] active_tab_url is not a string, got {type(active_tab_url)}: {active_tab_url}") return None - host = env.vm_ip - port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file - server_port = env.server_port - - remote_debugging_url = f"http://{host}:{port}" + + remote_debugging_url, cdp_headers = _get_cdp_connection_info(env) # DEBUG: Add logging for configuration logger.info(f"[DEBUG] get_active_tab_html_parse called with config: {config}") @@ -1532,10 +1518,9 @@ async def get_active_tab_html_parse(env, config: Dict[str, Any]): async with async_playwright() as p: # connect to remote Chrome instance try: - 
browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) except Exception as e: # If the connection fails, start a new browser instance - platform.machine() if "arm" in platform.machine(): # start a new browser instance if the connection fails payload = json.dumps({"command": [ @@ -1549,9 +1534,9 @@ async def get_active_tab_html_parse(env, config: Dict[str, Any]): ], "shell": False}) headers = {"Content-Type": "application/json"} - requests.post("http://" + host + ":" + str(server_port) + "/setup" + "/launch", headers=headers, data=payload) + _make_post_request(env, "/setup/launch", headers=headers, data=payload) await asyncio.sleep(5) - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) target_page = None for context in browser.contexts: for page in context.pages: @@ -1984,13 +1969,8 @@ async def get_gotoRecreationPage_and_get_html_content(env, config: Dict[str, Any logger.info(f"[RECREATION_PAGE] Starting recreation.gov page processing") logger.debug(f"[RECREATION_PAGE] Config: {config}") - host = env.vm_ip - port = env.chromium_port # fixme: this port is hard-coded, need to be changed from config file - server_port = env.server_port + remote_debugging_url, cdp_headers = _get_cdp_connection_info(env) use_proxy = env.current_use_proxy - - remote_debugging_url = f"http://{host}:{port}" - backend_url = f"http://{host}:{server_port}" # Configuration for retry and timeout max_retries = 3 @@ -2013,7 +1993,7 @@ async def get_gotoRecreationPage_and_get_html_content(env, config: Dict[str, Any async with async_playwright() as p: # Connect to remote Chrome instance try: - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[RECREATION_PAGE] 
Successfully connected to existing Chrome instance") except Exception as e: logger.warning(f"[RECREATION_PAGE] Failed to connect to existing Chrome instance: {e}") @@ -2038,9 +2018,9 @@ async def get_gotoRecreationPage_and_get_html_content(env, config: Dict[str, Any logger.info(f"[RECREATION_PAGE] Starting browser with command: {' '.join(command)}") payload = json.dumps({"command": command, "shell": False}) headers = {"Content-Type": "application/json"} - requests.post(backend_url + "/setup/launch", headers=headers, data=payload) + _make_post_request(env, "/setup/launch", headers=headers, data=payload) await asyncio.sleep(8) # Give more time for browser to start - browser = await p.chromium.connect_over_cdp(remote_debugging_url) + browser = await p.chromium.connect_over_cdp(remote_debugging_url, headers=cdp_headers or {}) logger.info(f"[RECREATION_PAGE] Successfully connected to new Chrome instance") page = await browser.new_page() diff --git a/openhands/nvidia/os_world/getters/general.py b/openhands/nvidia/os_world/getters/general.py index 30dc239b9..37e13024e 100644 --- a/openhands/nvidia/os_world/getters/general.py +++ b/openhands/nvidia/os_world/getters/general.py @@ -3,36 +3,54 @@ from openhands.core.logger import openhands_logger as logger + +def _make_post_request(env, endpoint: str, **kwargs): + """Make a POST request using env.client if available, otherwise direct.""" + if hasattr(env, 'client') and env.client: + return env.client.post(endpoint, **kwargs) + else: + vm_ip = env.vm_ip + port = env.server_port + url = f"http://{vm_ip}:{port}{endpoint}" + return requests.post(url, **kwargs) + + def get_vm_command_line(env, config: Dict[str, str]): - vm_ip = env.vm_ip - port = env.server_port command = config["command"] shell = config.get("shell", False) - response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command, "shell": shell}) - - print(response.json()) + response = _make_post_request(env, "/execute", json={"command": command, 
"shell": shell}) if response.status_code == 200: - return response.json()["output"] + try: + result = response.json() + logger.debug(f"VM command response: {result}") + return result.get("output") + except Exception as e: + logger.error(f"Failed to parse VM command response: {e}") + return None else: - logger.error("Failed to get vm command line. Status code: %d", response.status_code) + logger.error("Failed to get vm command line. Status code: %d, Response: %s", + response.status_code, response.text[:200] if response.text else "empty") return None def get_vm_command_error(env, config: Dict[str, str]): - vm_ip = env.vm_ip - port = env.server_port command = config["command"] shell = config.get("shell", False) - response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command, "shell": shell}) - - print(response.json()) + response = _make_post_request(env, "/execute", json={"command": command, "shell": shell}) if response.status_code == 200: - return response.json()["error"] + try: + result = response.json() + logger.debug(f"VM command error response: {result}") + return result.get("error") + except Exception as e: + logger.error(f"Failed to parse VM command error response: {e}") + return None else: - logger.error("Failed to get vm command line error. Status code: %d", response.status_code) + logger.error("Failed to get vm command line error. Status code: %d, Response: %s", + response.status_code, response.text[:200] if response.text else "empty") return None diff --git a/openhands/nvidia/os_world/getters/vlc.py b/openhands/nvidia/os_world/getters/vlc.py index c84816acd..d4177ceab 100644 --- a/openhands/nvidia/os_world/getters/vlc.py +++ b/openhands/nvidia/os_world/getters/vlc.py @@ -11,14 +11,21 @@ def get_vlc_playing_info(env, config: Dict[str, str]): """ Gets the current playing information from VLC's HTTP interface. 
""" - - host = env.vm_ip - port = env.vlc_port password = 'password' - _path = os.path.join(env.cache_dir, config["dest"]) - url = f'http://{host}:{port}/requests/status.xml' - response = requests.get(url, auth=('', password)) + + # Use http_client if available for VLC URL + if hasattr(env, 'client') and env.client: + vlc_url = env.client.get_vlc_url() + url = f'{vlc_url}/requests/status.xml' + headers = env.client.get_cdp_headers() or {} + response = requests.get(url, auth=('', password), headers=headers) + else: + host = env.vm_ip + port = env.vlc_port + url = f'http://{host}:{port}/requests/status.xml' + response = requests.get(url, auth=('', password)) + if response.status_code == 200: content = response.content else: diff --git a/openhands/nvidia/os_world/nvcf/__init__.py b/openhands/nvidia/os_world/nvcf/__init__.py new file mode 100644 index 000000000..637fea4b8 --- /dev/null +++ b/openhands/nvidia/os_world/nvcf/__init__.py @@ -0,0 +1,15 @@ +"""OSWorld NVCF: deploy and interact with OSWorld on NVIDIA Cloud Functions (NVCF).""" + +from openhands.nvidia.os_world.nvcf.config import ( + DEFAULT_CONTAINER_IMAGE, + OSWorldDeploymentConfig, + OSWorldFunctionConfig, +) +from openhands.nvidia.os_world.nvcf.deployer import OSWorldDeployer + +__all__ = [ + "OSWorldDeployer", + "OSWorldFunctionConfig", + "OSWorldDeploymentConfig", + "DEFAULT_CONTAINER_IMAGE", +] diff --git a/openhands/nvidia/os_world/nvcf/config.py b/openhands/nvidia/os_world/nvcf/config.py new file mode 100644 index 000000000..4924433bc --- /dev/null +++ b/openhands/nvidia/os_world/nvcf/config.py @@ -0,0 +1,93 @@ +"""Configuration dataclasses for OSWorld NVCF deployment.""" + +import os +import logging +logger = logging.getLogger(__name__) +from dataclasses import dataclass, field +from typing import List, Optional + +# NGC_ORG = os.environ.get("NGC_ORG", "nvidian") +# DEFAULT_CONTAINER_IMAGE = f"nvcr.io/{NGC_ORG}/nemo:osworld-linux-2" + +DEFAULT_CONTAINER_IMAGE = 
"nvcr.io/i01fc6pe8nwm/nemo:osworld-linux-2-debug" + + +@dataclass +class OSWorldFunctionConfig: + """Configuration for creating an OSWorld NVCF function. + + Attributes: + name: Display name of the function in NVCF. + container_image: NGC container image path. Defaults to DEFAULT_CONTAINER_IMAGE. + inference_url: Endpoint path for inference requests. + inference_port: Port the container exposes for inference. + health_uri: Endpoint path for health checks. + description: Optional description of the function. + container_args: Optional arguments to pass to the container. + container_environment_variables: Optional environment variables + in the format ["KEY1:value1", "KEY2:value2"]. + tags: Optional list of tags for the function. + """ + + name: str = "osworld-linux" + container_image: Optional[str] = None # Set via __post_init__ + inference_url: str = "/api" + inference_port: int = 8000 + health_uri: str = "/api/version" + description: str = "OSWorld Linux environment for AI agent evaluation" + container_args: Optional[str] = None + container_environment_variables: Optional[List[str]] = None + tags: Optional[List[str]] = field(default_factory=lambda: ["osworld"]) + + def __post_init__(self): + """Set default container image if not provided.""" + if self.container_image is None: + self.container_image = DEFAULT_CONTAINER_IMAGE + logger.info(f"Using container image: {self.container_image}") + + +@dataclass +class OSWorldDeploymentConfig: + """Configuration for deploying an OSWorld function. + + Attributes: + gpu: GPU type to use (e.g., "L40", "H100", "A100"). + instance_type: Specific instance type (e.g., "GFN.GPU.L40_1x"). + If not provided, will use a default based on GPU type. + min_instances: Minimum number of instances (0 allows scale-to-zero). + max_instances: Maximum number of instances for autoscaling. + max_request_concurrency: Maximum concurrent requests per instance. + backend: Cluster backend (e.g., "GFN", "AZURE", "GCP", "OCI"). 
+ regions: Optional list of regions (e.g., ["us-west-2", "us-east-1"]). + clusters: Optional list of specific clusters. + attributes: Optional list of cluster attributes (e.g., ["HIPAA", "SOC2"]). + configuration: Optional dict of helm chart value overrides. + """ + + gpu: str = "L40S" + instance_type: Optional[str] = None + min_instances: int = 1 + max_instances: int = 1 + max_request_concurrency: int = 1 + backend: str = "GFN" + regions: Optional[List[str]] = None + clusters: Optional[List[str]] = None + attributes: Optional[List[str]] = None + configuration: Optional[dict] = None + + def get_instance_type(self) -> str: + """Get the instance type, using a default if not explicitly set.""" + if self.instance_type: + return self.instance_type + + # Default instance types for common GPU/backend combinations + # (Experiment.md: use exact type from --list-gpus when auto-detection fails) + defaults = { + ("GFN", "L40"): "gl40_1.br20_2xlarge", + ("GFN", "L40G"): "gl40g_1.br25_2xlarge", + ("GFN", "L40S"): "gl40s_4.br25_small", + ("GFN", "T10"): "g6.full", + ("AZURE", "H100"): "AZURE.GPU.H100_1x", + ("GCP", "H100"): "a3-highgpu-8g_1x", + } + return defaults.get((self.backend, self.gpu), f"{self.backend}.GPU.{self.gpu}_1x") diff --git a/openhands/nvidia/os_world/nvcf/deployer.py b/openhands/nvidia/os_world/nvcf/deployer.py new file mode 100644 index 000000000..6cbaed45c --- /dev/null +++ b/openhands/nvidia/os_world/nvcf/deployer.py @@ -0,0 +1,324 @@ +"""OSWorld deployer using the NGC SDK.""" + +import os +import time +from typing import Any, Dict, Iterator, List, Optional + +from openhands.nvidia.os_world.nvcf.config import ( + OSWorldDeploymentConfig, + OSWorldFunctionConfig, +) + +# NGC SDK imports (optional dependency: pip install ngcsdk) +try: + from ngcsdk import Client + from nvcf.api.deployment_spec import TargetedDeploymentSpecification +except ImportError as e: + Client = None # type: ignore[misc, assignment] + TargetedDeploymentSpecification = None # type: 
ignore[misc, assignment] + _NGCSDK_IMPORT_ERROR = e +else: + _NGCSDK_IMPORT_ERROR = None + + +class OSWorldDeployer: + """Manages OSWorld deployment to NVCF using the NGC SDK. + + This class provides a simplified interface for deploying OSWorld + containers to NVIDIA Cloud Functions. + + Example: + >>> deployer = OSWorldDeployer( + ... api_key="nvapi-xxx", + ... org_name="my-org", + ... ) + >>> result = deployer.create_function() + >>> func_id = result["function"]["id"] + >>> ver_id = result["function"]["versionId"] + >>> deployer.deploy(func_id, ver_id) + """ + + def __init__( + self, + api_key: Optional[str] = None, + org_name: Optional[str] = None, + team_name: str = "no-team", + ): + """Initialize the OSWorld deployer. + + Args: + api_key: NGC API key. If not provided, reads from NGC_API_KEY + environment variable. + org_name: NGC organization name. If not provided, reads from + NGC_ORG environment variable. + team_name: NGC team name. Defaults to "no-team". + + Raises: + ValueError: If api_key or org_name is not provided and not + found in environment variables. + ImportError: If ngcsdk is not installed. + """ + if _NGCSDK_IMPORT_ERROR is not None: + raise ImportError( + "ngcsdk is required for OSWorldDeployer. Install with: pip install ngcsdk" + ) from _NGCSDK_IMPORT_ERROR + + api_key = api_key or os.environ.get("NGC_API_KEY") + org_name = org_name or os.environ.get("NGC_ORG") + + if not api_key: + raise ValueError( + "NGC API key required. Provide api_key or set NGC_API_KEY env var." + ) + if not org_name: + raise ValueError( + "NGC org name required. Provide org_name or set NGC_ORG env var." 
+ ) + + self._api_key = api_key + self._org_name = org_name + self._client = Client() + self._client.configure(api_key, org_name=org_name, team_name=team_name) + + @property + def client(self) -> "Client": + """Access the underlying NGC SDK client.""" + return self._client + + def create_function( + self, + config: Optional[OSWorldFunctionConfig] = None, + function_id: Optional[str] = None, + ) -> Dict[str, Any]: + """Create an OSWorld function in NVCF.""" + if config is None: + config = OSWorldFunctionConfig() + + kwargs: Dict[str, Any] = { + "name": config.name, + "inference_url": config.inference_url, + "container_image": config.container_image, + "inference_port": config.inference_port, + "health_uri": config.health_uri, + "description": config.description, + } + + if config.container_args: + kwargs["container_args"] = config.container_args + if config.container_environment_variables: + kwargs["container_environment_variables"] = ( + config.container_environment_variables + ) + if config.tags: + kwargs["tags"] = config.tags + if function_id: + kwargs["function_id"] = function_id + + return self._client.cloud_function.functions.create(**kwargs) + + def deploy( + self, + function_id: str, + function_version_id: str, + config: Optional[OSWorldDeploymentConfig] = None, + ) -> Dict[str, Any]: + """Deploy an OSWorld function with specified GPU configuration.""" + if config is None: + config = OSWorldDeploymentConfig() + + spec = TargetedDeploymentSpecification( + gpu=config.gpu, + instance_type=config.get_instance_type(), + min_instances=config.min_instances, + max_instances=config.max_instances, + max_request_concurrency=config.max_request_concurrency, + regions=config.regions, + clusters=config.clusters, + attributes=config.attributes, + configuration=config.configuration, + ) + + return self._client.cloud_function.functions.deployments.create( + function_id=function_id, + function_version_id=function_version_id, + targeted_deployment_specifications=[spec], + ) + + 
def update_deployment( + self, + function_id: str, + function_version_id: str, + config: OSWorldDeploymentConfig, + ) -> Dict[str, Any]: + """Update an existing deployment configuration.""" + spec = TargetedDeploymentSpecification( + gpu=config.gpu, + instance_type=config.get_instance_type(), + min_instances=config.min_instances, + max_instances=config.max_instances, + max_request_concurrency=config.max_request_concurrency, + regions=config.regions, + clusters=config.clusters, + attributes=config.attributes, + configuration=config.configuration, + ) + + return self._client.cloud_function.functions.deployments.update( + function_id=function_id, + function_version_id=function_version_id, + targeted_deployment_specifications=[spec], + ) + + def get_function_info( + self, + function_id: str, + function_version_id: str, + ) -> Dict[str, Any]: + """Get information about a function version.""" + return self._client.cloud_function.functions.info( + function_id=function_id, + function_version_id=function_version_id, + ) + + def get_deployment_info( + self, + function_id: str, + function_version_id: str, + ) -> Dict[str, Any]: + """Get information about a function's deployment.""" + return self._client.cloud_function.functions.deployments.info( + function_id=function_id, + function_version_id=function_version_id, + ) + + def get_status( + self, + function_id: str, + function_version_id: str, + ) -> str: + """Get the current status of a function.""" + info = self.get_function_info(function_id, function_version_id) + return info.get("function", {}).get("status", "UNKNOWN") + + def wait_for_active( + self, + function_id: str, + function_version_id: str, + timeout: int = 1800, + poll_interval: int = 30, + ) -> str: + """Wait for a function to become ACTIVE.""" + start_time = time.time() + while time.time() - start_time < timeout: + status = self.get_status(function_id, function_version_id) + if status == "ACTIVE": + return status + if status == "ERROR": + raise 
RuntimeError("Function entered ERROR state") + time.sleep(poll_interval) + + raise TimeoutError( + f"Function did not become ACTIVE within {timeout} seconds. " + f"Last status: {status}" + ) + + def list_functions( + self, + name_pattern: Optional[str] = None, + access_filter: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """List functions available to the organization.""" + return self._client.cloud_function.functions.list( + name_pattern=name_pattern, + access_filter=access_filter or ["private"], + ) + + def list_available_gpus(self) -> Dict[str, Any]: + """List available GPU types for deployment.""" + return self._client.cloud_function.gpus.list() + + def get_gpu_info(self, gpu_name: str) -> Dict[str, Any]: + """Get detailed information about a specific GPU type.""" + return self._client.cloud_function.gpus.info(gpu_name) + + def invoke( + self, + function_id: str, + payload: Dict[str, Any], + api_key: Optional[str] = None, + function_version_id: Optional[str] = None, + ) -> Dict[str, Any]: + """Invoke an OSWorld function.""" + return self._client.cloud_function.functions.invoke( + function_id=function_id, + payload=payload, + starfleet_api_key=api_key or self._api_key, + function_version_id=function_version_id, + ) + + def invoke_stream( + self, + function_id: str, + payload: Dict[str, Any], + api_key: Optional[str] = None, + function_version_id: Optional[str] = None, + ) -> Iterator[bytes]: + """Invoke an OSWorld function with streaming response.""" + return self._client.cloud_function.functions.invoke_stream( + function_id=function_id, + payload=payload, + starfleet_api_key=api_key or self._api_key, + function_version_id=function_version_id, + ) + + def undeploy( + self, + function_id: str, + function_version_id: str, + graceful: bool = True, + ) -> None: + """Remove a deployment (undeploy a function).""" + self._client.cloud_function.functions.deployments.delete( + function_id=function_id, + function_version_id=function_version_id, + 
graceful=graceful, + ) + + def delete_function( + self, + function_id: str, + function_version_id: str, + ) -> None: + """Delete a function version. Must be undeployed first.""" + self._client.cloud_function.functions.delete( + function_id=function_id, + function_version_id=function_version_id, + ) + + def query_deployment_logs( + self, + function_id: str, + function_version_id: str, + duration: Optional[str] = None, + ) -> Iterator[Dict[str, Any]]: + """Query deployment logs for a function.""" + from datetime import timedelta + + td = None + if duration: + unit = duration[-1].upper() + value = int(duration[:-1]) + if unit == "H": + td = timedelta(hours=value) + elif unit == "M": + td = timedelta(minutes=value) + elif unit == "D": + td = timedelta(days=value) + elif unit == "S": + td = timedelta(seconds=value) + + return self._client.cloud_function.functions.deployments.query_logs( + function_id=function_id, + function_version_id=function_version_id, + duration=td, + ) diff --git a/openhands/nvidia/os_world/osworld_utils.py b/openhands/nvidia/os_world/osworld_utils.py index ff72c385e..da7654b3c 100644 --- a/openhands/nvidia/os_world/osworld_utils.py +++ b/openhands/nvidia/os_world/osworld_utils.py @@ -67,7 +67,7 @@ def get_config( sandbox_config = SandboxConfig( base_container_image='ubuntu:24.04', - run_as_fakeroot=True, + run_as_fakeroot=False, ) config = OpenHandsConfig( @@ -116,11 +116,9 @@ def get_instruction(instance: pd.Series | dict, metadata: EvalMetadata, runtime: include_screenshot = True #runtime.config.agents['agent'].enable_vision include_a11y_tree = True #runtime.config.agents['agent'].enable_a11y_tree - instruction = f"""Work on the following task accourding to the UI screenshot. + instruction = f"""Work on the following task according to the UI screenshot. Instruction: {instance['instruction']} - -First describe the screenshot in detail, think step by step, then generate the next move. 
""" if include_a11y_tree: @@ -216,6 +214,7 @@ async def initialize_runtime(runtime: Runtime, instance: dict, metadata: EvalMet vm_ip="127.0.0.1", server_port=runtime._vm_server_port, chromium_port=runtime._chromium_port, + vlc_port=runtime._vlc_port, cache_dir="/tmp/osworld_example", client_password="password", runtime=runtime # Pass your runtime object here @@ -514,4 +513,4 @@ def eval_exception(job_details: JobDetails, e: Exception): } ############################################################################### # End of exception handling -############################################################################### \ No newline at end of file +############################################################################### diff --git a/openhands/runtime/__init__.py b/openhands/runtime/__init__.py index 4ff594d7f..8ce667d88 100644 --- a/openhands/runtime/__init__.py +++ b/openhands/runtime/__init__.py @@ -12,6 +12,7 @@ from openhands.runtime.impl.runloop.runloop_runtime import RunloopRuntime from openhands.runtime.impl.singularity.singularity_runtime import SingularityRuntime from openhands.runtime.impl.singularity.osworld_singularity_runtime import OSWorldSingularityRuntime +from openhands.runtime.impl.nvcf import OSWorldNVCFRuntime from openhands.utils.import_utils import get_impl # mypy: disable-error-code="type-abstract" @@ -27,6 +28,7 @@ 'enroot': EnrootRuntime, 'singularity': SingularityRuntime, 'osworld': OSWorldSingularityRuntime, + 'osworld_nvcf': OSWorldNVCFRuntime, 'cli': CLIRuntime, } @@ -60,5 +62,6 @@ def get_runtime_cls(name: str) -> type[Runtime]: 'EnrootRuntime', 'SingularityRuntime', 'OSWorldSingularityRuntime', + 'OSWorldNVCFRuntime', 'get_runtime_cls', ] diff --git a/openhands/runtime/impl/nvcf/__init__.py b/openhands/runtime/impl/nvcf/__init__.py new file mode 100644 index 000000000..660ea3afd --- /dev/null +++ b/openhands/runtime/impl/nvcf/__init__.py @@ -0,0 +1,9 @@ +"""NVCF (NVIDIA Cloud Functions) runtime implementation.""" + +from 
openhands.runtime.impl.nvcf.nvcf_runtime import NVCFRuntime +from openhands.runtime.impl.nvcf.osworld_nvcf_runtime import OSWorldNVCFRuntime + +__all__ = [ + "NVCFRuntime", + "OSWorldNVCFRuntime", +] diff --git a/openhands/runtime/impl/nvcf/nvcf_proxy.py b/openhands/runtime/impl/nvcf/nvcf_proxy.py new file mode 100644 index 000000000..dd2ead1b9 --- /dev/null +++ b/openhands/runtime/impl/nvcf/nvcf_proxy.py @@ -0,0 +1,222 @@ +"""Local proxy for NVCF services that injects authentication headers. + +This module provides a local HTTP/WebSocket proxy that forwards requests to NVCF +with the required Authorization and Function-ID headers. This is necessary for +services like Chrome DevTools Protocol and VLC that don't support custom headers. +""" + +import asyncio +import socket +import threading +import time +from http.server import HTTPServer, BaseHTTPRequestHandler +from typing import Optional +import urllib.request +import urllib.error +import ssl +import json + +from openhands.core.logger import openhands_logger as logger + + +class NVCFProxyHandler(BaseHTTPRequestHandler): + """HTTP request handler that forwards to NVCF with auth headers.""" + + # Class-level configuration (set by NVCFLocalProxy) + nvcf_base_url: str = "" + nvcf_path_prefix: str = "" + api_key: str = "" + function_id: str = "" + + def log_message(self, format, *args): + """Suppress default logging.""" + pass + + def _get_target_url(self, path: str) -> str: + """Convert local path to NVCF target URL.""" + # Remove leading slash for clean join + path = path.lstrip("/") + # Build target URL: base + path_prefix + path + prefix = self.nvcf_path_prefix.strip("/") + if prefix: + return f"{self.nvcf_base_url}/{prefix}/{path}" + return f"{self.nvcf_base_url}/{path}" + + def _forward_request(self, method: str) -> None: + """Forward request to NVCF.""" + target_url = self._get_target_url(self.path) + + # Read request body + content_length = int(self.headers.get('Content-Length', 0)) + body = 
self.rfile.read(content_length) if content_length > 0 else None + + # Build headers + headers = { + "Authorization": f"Bearer {self.api_key}", + "Function-ID": self.function_id, + } + + # Copy relevant headers from original request + for header in ['Content-Type', 'Accept', 'User-Agent']: + if header in self.headers: + headers[header] = self.headers[header] + + try: + # Create request + req = urllib.request.Request( + target_url, + data=body, + headers=headers, + method=method, + ) + + # Default SSL context: verifies server certificates and hostname + ctx = ssl.create_default_context() + + # Make request + with urllib.request.urlopen(req, context=ctx, timeout=60) as response: + # Send response status + self.send_response(response.status) + + # Forward response headers + for header, value in response.headers.items(): + if header.lower() not in ('transfer-encoding', 'connection'): + self.send_header(header, value) + self.end_headers() + + # Forward response body + self.wfile.write(response.read()) + + except urllib.error.HTTPError as e: + self.send_response(e.code) + self.send_header('Content-Type', 'text/plain') + self.end_headers() + error_body = e.read() if e.fp else b'' + self.wfile.write(error_body or f"HTTP Error {e.code}: {e.reason}".encode()) + except urllib.error.URLError as e: + self.send_response(502) + self.send_header('Content-Type', 'text/plain') + self.end_headers() + self.wfile.write(f"Proxy Error: {e.reason}".encode()) + except Exception as e: + self.send_response(502) + self.send_header('Content-Type', 'text/plain') + self.end_headers() + self.wfile.write(f"Proxy Error: {e}".encode()) + + def do_GET(self): + self._forward_request("GET") + + def do_POST(self): + self._forward_request("POST") + + def do_PUT(self): + self._forward_request("PUT") + + def do_DELETE(self): + self._forward_request("DELETE") + + def do_OPTIONS(self): + self._forward_request("OPTIONS") + + def do_HEAD(self): + self._forward_request("HEAD") + + +class NVCFLocalProxy: + """Local 
proxy that forwards requests to NVCF with auth headers. + + Supports HTTP connections. Used for Chrome DevTools and VLC web interface + access through NVCF. + + Note: This is a simple HTTP proxy. For full WebSocket support (needed for + Chrome DevTools), you may need a more sophisticated solution. + """ + + def __init__( + self, + nvcf_base_url: str, + nvcf_path_prefix: str, + api_key: str, + function_id: str, + local_port: Optional[int] = None, + ): + """Initialize the NVCF local proxy. + + Args: + nvcf_base_url: Base URL for NVCF (e.g., https://grpc.nvcf.nvidia.com) + nvcf_path_prefix: Path prefix for the service (e.g., /chrome, /vlc) + api_key: NGC API key for authentication + function_id: NVCF function ID + local_port: Local port to listen on (auto-assigned if None) + """ + self.nvcf_base_url = nvcf_base_url.rstrip("/") + self.nvcf_path_prefix = nvcf_path_prefix + self.api_key = api_key + self.function_id = function_id + self.local_port = local_port or self._find_available_port() + + self._server: Optional[HTTPServer] = None + self._thread: Optional[threading.Thread] = None + self._running = False + + @staticmethod + def _find_available_port() -> int: + """Find an available port on localhost.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("127.0.0.1", 0)) + s.listen(1) + port = s.getsockname()[1] + return port + + @property + def local_url(self) -> str: + """Get the local proxy URL.""" + return f"http://127.0.0.1:{self.local_port}" + + def _create_handler_class(self): + """Create a handler class with configuration bound.""" + class ConfiguredHandler(NVCFProxyHandler): + nvcf_base_url = self.nvcf_base_url + nvcf_path_prefix = self.nvcf_path_prefix + api_key = self.api_key + function_id = self.function_id + return ConfiguredHandler + + def _run_server(self) -> None: + """Run the HTTP server in a thread.""" + handler_class = self._create_handler_class() + self._server = HTTPServer(("127.0.0.1", self.local_port), handler_class) + 
self._running = True + self._server.serve_forever() + + def start(self) -> None: + """Start the proxy server in a background thread.""" + if self._running: + return + self._thread = threading.Thread(target=self._run_server, daemon=True) + self._thread.start() + # Wait for server to be ready + for _ in range(50): # 5 seconds timeout + time.sleep(0.1) + if self._running: + break + logger.debug(f"NVCF proxy started on port {self.local_port}") + + def stop(self) -> None: + """Stop the proxy server.""" + if not self._running: + return + self._running = False + if self._server: + self._server.shutdown() + self._server = None + if self._thread: + self._thread.join(timeout=5) + self._thread = None + logger.debug(f"NVCF proxy stopped on port {self.local_port}") + + +def find_available_port() -> int: + """Find an available port on localhost.""" + return NVCFLocalProxy._find_available_port() diff --git a/openhands/runtime/impl/nvcf/nvcf_runtime.py b/openhands/runtime/impl/nvcf/nvcf_runtime.py new file mode 100644 index 000000000..536b4041e --- /dev/null +++ b/openhands/runtime/impl/nvcf/nvcf_runtime.py @@ -0,0 +1,175 @@ +"""Base NVCF runtime: extends ActionExecutionClient with deploy-on-connect and undeploy-on-close. + +This runtime does not run a local action execution server; connect() deploys an NVCF +function (or attaches to an existing one), and close() undeploys if we deployed in +this session. Subclasses (e.g. OSWorldNVCFRuntime) implement run_action by calling +the NVCF API. 
+""" + +import os +from typing import Any + +from openhands.core.config import OpenHandsConfig +from openhands.core.exceptions import AgentRuntimeDisconnectedError +from openhands.core.logger import openhands_logger as logger +from openhands.events import EventStream +from openhands.events.observation import ErrorObservation, Observation +from openhands.integrations.provider import PROVIDER_TOKEN_TYPE +from openhands.runtime.impl.action_execution.action_execution_client import ( + ActionExecutionClient, +) +from openhands.runtime.plugins import PluginRequirement + + +class NVCFRuntime(ActionExecutionClient): + """Base runtime for NVIDIA Cloud Functions (NVCF). + + - connect(): Deploys an NVCF function if nvcf_function_id is not set (uses + openhands.nvidia.os_world.nvcf), then marks runtime as initialized. + - close(): Undeploys the function if we deployed it in this session and + undeploy_on_close is True, then closes the session. + - check_if_alive(): Raises if runtime is not connected (no action server to ping). + - run_action(): Returns ErrorObservation (no local action server); subclasses + override to dispatch to NVCF API (e.g. OSWorld). 
+ """ + + def __init__( + self, + config: OpenHandsConfig, + event_stream: EventStream, + sid: str = 'default', + plugins: list[PluginRequirement] | None = None, + env_vars: dict[str, str] | None = None, + status_callback: Any | None = None, + attach_to_existing: bool = False, + headless_mode: bool = True, + user_id: str | None = None, + git_provider_tokens: PROVIDER_TOKEN_TYPE | None = None, + nvcf_function_id: str | None = None, + nvcf_version_id: str | None = None, + nvcf_api_key: str | None = None, + nvcf_org: str | None = None, + nvcf_function_config: Any = None, + nvcf_deployment_config: Any = None, + undeploy_on_close: bool = True, + ): + self._nvcf_api_key = nvcf_api_key or os.environ.get("NGC_API_KEY") + self._nvcf_org = nvcf_org or os.environ.get("NGC_ORG") + _fid = nvcf_function_id or os.environ.get("NVCF_FUNCTION_ID") + self._nvcf_function_id = ( + _fid.strip() if isinstance(_fid, str) and _fid else (_fid or None) + ) + _vid = nvcf_version_id or os.environ.get("NVCF_VERSION_ID") + self._nvcf_version_id = ( + _vid.strip() if isinstance(_vid, str) and _vid else (_vid or None) + ) + self._nvcf_function_config = nvcf_function_config + self._nvcf_deployment_config = nvcf_deployment_config + self._undeploy_on_close = undeploy_on_close + self._we_deployed_this = False + self._deployer = None + + if not self._nvcf_api_key: + raise ValueError( + "NGC API key required for NVCF. Provide nvcf_api_key or set NGC_API_KEY." + ) + if not self._nvcf_function_id and not self._nvcf_org: + raise ValueError( + "When deploying on connect, NGC org required. " + "Provide nvcf_org or set NGC_ORG." 
+ ) + + super().__init__( + config=config, + event_stream=event_stream, + sid=sid, + plugins=plugins, + env_vars=env_vars, + status_callback=status_callback, + attach_to_existing=attach_to_existing, + headless_mode=headless_mode, + user_id=user_id, + git_provider_tokens=git_provider_tokens, + ) + + def _deploy_nvcf(self) -> tuple[str, str]: + """Create and deploy an NVCF function; return (function_id, version_id).""" + from openhands.nvidia.os_world.nvcf import ( + OSWorldDeployer, + OSWorldFunctionConfig, + OSWorldDeploymentConfig, + ) + func_config = self._nvcf_function_config + if func_config is None: + func_config = OSWorldFunctionConfig( + name=f"nvcf-runtime-{self.sid}", + description="NVCF runtime deploy-on-connect", + ) + deploy_config = self._nvcf_deployment_config + if deploy_config is None: + deploy_config = OSWorldDeploymentConfig( + gpu="L40S", + min_instances=1, + max_instances=1, + ) + self.log("info", "Deploying NVCF function...") + self.send_status_message("STATUS$PREPARING_CONTAINER") + self._deployer = OSWorldDeployer( + api_key=self._nvcf_api_key, + org_name=self._nvcf_org, + ) + result = self._deployer.create_function(func_config) + function = result.get("function", {}) + function_id = function.get("id") + version_id = function.get("versionId") + if not function_id or not version_id: + raise RuntimeError(f"NVCF create_function did not return ids: {result}") + self.log("info", f"Created function {function_id}; deploying...") + self._deployer.deploy(function_id, version_id, deploy_config) + self.log("info", "Waiting for NVCF function to become ACTIVE...") + self._deployer.wait_for_active( + function_id, version_id, timeout=1800, poll_interval=30 + ) + self.log("info", "NVCF function is ACTIVE") + return function_id, version_id + + async def connect(self) -> None: + """Deploy NVCF function if needed, then mark runtime as connected.""" + self.send_status_message("STATUS$STARTING_RUNTIME") + from openhands.utils.async_utils import 
call_sync_from_async + if self._nvcf_function_id is None: + self._nvcf_function_id, self._nvcf_version_id = await call_sync_from_async( + self._deploy_nvcf + ) + self._we_deployed_this = True + self._runtime_initialized = True + self.log("info", f"NVCF runtime connected: {self._nvcf_function_id}") + + def check_if_alive(self) -> None: + """NVCF runtime has no local action server; require subclass to implement.""" + if not getattr(self, "_runtime_initialized", False): + raise AgentRuntimeDisconnectedError("NVCF runtime is not connected.") + + def run_action(self, action) -> Observation: + """Base NVCF runtime does not execute actions; subclasses override for NVCF API.""" + return ErrorObservation( + "This runtime does not support the requested action. " + "Use OSWorldNVCFRuntime for OSWorld actions." + ) + + def close(self, rm_all_containers: bool | None = None) -> None: + """Undeploy NVCF function if we deployed in this session, then close session.""" + if self._we_deployed_this and self._undeploy_on_close and self._deployer: + if self._nvcf_function_id and self._nvcf_version_id: + try: + self.log("info", "Undeploying NVCF function...") + self._deployer.undeploy( + self._nvcf_function_id, + self._nvcf_version_id, + graceful=True, + ) + self.log("info", "NVCF function undeployed") + except Exception as e: + logger.warning(f"Failed to undeploy NVCF function: {e}") + self._deployer = None + super().close() diff --git a/openhands/runtime/impl/nvcf/osworld_nvcf_runtime.py b/openhands/runtime/impl/nvcf/osworld_nvcf_runtime.py new file mode 100644 index 000000000..291f25a13 --- /dev/null +++ b/openhands/runtime/impl/nvcf/osworld_nvcf_runtime.py @@ -0,0 +1,961 @@ +"""OSWorld NVCF runtime: same API as OSWorldSingularityRuntime but dispatches to NVCF API.""" + +import os +import time +import threading +from typing import TYPE_CHECKING, Any, Optional + +import httpx + +from openhands.core.config import OpenHandsConfig +from openhands.core.exceptions import 
AgentRuntimeDisconnectedError +from openhands.core.logger import openhands_logger as logger +from openhands.events import EventStream +from openhands.events.tool import ToolCallMetadata +from openhands.runtime.impl.nvcf.nvcf_runtime import NVCFRuntime +from openhands.runtime.impl.nvcf.nvcf_proxy import NVCFLocalProxy +from openhands.runtime.plugins import PluginRequirement +from openhands.runtime.utils.osworld_http_client import NVCFHttpClient + +if TYPE_CHECKING: + from openhands.events.observation import Observation + +NVCF_API_BASE = "https://grpc.nvcf.nvidia.com/api" +NVCF_BASE_URL = "https://grpc.nvcf.nvidia.com" + + +class OSWorldNVCFRuntime(NVCFRuntime): + """Runtime for OSWorld via NVCF. Same API as OSWorldSingularityRuntime; dispatches to NVCF.""" + + def __init__( + self, + config: OpenHandsConfig, + event_stream: EventStream, + sid: str = 'default', + plugins: list[PluginRequirement] | None = None, + env_vars: dict[str, str] | None = None, + status_callback: Any | None = None, + attach_to_existing: bool = False, + headless_mode: bool = True, + user_id: str | None = None, + git_provider_tokens: Any = None, + nvcf_function_id: str | None = None, + nvcf_version_id: str | None = None, + nvcf_api_key: str | None = None, + nvcf_org: str | None = None, + nvcf_function_config: Any = None, + nvcf_deployment_config: Any = None, + undeploy_on_close: bool = True, + os_type: str = 'linux', + enable_chrome_proxy: bool = True, + enable_vlc_proxy: bool = True, + ): + self.os_type = os_type.lower() + self._nvcf_client: httpx.Client | None = None + self.screen_size: tuple[int, int] = (1920, 1080) # updated by get_vm_screen_size + + # Local proxy settings + self._enable_chrome_proxy = enable_chrome_proxy + self._enable_vlc_proxy = enable_vlc_proxy + self._chrome_proxy: Optional[NVCFLocalProxy] = None + self._vlc_proxy: Optional[NVCFLocalProxy] = None + + super().__init__( + config=config, + event_stream=event_stream, + sid=sid, + plugins=plugins, + env_vars=env_vars, + 
status_callback=status_callback, + attach_to_existing=attach_to_existing, + headless_mode=headless_mode, + user_id=user_id, + git_provider_tokens=git_provider_tokens, + nvcf_function_id=nvcf_function_id, + nvcf_version_id=nvcf_version_id, + nvcf_api_key=nvcf_api_key, + nvcf_org=nvcf_org, + nvcf_function_config=nvcf_function_config, + nvcf_deployment_config=nvcf_deployment_config, + undeploy_on_close=undeploy_on_close, + ) + + async def connect(self) -> None: + await super().connect() + self.log("debug", "Connecting to OSWorld NVCF function...") + headers = { + "Authorization": f"Bearer {self._nvcf_api_key}", + "Function-ID": self._nvcf_function_id, + } + self._nvcf_client = httpx.Client( + base_url=NVCF_API_BASE, + headers=headers, + timeout=60.0, + ) + # Verify endpoint and capture NVCF session headers + r = self._nvcf_client.get("/screenshot", timeout=15.0) + if r.status_code != 200: + body = r.text[:500] + self._nvcf_client.close() + self._nvcf_client = None + raise AgentRuntimeDisconnectedError( + f"NVCF function returned HTTP {r.status_code}: {body}" + ) + # NVCF stateful functions return session routing headers — persist them + self._nvcf_session_headers = {} + self.log("info", f"NVCF init response headers: {dict(r.headers)}") + for hdr in ("NVCF-REQID", "NVCF-SESSION-ID", "nvcf-reqid", "nvcf-session-id"): + val = r.headers.get(hdr) + if val: + self._nvcf_client.headers[hdr] = val + self._nvcf_session_headers[hdr] = val + self.log("info", f"Captured NVCF session header: {hdr}={val}") + self.log("info", f"NVCF session headers captured: {self._nvcf_session_headers}") + self.log("info", f"OSWorld NVCF client ready: {self._nvcf_function_id}") + + # Start NVCF session keepalive to prevent idle timeout (~30-60s) + self._keepalive_stop = threading.Event() + self._keepalive_thread = threading.Thread( + target=self._nvcf_keepalive_loop, daemon=True + ) + self._keepalive_thread.start() + + # Soft-reset VM state from previous job (kill leftover apps, clear temp files) + 
self._reset_vm() + # self.log("info", "Skipping VM soft-reset") + + # Start local proxies for Chrome DevTools and VLC + self._start_local_proxies() + + def _reset_vm(self) -> None: + """Soft-reset VM state between jobs: kill leftover apps, clear temp files. + + Important: Do NOT kill chrome, chromium, or socat -- these are part of the + VM base state and are expected by setup steps (_chrome_open_tabs_setup uses + socat to proxy Chrome DevTools). Setup will re-launch them if needed. + """ + script = """ +# Kill common leftover apps from OSWorld tasks +# NOTE: Do NOT kill chrome/chromium/socat - they are part of the VM base state +pkill -f thunderbird || true +pkill -f libreoffice || true +pkill -f vlc || true +pkill -f gimp || true +pkill -f nautilus || true +pkill -f gedit || true +pkill -f "code " || true +pkill -f evince || true +pkill -f eog || true + +# Clean temp files (screenshot temp files + general tmp) +rm -f /tmp/tmp*.png 2>/dev/null || true +rm -f /tmp/tmp*.jpg 2>/dev/null || true +rm -rf /tmp/tmp[A-Za-z0-9_]* 2>/dev/null || true +rm -rf /home/user/Downloads/* 2>/dev/null || true + +# Remove files uploaded to Desktop by previous jobs +find /home/user/Desktop -maxdepth 1 -newer /etc/hostname -not -name "*.desktop" -delete 2>/dev/null || true + +sleep 0.5 +""" + try: + r = self._nvcf_post("/run_bash_script", json={"script": script, "timeout": 30}) + if r.status_code == 200: + self.log("info", "VM soft-reset completed successfully") + else: + self.log("warning", f"VM soft-reset returned HTTP {r.status_code}") + except Exception as e: + self.log("warning", f"VM soft-reset failed: {e}") + + def _nvcf_keepalive_loop(self) -> None: + """Ping the NVCF function every 20s to prevent session idle timeout.""" + while not self._keepalive_stop.wait(20.0): + try: + r = self.http_client.get("/platform", timeout=10.0) + if r.status_code != 200: + self.log("warning", f"Keepalive got HTTP {r.status_code}") + except Exception as e: + self.log("warning", f"Keepalive failed: 
{e}") + + def _start_local_proxies(self) -> None: + """Start local proxies for Chrome DevTools and VLC web interface.""" + if self._enable_chrome_proxy: + try: + self._chrome_proxy = NVCFLocalProxy( + nvcf_base_url=NVCF_BASE_URL, + nvcf_path_prefix="/chrome", + api_key=self._nvcf_api_key, + function_id=self._nvcf_function_id, + ) + self._chrome_proxy.start() + self.log("info", f"Chrome DevTools proxy started at {self._chrome_proxy.local_url}") + except Exception as e: + self.log("warning", f"Failed to start Chrome proxy: {e}") + self._chrome_proxy = None + + if self._enable_vlc_proxy: + try: + self._vlc_proxy = NVCFLocalProxy( + nvcf_base_url=NVCF_BASE_URL, + nvcf_path_prefix="/vlc", + api_key=self._nvcf_api_key, + function_id=self._nvcf_function_id, + ) + self._vlc_proxy.start() + self.log("info", f"VLC web interface proxy started at {self._vlc_proxy.local_url}") + except Exception as e: + self.log("warning", f"Failed to start VLC proxy: {e}") + self._vlc_proxy = None + + def _stop_local_proxies(self) -> None: + """Stop all local proxies.""" + if self._chrome_proxy: + try: + self._chrome_proxy.stop() + self.log("debug", "Chrome DevTools proxy stopped") + except Exception as e: + self.log("warning", f"Failed to stop Chrome proxy: {e}") + self._chrome_proxy = None + + if self._vlc_proxy: + try: + self._vlc_proxy.stop() + self.log("debug", "VLC web interface proxy stopped") + except Exception as e: + self.log("warning", f"Failed to stop VLC proxy: {e}") + self._vlc_proxy = None + + def check_if_alive(self) -> None: + if not self._nvcf_client: + raise AgentRuntimeDisconnectedError("OSWorld NVCF runtime is not connected.") + r = self._nvcf_get("/screenshot", timeout=5.0) + if r.status_code != 200: + raise AgentRuntimeDisconnectedError("NVCF function is not responding") + + def close(self, rm_all_containers: bool | None = None) -> None: + # Stop keepalive thread + if hasattr(self, '_keepalive_stop'): + self._keepalive_stop.set() + + # Stop local proxies first + 
self._stop_local_proxies() + + # Clear shared http_client + if hasattr(self, '_http_client'): + self._http_client = None + + if self._nvcf_client: + try: + self._nvcf_client.close() + except Exception as e: + logger.warning(f"Failed to close NVCF client: {e}") + self._nvcf_client = None + super().close(rm_all_containers) + + # --- Properties (match OSWorldSingularityRuntime) --- + @property + def osworld_vm_url(self) -> str: + return NVCF_API_BASE + + @property + def vnc_url(self) -> str: + # VNC is not currently proxied (would need WebSocket support for noVNC) + return "vnc://nvcf-not-available" + + @property + def chromium_devtools_url(self) -> str: + """Get the Chrome DevTools URL (local proxy or placeholder).""" + if self._chrome_proxy and self._chrome_proxy._running: + return self._chrome_proxy.local_url + return "http://nvcf-not-available" + + @property + def chromium_port(self) -> int: + """Get the local Chrome DevTools proxy port.""" + if self._chrome_proxy and self._chrome_proxy._running: + return self._chrome_proxy.local_port + return 9222 # Default fallback + + @property + def vlc_url(self) -> str: + """Get the VLC web interface URL (local proxy or placeholder).""" + if self._vlc_proxy and self._vlc_proxy._running: + return self._vlc_proxy.local_url + return "http://nvcf-not-available" + + @property + def vlc_port(self) -> int: + """Get the local VLC proxy port.""" + if self._vlc_proxy and self._vlc_proxy._running: + return self._vlc_proxy.local_port + return 8080 # Default fallback + + @property + def vm_ip(self) -> str: + """Get the VM IP for setup controller compatibility. + + For NVCF, this returns localhost since we use local proxies. + """ + return "127.0.0.1" + + @property + def http_client(self): + """Get the HTTP client for runtime-agnostic communication. + + Returns a shared NVCFHttpClient that handles NVCF authentication and URL rewriting. + Reuses the same instance so NVCF session state is preserved across all callers. 
+ """ + if not hasattr(self, '_http_client') or self._http_client is None: + self._http_client = NVCFHttpClient( + api_key=self._nvcf_api_key, + function_id=self._nvcf_function_id, + session_headers=getattr(self, '_nvcf_session_headers', None), + ) + return self._http_client + + # --- OSWorld API (NVCF HTTP) --- + def _nvcf_get(self, endpoint: str, **kwargs): + """GET via shared http_client (requests-based) to maintain NVCF session.""" + return self.http_client.get(endpoint, **kwargs) + + def _nvcf_post(self, endpoint: str, **kwargs): + """POST via shared http_client (requests-based) to maintain NVCF session.""" + return self.http_client.post(endpoint, **kwargs) + + def get_vm_screenshot(self) -> bytes | None: + max_retries = 5 + for attempt in range(max_retries): + try: + r = self._nvcf_get("/screenshot", timeout=30.0) + if r.status_code == 200: + return r.content + body = r.text[:200] if r.text else "" + self.log("warning", + f"Screenshot attempt {attempt + 1}/{max_retries} failed: " + f"HTTP {r.status_code} fn={self._nvcf_function_id} body={body}") + except Exception as e: + self.log("warning", f"Screenshot attempt {attempt + 1}/{max_retries} failed: {e}") + if attempt < max_retries - 1: + backoff = min(5 * (2 ** attempt), 30) # 5s, 10s, 20s, 30s + time.sleep(backoff) + self.log("error", f"Failed to get VM screenshot after {max_retries} retries (fn={self._nvcf_function_id})") + return None + + def get_vm_accessibility_tree(self) -> str | None: + try: + r = self._nvcf_get("/accessibility", timeout=30.0) + if r.status_code != 200: + return None + try: + return r.json().get("AT") or r.json().get("accessibility_tree") or r.text + except Exception: + return r.text + except Exception as e: + self.log("error", f"Failed to get VM accessibility tree: {e}") + return None + + def _execute_pyautogui_command(self, pyautogui_command: str) -> dict: + command = ( + "import pyautogui; import time; pyautogui.FAILSAFE = False; " + + pyautogui_command + ) + payload = {"command": 
["python", "-c", command], "shell": False} + max_retries = 5 + for attempt in range(max_retries): + try: + r = self._nvcf_post("/execute", json=payload, timeout=30.0) + if r.status_code == 200: + return r.json() + body = r.text[:200] if r.text else "" + self.log("warning", + f"Execute attempt {attempt + 1}/{max_retries} failed: " + f"HTTP {r.status_code} fn={self._nvcf_function_id} body={body}") + except Exception as e: + self.log("warning", f"Execute attempt {attempt + 1}/{max_retries} failed: {e}") + if attempt < max_retries - 1: + backoff = min(5 * (2 ** attempt), 30) # 5s, 10s, 20s, 30s + time.sleep(backoff) + return {"status": "error", "message": f"Failed after {max_retries} retries"} + + def _action_to_pyautogui_command(self, action_type: str, parameters: dict) -> str | None: + import random + move_mode = random.choice([ + "pyautogui.easeInQuad", "pyautogui.easeOutQuad", "pyautogui.easeInOutQuad", + "pyautogui.easeInBounce", "pyautogui.easeInElastic", + ]) + if action_type == "CLICK": + x, y = parameters.get("x"), parameters.get("y") + button = parameters.get("button", "left") + num_clicks = parameters.get("clicks", 1) + interval = parameters.get("interval", 0.0) + duration = parameters.get("duration", 0.0) + if x is not None and y is not None: + return f"pyautogui.click(x={x}, y={y}, button='{button}', clicks={num_clicks}, interval={interval}, duration={duration})" + return "pyautogui.click()" + elif action_type == "DOUBLE_CLICK": + x, y = parameters.get("x"), parameters.get("y") + button = parameters.get("button", "left") + interval = parameters.get("interval", 0.0) + duration = parameters.get("duration", 0.0) + if x is not None and y is not None: + return f"pyautogui.doubleClick(x={x}, y={y}, button='{button}', interval={interval}, duration={duration})" + return "pyautogui.doubleClick()" + elif action_type == "TRIPLE_CLICK": + x, y = parameters.get("x"), parameters.get("y") + button = parameters.get("button", "left") + interval = parameters.get("interval", 
0.0) + duration = parameters.get("duration", 0.0) + if x is not None and y is not None: + return f"pyautogui.tripleClick(x={x}, y={y}, button='{button}', interval={interval}, duration={duration})" + return "pyautogui.tripleClick()" + elif action_type == "RIGHT_CLICK": + x, y = parameters.get("x"), parameters.get("y") + interval = parameters.get("interval", 0.0) + duration = parameters.get("duration", 0.0) + if x is not None and y is not None: + return f"pyautogui.rightClick(x={x}, y={y}, interval={interval}, duration={duration})" + return "pyautogui.rightClick()" + elif action_type == "MIDDLE_CLICK": + x, y = parameters.get("x"), parameters.get("y") + interval = parameters.get("interval", 0.0) + duration = parameters.get("duration", 0.0) + if x is not None and y is not None: + return f"pyautogui.middleClick(x={x}, y={y}, interval={interval}, duration={duration})" + return "pyautogui.click(button='middle')" + elif action_type == "MOVE_TO": + x, y = parameters.get("x"), parameters.get("y") + duration = parameters.get("duration", 0.0) + if x is not None and y is not None: + return f"pyautogui.moveTo({x}, {y}, {duration}, {move_mode})" + return "pyautogui.moveTo()" + elif action_type == "DRAG_TO": + x, y = parameters.get("x"), parameters.get("y") + duration = parameters.get("duration", 0.0) + button = parameters.get("button", "left") + mouseDownUp = parameters.get("mouseDownUp", True) + if x is not None and y is not None: + return f"pyautogui.dragTo({x}, {y}, button='{button}', duration={duration}, mouseDownUp={mouseDownUp})" + return None + elif action_type == "SCROLL": + x, y = parameters.get("x"), parameters.get("y") + amount = parameters.get("amount", 1) + return f"pyautogui.scroll({amount}, x={x}, y={y})" + elif action_type == "HSCROLL": + x, y = parameters.get("x"), parameters.get("y") + amount = parameters.get("amount", 1) + return f"pyautogui.hscroll({amount}, x={x}, y={y})" + elif action_type == "TYPING": + text = parameters.get("text", "") + interval = 
parameters.get("interval", 0.0) + return f"pyautogui.typewrite({repr(text)}, interval={interval})" + elif action_type == "PRESS": + key = parameters.get("key", "") + presses = parameters.get("presses", 1) + if isinstance(key, list): + return f"pyautogui.hotkey({', '.join(repr(k) for k in key)})" + if presses > 1: + return f"pyautogui.press('{key}', presses={presses})" + return f"pyautogui.press('{key}')" + elif action_type == "HOTKEY": + keys = parameters.get("keys", []) + if isinstance(keys, list) and keys: + return f"pyautogui.hotkey({', '.join(repr(k) for k in keys)})" + return None + elif action_type == "KEY_DOWN": + return f"pyautogui.keyDown('{parameters.get('key', '')}')" + elif action_type == "KEY_UP": + return f"pyautogui.keyUp('{parameters.get('key', '')}')" + elif action_type == "MOUSE_DOWN": + return f"pyautogui.mouseDown(button='{parameters.get('button', 'left')}')" + elif action_type == "MOUSE_UP": + return f"pyautogui.mouseUp(button='{parameters.get('button', 'left')}')" + elif action_type == "WAIT": + return f"time.sleep({parameters.get('seconds', 1)})" + return None + + def execute_vm_action(self, action_data: dict) -> dict: + action_type = action_data.get("action_type") + parameters = action_data.get("parameters", {}) + cmd = self._action_to_pyautogui_command(action_type, parameters) + if cmd is None: + return {"status": "error", "message": f"Unknown action type: {action_type}"} + return self._execute_pyautogui_command(cmd) + + def run_action(self, action) -> "Observation": + from openhands.events.action.os import OSWorldInteractiveAction + if isinstance(action, OSWorldInteractiveAction): + return self.osworld_interactive(action) + return super().run_action(action) + + def osworld_interactive(self, action) -> "Observation": + from openhands.events.observation import ErrorObservation + method = action.method + params = action.params or {} + try: + if method == "execute_action": + return self._handle_execute_action(params) + if method == 
"execute_agentic_action": + return self._handle_execute_agentic_action( + params, action.tool_call_metadata, action.pause_time + ) + if method == "get_screenshot": + return self._handle_get_screenshot() + if method == "get_accessibility_tree": + return self._handle_get_accessibility_tree() + if method == "get_terminal_output": + return self._handle_get_terminal_output() + if method == "get_file": + return self._handle_get_file(params) + if method == "execute_python_command": + return self._handle_execute_python_command(params) + if method == "run_python_script": + return self._handle_run_python_script(params) + if method == "run_bash_script": + return self._handle_run_bash_script(params) + if method == "start_recording": + return self._handle_start_recording() + if method == "end_recording": + return self._handle_end_recording(params) + if method == "get_vm_platform": + return self._handle_get_vm_platform() + if method == "get_vm_screen_size": + return self._handle_get_vm_screen_size() + if method == "get_vm_window_size": + return self._handle_get_vm_window_size(params) + if method == "get_vm_wallpaper": + return self._handle_get_vm_wallpaper() + if method == "get_vm_desktop_path": + return self._handle_get_vm_desktop_path() + if method == "get_vm_directory_tree": + return self._handle_get_vm_directory_tree(params) + return ErrorObservation(f"Unknown OSWorld method: {method}") + except Exception as e: + self.log("error", f"OSWorld action failed: {e}") + return ErrorObservation(str(e)) + + def _handle_execute_action(self, params: dict) -> "Observation": + from openhands.events.observation import CmdOutputObservation + action_data = params.get("action", params) + result = self.execute_vm_action(action_data) + if result.get("status") == "success": + return CmdOutputObservation( + content=result.get("output", "Action executed successfully"), + command=str(action_data), + exit_code=0, + ) + return CmdOutputObservation( + content=result.get("error", result.get("message", 
"Unknown error")), + command=str(action_data), + exit_code=1, + ) + + def _handle_execute_agentic_action( + self, params: dict, tool_call_metadata: ToolCallMetadata | None, pause_time: float = 0.0 + ) -> "Observation": + from openhands.events.observation import ErrorObservation + from openhands.events.observation.osworld import OSWorldOutputObservation + import base64 + action_data = params.get("action", params) + if not getattr(self, "screen_size", None): + self._handle_get_vm_screen_size() + width, height = self.screen_size + if "parameters" in action_data: + p = action_data["parameters"] + if "x" in p: + p["x"] = int(p["x"] * width) + if "y" in p: + p["y"] = int(p["y"] * height) + result = self.execute_vm_action(action_data) + if result.get("status") != "success": + return ErrorObservation( + result.get("error", result.get("message", "Unknown error")), + error_id=getattr(tool_call_metadata, "tool_call_id", None), + name=getattr(tool_call_metadata, "function_name", None), + ) + if pause_time > 0.5: + time.sleep(pause_time) + screenshot_b64 = None + screenshot_bytes = self.get_vm_screenshot() + if screenshot_bytes: + screenshot_b64 = base64.b64encode(screenshot_bytes).decode("utf-8") + a11y = self.get_vm_accessibility_tree() + return OSWorldOutputObservation( + command=str(action_data), + screenshot=screenshot_b64, + accessibility_tree=a11y, + tool_call_id=tool_call_metadata.tool_call_id if tool_call_metadata else None, + name=tool_call_metadata.function_name if tool_call_metadata else None, + ) + + def _handle_get_screenshot(self) -> "Observation": + from openhands.events.observation import CmdOutputObservation, ErrorObservation + # NVCF may need a moment after connect; retry once to avoid transient failure + screenshot_bytes = self.get_vm_screenshot() + if not screenshot_bytes: + time.sleep(2.0) + screenshot_bytes = self.get_vm_screenshot() + if screenshot_bytes: + return CmdOutputObservation( + content="Screenshot captured", + command="get_screenshot", + 
exit_code=0, + ) + return ErrorObservation("Failed to capture screenshot") + + def _handle_get_accessibility_tree(self) -> "Observation": + from openhands.events.observation import CmdOutputObservation + # NVCF: GET /api/accessibility only + at = self.get_vm_accessibility_tree() + if at: + return CmdOutputObservation(content=at, command="get_accessibility_tree", exit_code=0) + return CmdOutputObservation(content="", command="get_accessibility_tree", exit_code=0) + + def _handle_get_terminal_output(self) -> "Observation": + from openhands.events.observation import CmdOutputObservation, ErrorObservation + for attempt in range(5): + try: + r = self._nvcf_get("/terminal", timeout=30.0) + if r.status_code == 200: + output = r.json().get("output") or "" + return CmdOutputObservation( + content=output, + command="get_terminal_output", + exit_code=0, + ) + body = r.text[:200] if r.text else "" + self.log("warning", f"Terminal output attempt {attempt + 1}/5: HTTP {r.status_code} body={body}") + except Exception as e: + self.log("warning", f"Terminal output attempt {attempt + 1}/5: {e}") + if attempt < 4: + time.sleep(5.0) + return ErrorObservation("Failed to get terminal output after 5 retries") + + def _handle_get_file(self, params: dict) -> "Observation": + from openhands.events.observation import CmdOutputObservation, ErrorObservation + import base64 + file_path = params.get("file_path", "") + if not file_path: + return ErrorObservation("file_path parameter required") + try: + r = self._nvcf_post( + "/file", + data={"file_path": file_path}, + timeout=30.0, + ) + if r.status_code == 200: + content_b64 = base64.b64encode(r.content).decode("utf-8") + return CmdOutputObservation( + content=f"base64:{content_b64}", + command=f"get_file {file_path}", + exit_code=0, + ) + return ErrorObservation(f"Failed to get file: {r.status_code}") + except Exception as e: + return ErrorObservation(f"Failed to get file: {e}") + + def _execute_pyautogui_command(self, pyautogui_command: str) 
-> dict: + """Execute a PyAutoGUI command string in the VM (same as singularity). + + Args: + pyautogui_command: Raw PyAutoGUI command(s) to execute + + Returns: + Response dictionary from OSWorld server (status, output, error, returncode or message). + """ + wrapped = ( + "import pyautogui; import time; pyautogui.FAILSAFE = False; " + f"{pyautogui_command}" + ) + payload = {"command": ["python", "-c", wrapped], "shell": False} + max_retries = 5 + for attempt in range(max_retries): + try: + r = self._nvcf_post("/execute", json=payload, timeout=30.0) + if r.status_code == 200: + return r.json() + body = r.text[:200] if r.text else "" + self.log("warning", + f"Execute attempt {attempt + 1}/{max_retries} failed: " + f"HTTP {r.status_code} fn={self._nvcf_function_id} body={body}") + except Exception as e: + self.log("warning", f"Execute attempt {attempt + 1}/{max_retries} failed: {e}") + if attempt < max_retries - 1: + time.sleep(5.0) + self.log("error", f"Failed to execute PyAutoGUI command after {max_retries} retries: {pyautogui_command[:100]}") + return {"status": "error", "message": f"Failed after {max_retries} retries"} + + def _handle_execute_python_command(self, params: dict) -> "Observation": + """Handle execute_python_command - raw Python command execution (PyAutoGUI-style, same as singularity).""" + from openhands.events.observation import CmdOutputObservation + + command = params.get("command", "") + if not command: + return CmdOutputObservation( + content="Error: command parameter required", + command="execute_python_command", + exit_code=1, + ) + + result = self._execute_pyautogui_command(command) + + if result.get("status") == "success": + return CmdOutputObservation( + content=result.get("output", ""), + command=command, + exit_code=result.get("returncode", 0), + ) + else: + error_msg = result.get("error", result.get("message", "Unknown error")) + return CmdOutputObservation( + content=f"Error: {error_msg}", + command=command, + 
exit_code=result.get("returncode", 1), + ) + + def _handle_run_python_script(self, params: dict) -> "Observation": + from openhands.events.observation import CmdOutputObservation + script = params.get("script", "") + if not script: + return CmdOutputObservation(content="script parameter required", command="run_python_script", exit_code=1) + try: + r = self._nvcf_post("/run_python", json={"code": script}, timeout=90.0) + if r.status_code == 200: + res = r.json() + out, err = res.get("output", ""), res.get("error", "") + return CmdOutputObservation( + content=f"Output:\n{out}" + (f"\nError:\n{err}" if err else ""), + command="run_python_script", + exit_code=res.get("returncode", 0), + ) + return CmdOutputObservation(content=r.text or "Run failed", command="run_python_script", exit_code=1) + except Exception as e: + return CmdOutputObservation(content=str(e), command="run_python_script", exit_code=1) + + def _handle_run_bash_script(self, params: dict) -> "Observation": + """Handle run_bash_script. 
Uses POST /api/run_bash_script.""" + from openhands.events.observation import CmdOutputObservation, ErrorObservation + + script = params.get("script", "") + if not script: + return ErrorObservation("script parameter required") + timeout = params.get("timeout", 30) + working_dir = params.get("working_dir") + + payload = {"script": script, "timeout": timeout} + if working_dir is not None: + payload["working_dir"] = working_dir + + for attempt in range(5): + try: + r = self._nvcf_post( + "/run_bash_script", + json=payload, + timeout=timeout + 10.0, + ) + if r.status_code == 200: + result = r.json() + output = result.get("output", "") + error = result.get("error", "") + content = output + if error: + content = f"{content}\n{error}" if content else error + return CmdOutputObservation( + content=content, + command="run_bash_script", + exit_code=result.get("returncode", 0), + ) + if r.status_code in (404, 502, 503, 504): + body = r.text[:200] if r.text else "" + self.log("warning", + f"run_bash_script attempt {attempt + 1}/5: HTTP {r.status_code} body={body}") + if attempt < 4: + time.sleep(5.0) + continue + try: + error_detail = r.json() + error_msg = error_detail.get("output", error_detail.get("message", "Unknown error")) + except Exception: + error_msg = r.text or "Unknown error" + return ErrorObservation(f"Failed to run bash script (HTTP {r.status_code}): {error_msg}") + except Exception as e: + self.log("warning", f"run_bash_script attempt {attempt + 1}/5: {e}") + if attempt < 4: + time.sleep(5.0) + continue + return ErrorObservation(f"Failed to run bash script: {e}") + return ErrorObservation("Failed to run bash script after 5 retries") + + def _handle_start_recording(self) -> "Observation": + """Handle start_recording. 
Uses POST /api/start_recording.""" + from openhands.events.observation import CmdOutputObservation, ErrorObservation + + try: + r = self._nvcf_post("/start_recording", timeout=10.0) + if r.status_code == 200: + return CmdOutputObservation( + content="Recording started", + command="start_recording", + exit_code=0, + ) + return ErrorObservation(f"Failed to start recording: {r.status_code}") + except Exception as e: + return ErrorObservation(f"Failed to start recording: {e}") + + def _handle_end_recording(self, params: dict) -> "Observation": + """Handle end_recording. POST /api/end_recording returns video file (binary); return as base64.""" + from openhands.events.observation import CmdOutputObservation, ErrorObservation + import base64 + + try: + r = self._nvcf_post("/end_recording", timeout=60.0) + if r.status_code == 200: + video_content = r.content + content_b64 = base64.b64encode(video_content).decode("utf-8") + return CmdOutputObservation( + content=f"base64:{content_b64}", + command="end_recording", + exit_code=0, + ) + return ErrorObservation(f"Failed to end recording: {r.status_code}") + except Exception as e: + return ErrorObservation(f"Failed to end recording: {e}") + + def _handle_get_vm_platform(self) -> "Observation": + """Handle get_vm_platform. Uses GET /api/platform (returns plain text e.g. 
Linux).""" + from openhands.events.observation import CmdOutputObservation + + try: + r = self._nvcf_get("/platform", timeout=30.0) + if r.status_code == 200: + platform_str = (r.text or "").strip() + return CmdOutputObservation( + content=platform_str or "Unknown", + command="get_vm_platform", + exit_code=0, + ) + except Exception: + pass + result = self._execute_pyautogui_command("import platform; print(platform.system())") + if result.get("status") == "success": + return CmdOutputObservation( + content=result.get("output", "").strip() or "Unknown", + command="get_vm_platform", + exit_code=0, + ) + return CmdOutputObservation(content="Unknown", command="get_vm_platform", exit_code=1) + + def _handle_get_vm_screen_size(self) -> "Observation": + """Handle get_vm_screen_size. Uses POST /api/screen_size.""" + from openhands.events.observation import CmdOutputObservation, ErrorObservation + + try: + if hasattr(self, "screen_size") and self.screen_size: + width, height = self.screen_size + else: + r = self._nvcf_post("/screen_size", timeout=30.0) + if r.status_code != 200: + return ErrorObservation(f"Failed to get screen size: {r.status_code}") + size = r.json() + width = size.get("width", 1920) + height = size.get("height", 1080) + self.screen_size = (width, height) + return CmdOutputObservation( + content=f"Width: {width}, Height: {height}", + command="get_vm_screen_size", + exit_code=0, + ) + except Exception as e: + return ErrorObservation(f"Failed to get screen size: {e}") + + def _handle_get_vm_window_size(self, params: dict) -> "Observation": + from openhands.events.observation import CmdOutputObservation + # NVCF has no /api/window_size; use /api/execute with wmctrl only + app = params.get("app_class_name", "") or "window" + try: + payload = {"command": ["wmctrl", "-l", "-G"], "shell": False} + r = self._nvcf_post("/execute", json=payload, timeout=30.0) + if r.status_code == 200: + res = r.json() + out = (res.get("output") or "").strip() + if out: + return 
CmdOutputObservation( + content=out[:2000], + command=f"get_vm_window_size {app}", + exit_code=0, + ) + return CmdOutputObservation( + content="Window geometry not available (use get_vm_screen_size for display size).", + command=f"get_vm_window_size {app}", + exit_code=0, + ) + except Exception: + return CmdOutputObservation( + content="Window geometry not available (use get_vm_screen_size for display size).", + command=f"get_vm_window_size {app}", + exit_code=0, + ) + + def _handle_get_vm_wallpaper(self) -> "Observation": + """Handle get_vm_wallpaper. POST /api/wallpaper returns wallpaper image (binary); return as base64.""" + from openhands.events.observation import CmdOutputObservation, ErrorObservation + import base64 + + try: + r = self._nvcf_post("/wallpaper", timeout=30.0) + if r.status_code == 200: + wallpaper_bytes = r.content + content_b64 = base64.b64encode(wallpaper_bytes).decode("utf-8") + return CmdOutputObservation( + content=f"base64:{content_b64}", + command="get_vm_wallpaper", + exit_code=0, + ) + return ErrorObservation(f"Failed to get wallpaper: {r.status_code}") + except Exception as e: + return ErrorObservation(f"Failed to get wallpaper: {e}") + + def _handle_get_vm_desktop_path(self) -> "Observation": + """Handle get_vm_desktop_path. Uses POST /api/desktop_path.""" + from openhands.events.observation import CmdOutputObservation, ErrorObservation + + try: + r = self._nvcf_post("/desktop_path", timeout=30.0) + if r.status_code == 200: + desktop_path = r.json().get("desktop_path", "") + return CmdOutputObservation( + content=desktop_path, + command="get_vm_desktop_path", + exit_code=0, + ) + return ErrorObservation(f"Failed to get desktop path: {r.status_code}") + except Exception as e: + return ErrorObservation(f"Failed to get desktop path: {e}") + + def _handle_get_vm_directory_tree(self, params: dict) -> "Observation": + """Handle get_vm_directory_tree. 
Uses POST /api/list_directory with JSON {path}.""" + from openhands.events.observation import CmdOutputObservation, ErrorObservation + import json + + path = params.get("path", "") + if not path: + return ErrorObservation("path parameter required") + try: + r = self._nvcf_post( + "/list_directory", + json={"path": path}, + timeout=30.0, + ) + if r.status_code == 200: + directory_tree = r.json().get("directory_tree", {}) + content = json.dumps(directory_tree, indent=2) + return CmdOutputObservation( + content=content, + command=f"get_vm_directory_tree {path}", + exit_code=0, + ) + return ErrorObservation(f"Failed to get directory tree: {r.status_code}") + except Exception as e: + return ErrorObservation(f"Failed to get directory tree: {e}") + + def get_microagents_from_selected_repo(self, selected_repository: str | None): + return [] diff --git a/openhands/runtime/impl/singularity/osworld_singularity_runtime.py b/openhands/runtime/impl/singularity/osworld_singularity_runtime.py index 85b5c18d1..37260248d 100644 --- a/openhands/runtime/impl/singularity/osworld_singularity_runtime.py +++ b/openhands/runtime/impl/singularity/osworld_singularity_runtime.py @@ -32,6 +32,7 @@ from openhands.runtime.plugins import PluginRequirement from openhands.runtime.utils import find_available_tcp_port from openhands.runtime.utils.command import DEFAULT_MAIN_MODULE +from openhands.runtime.utils.osworld_http_client import DirectHttpClient from openhands.events.tool import ToolCallMetadata @@ -586,6 +587,19 @@ def vlc_url(self) -> str: """ return f'http://localhost:{self._vlc_port}' + + @property + def http_client(self): + """Get the HTTP client for runtime-agnostic communication. + + Returns a DirectHttpClient that makes direct HTTP requests to the VM. + """ + return DirectHttpClient( + base_url=self.osworld_vm_url, + chromium_port=self._chromium_port, + vlc_port=self._vlc_port + ) + def get_vm_screenshot(self) -> bytes | None: """Get screenshot from the VM. 
diff --git a/openhands/runtime/utils/osworld_http_client.py b/openhands/runtime/utils/osworld_http_client.py new file mode 100644 index 000000000..76b02e591 --- /dev/null +++ b/openhands/runtime/utils/osworld_http_client.py @@ -0,0 +1,216 @@ +"""HTTP client abstraction for OSWorld VM communication. + +This module provides a unified HTTP client interface that works with both: +- Singularity runtime (direct HTTP to localhost) +- NVCF runtime (authenticated HTTPS to NVIDIA cloud) + +This module is placed in openhands.runtime.utils to avoid circular imports +with openhands.nvidia, which has side effects on import (handler registration). +""" + +from typing import Protocol, Optional, Dict +import requests + +from openhands.core.logger import openhands_logger as logger + + +class OSWorldHttpClient(Protocol): + """Protocol for HTTP communication with OSWorld VM. + + This abstraction allows controllers and getters to use the same code + regardless of whether they're talking to a local VM or NVCF. + """ + + def get(self, endpoint: str, **kwargs) -> requests.Response: + """Make a GET request to the VM server.""" + ... + + def post(self, endpoint: str, **kwargs) -> requests.Response: + """Make a POST request to the VM server.""" + ... + + def get_cdp_url(self) -> str: + """Get the Chrome DevTools Protocol URL for Playwright.""" + ... + + def get_cdp_headers(self) -> Optional[Dict[str, str]]: + """Get headers needed for CDP connection (None for direct connection).""" + ... + + def get_vlc_url(self) -> str: + """Get the VLC web interface base URL.""" + ... + + def update_launch_command(self, command: str) -> str: + """Update the launch command to use the HTTP client.""" + ... + + +class DirectHttpClient: + """Direct HTTP client for Singularity/local runtime. + + Makes direct HTTP requests to the VM server running on localhost. + No special authentication or URL rewriting needed. 
+ """ + + def __init__(self, base_url: str, chromium_port: int, vlc_port: int): + """Initialize the direct HTTP client. + + Args: + base_url: Base URL for the VM server (e.g., http://127.0.0.1:5000) + chromium_port: Port for Chrome DevTools Protocol + vlc_port: Port for VLC web interface + """ + self.base_url = base_url.rstrip('/') + self.chromium_port = chromium_port + self.vlc_port = vlc_port + + def get(self, endpoint: str, **kwargs) -> requests.Response: + """Make a GET request to the VM server.""" + url = self.base_url + endpoint + return requests.get(url, **kwargs) + + def post(self, endpoint: str, **kwargs) -> requests.Response: + """Make a POST request to the VM server.""" + url = self.base_url + endpoint + return requests.post(url, **kwargs) + + def get_cdp_url(self) -> str: + """Get the Chrome DevTools Protocol URL.""" + return f"http://127.0.0.1:{self.chromium_port}" + + def get_cdp_headers(self) -> Optional[Dict[str, str]]: + """No special headers needed for direct connection.""" + return None + + def get_vlc_url(self) -> str: + """Get the VLC web interface URL.""" + return f"http://127.0.0.1:{self.vlc_port}" + + def update_launch_command(self, command: str) -> str: + """Update the launch command to use the HTTP client.""" + return command + +class NVCFHttpClient: + """NVCF HTTP client with authentication and URL rewriting. + + Makes authenticated HTTPS requests to NVIDIA Cloud Functions. + Handles WebSocket URL rewriting for Chrome DevTools Protocol. + """ + + NVCF_API_BASE = "https://grpc.nvcf.nvidia.com/api" + NVCF_CHROME_BASE = "https://grpc.nvcf.nvidia.com/chrome" + NVCF_VLC_BASE = "https://grpc.nvcf.nvidia.com/vlc" + + def __init__(self, api_key: str, function_id: str, session_headers: Optional[Dict[str, str]] = None): + """Initialize the NVCF HTTP client. + + Args: + api_key: NGC API key for authentication + function_id: NVCF function ID + session_headers: Optional NVCF session routing headers (e.g. 
NVCF-SESSION-ID) + """ + self.headers = { + "Authorization": f"Bearer {api_key}", + "Function-ID": function_id, + } + if session_headers: + self.headers.update(session_headers) + self._session = requests.Session() + self._session.headers.update(self.headers) + self._cached_cdp_url: Optional[str] = None + logger.info(f"NVCFHttpClient created with headers: { {k: v[:20]+'...' if len(str(v))>20 else v for k,v in self.headers.items()} }") + + def get(self, endpoint: str, **kwargs) -> requests.Response: + """Make an authenticated GET request to NVCF.""" + url = self.NVCF_API_BASE + endpoint + # Merge auth headers with any provided headers + headers = {**self.headers, **kwargs.pop('headers', {})} + return self._session.get(url, headers=headers, **kwargs) + + def post(self, endpoint: str, **kwargs) -> requests.Response: + """Make an authenticated POST request to NVCF.""" + url = self.NVCF_API_BASE + endpoint + # Merge auth headers with any provided headers + headers = {**self.headers, **kwargs.pop('headers', {})} + return self._session.post(url, headers=headers, **kwargs) + + def get_cdp_url(self) -> str: + """Get the Chrome DevTools Protocol URL with WebSocket rewriting. + + Playwright's connect_over_cdp() performs discovery by fetching /json/version. + Chrome returns ws://localhost/... which doesn't work for NVCF. + We fetch the discovery ourselves and rewrite the WebSocket URL. + """ + try: + # Fetch the WebSocket URL from Chrome's discovery endpoint + response = requests.get( + f"{self.NVCF_CHROME_BASE}/json/version", + headers=self.headers, + timeout=30 + ) + response.raise_for_status() + data = response.json() + ws_url = data.get("webSocketDebuggerUrl", "") + + if not ws_url: + logger.warning("No webSocketDebuggerUrl in Chrome discovery response") + return self.NVCF_CHROME_BASE + + # Rewrite ws://localhost/... to wss://grpc.nvcf.nvidia.com/chrome/... 
+ rewritten_url = ws_url.replace( + "ws://localhost/", + "wss://grpc.nvcf.nvidia.com/chrome/" + ) + logger.debug(f"CDP URL rewritten: {ws_url} -> {rewritten_url}") + return rewritten_url + + except Exception as e: + logger.error(f"Failed to get CDP URL from NVCF: {e}") + # Return base URL as fallback - Playwright will do its own discovery + return self.NVCF_CHROME_BASE + + def get_cdp_headers(self) -> Dict[str, str]: + """Get authentication headers for CDP connection.""" + return self.headers.copy() + + def get_vlc_url(self) -> str: + """Get the VLC web interface URL through NVCF.""" + return self.NVCF_VLC_BASE + + def update_launch_command(self, command) -> list: + """Update the launch command for NVCF VMs. + + Replaces google-chrome with google-chrome-wrapper and adds + required NVCF flags, while preserving the original + --remote-debugging-port from the setup config. + """ + if not command: + return command + + cmd_name = command[0] if isinstance(command, list) else command + if cmd_name != "google-chrome": + return command + + # Extract the original debugging port from the command args + original_args = command[1:] if isinstance(command, list) else [] + debug_port = "9222" # default + for arg in original_args: + if arg.startswith("--remote-debugging-port="): + debug_port = arg.split("=", 1)[1] + break + + command = [ + "google-chrome-wrapper", + f"--remote-debugging-port={debug_port}", + "--remote-debugging-address=127.0.0.1", + "--remote-allow-origins=*", + "--no-first-run", + "--no-default-browser-check", + "--disable-infobars", + "--disable-session-crashed-bubble", + "--disable-features=TranslateUI", + "--start-maximized", + ] + + return command diff --git a/pyproject.toml b/pyproject.toml index bbcc920fd..e71876497 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -135,6 +135,9 @@ datasets = "*" [tool.poetry.scripts] openhands = "openhands.cli.main:main" +[tool.poetry.group.osworld-nvcf.dependencies] +ngcsdk = ">=3.64.0" + 
[tool.poetry.group.testgeneval.dependencies] fuzzywuzzy = "^0.18.0" rouge = "^1.0.1" diff --git a/scripts/eval/README.md b/scripts/eval/README.md new file mode 100644 index 000000000..3e8d748b4 --- /dev/null +++ b/scripts/eval/README.md @@ -0,0 +1,227 @@ +# OSWorld Evaluation Pipeline + +This directory contains a three-stage pipeline for running OSWorld evaluations, processing results, and generating visualizations. + +## Pipeline Overview + +``` +osworld.py → unpack.py → generate_gifs.py + ↓ ↓ ↓ +JSON files Extracted MP4 videos + dirs with + PNGs & JSON +``` + +## Stage 1: osworld.py - Run OSWorld Evaluation + +The first stage runs OSWorld benchmark tasks using an OpenHands agent server with an LLM backend. + +### Purpose +- Submits OSWorld test instances to an agent server +- Processes tasks in parallel with configurable concurrency +- Saves complete interaction logs as JSON files +- Supports resumption (skips already-completed tasks) + +### Usage + +```bash +python osworld.py \ + --data-file /path/to/osworld_test.json \ + --output-dir ./osworld_results \ + --llm-server-address http://localhost:8000/v1 \ + --model "hosted_vllm/Qwen/Qwen3-VL-235B-A22B-Instruct" \ + --max-parallel-jobs 2 \ + --max-iterations 15 \ + --timeout 6000 +``` + +### Key Arguments +- `--data-file`: Path to OSWorld test data (JSONL format) +- `--output-dir`: Directory to save result JSON files (default: `./osworld_results`) +- `--llm-server-address`: LLM server endpoint (default: `http://localhost:8000/v1`) +- `--model`: Model identifier for the LLM +- `--max-parallel-jobs`: Number of concurrent tasks (default: 2) +- `--max-iterations`: Max agent iterations per task (default: 15) +- `--timeout`: Timeout per task in seconds (default: 6000) +- `--enable-vision/--no-vision`: Enable/disable vision capabilities (default: enabled) +- `--enable-a11y-tree`: Enable accessibility tree +- `--max-image-history`: Number of images to keep in context (default: 4) +- `--temperature`: Sampling temperature 
(default: 0.0) +- `--total-jobs`: Limit number of jobs to run (default: all) + +### Output +Creates one JSON file per task in `--output-dir`: +- `.json`: Complete interaction log including messages, tool calls, and images +- `error_.json`: Error logs for failed tasks + +Each JSON contains: +- `instance_id`: Task identifier +- `messages`: Complete conversation history with the agent +- `resolved`: Boolean indicating task success +- Embedded base64-encoded screenshots + +## Stage 2: unpack.py - Extract Images and Conversations + +The second stage unpacks the JSON files into directories with extracted images and structured conversation data. + +### Purpose +- Extracts base64-encoded images from JSON files +- Creates organized directory structure for each task +- Converts message history to readable conversation format +- Computes overall accuracy statistics + +### Usage + +```bash +python unpack.py \ + --dir_path ./osworld_results \ + --workers 64 +``` + +### Key Arguments +- `--dir_path`: Directory containing JSON files from Stage 1 (default: current directory) +- `--workers`: Number of parallel worker processes (default: 64) +- `--skip_save`: Skip saving result.txt file (useful for dry runs) + +### Output Structure +For each `.json`, creates: +``` +/ +├── conversation.json # Structured conversation with roles and content +├── result.txt # Boolean result (True/False) +├── 0.png # First screenshot +├── 1.png # Second screenshot +└── ... # Additional screenshots +``` + +The `conversation.json` format: +```json +[ + { + "role": "user", + "content": "Task instruction...", + "img": "0.png" + }, + { + "role": "assistant", + "content": "I'll help with that...", + "tool_calls": [ + { + "name": "mouse_click", + "arguments": "{\"x\": 0.5, \"y\": 0.3}" + } + ] + }, + ... 
+] +``` + +### Statistics +Prints summary statistics: +- Total correct tasks +- Total tasks processed +- Overall accuracy + +## Stage 3: generate_gifs.py - Create Video Visualizations + +The final stage generates MP4 videos showing the agent's actions with annotations. + +### Purpose +- Creates videos from conversation and screenshots +- Overlays user instructions and assistant responses +- Visualizes click actions with red markers +- Generates one video per task + +### Usage + +```bash +python generate_gifs.py ./osworld_results \ + --fps 0.2 \ + --output-name output.mp4 +``` + +### Key Arguments +- `work_dir`: Directory containing subdirectories from Stage 2 (positional) +- `--fps`: Frames per second for video (default: 0.2 = 5 seconds per frame) +- `--output-name`: Output filename (default: `output.mp4`) + +### Output +Creates `output.mp4` in each task subdirectory with: +- **Top overlay** (first frame only): User instruction +- **Bottom overlay** (all frames): Assistant response +- **Red markers**: Click locations with tool names +- **Duration**: Configurable via FPS (default 5s per frame) + +### Video Features +- Semi-transparent text backgrounds for readability +- Centered text with automatic wrapping +- Red circles mark click coordinates (normalized 0-1) +- Tool names labeled above click markers +- MP4 format with mp4v codec + +## Complete Pipeline Example + +```bash +# Step 1: Run evaluation +python osworld.py \ + --data-file /data/osworld_test.json \ + --output-dir ./osworld_results \ + --max-parallel-jobs 4 \ + --max-iterations 15 + +# Step 2: Extract images and conversations +python unpack.py \ + --dir_path ./osworld_results \ + --workers 64 + +# Step 3: Generate videos +python generate_gifs.py ./osworld_results --fps 0.2 +``` + +### Expected Directory Structure + +After running the complete pipeline: + +``` +osworld_results/ +├── .json +├── .json +├── / +│ ├── conversation.json +│ ├── result.txt +│ ├── 0.png +│ ├── 1.png +│ └── output.mp4 +├── / +│ ├── 
conversation.json +│ ├── result.txt +│ ├── 0.png +│ └── output.mp4 +└── ... +``` + +## Requirements + +### Python Dependencies +- `PIL` (Pillow): Image processing +- `cv2` (opencv-python): Video generation +- `numpy`: Array operations +- `openhands.nvidia.async_server_osworld`: Agent server interface + +### System Dependencies +- Fonts for text overlay (DejaVu or Liberation Sans) +- Sufficient disk space for videos + +## Tips and Best Practices + +1. **Resumption**: osworld.py automatically skips completed tasks, so you can safely re-run it after interruptions + +2. **Parallel Processing**: + - osworld.py: Set `--max-parallel-jobs` based on available GPU/CPU resources + - unpack.py: Uses multiprocessing; adjust `--workers` for your CPU count + +3. **Video Customization**: Adjust `--fps` in generate_gifs.py: + - Lower FPS (e.g., 0.2) = More time per frame = Easier to read + - Higher FPS (e.g., 1.0) = Faster playback = Shorter videos + +4. **Storage**: Each task with screenshots and video can be several MB; plan storage accordingly diff --git a/scripts/eval/generate_gifs.py b/scripts/eval/generate_gifs.py new file mode 100644 index 000000000..00a7a0848 --- /dev/null +++ b/scripts/eval/generate_gifs.py @@ -0,0 +1,409 @@ +#!/usr/bin/env python3 +""" +Generate videos from conversation data and screenshots. + +This script takes a work directory containing conversation.json and PNG files, +matches each assistant turn with corresponding PNG images, overlays user turn +content as text on the images, draws click markers for tool calls with coordinates, +and creates a video with configurable FPS (default 0.2 FPS = 5 seconds per frame). 
+""" +import json +import argparse +from pathlib import Path +from typing import List, Dict, Tuple +from PIL import Image, ImageDraw, ImageFont +import textwrap +import cv2 +import numpy as np + + +def parse_conversation(conversation_path: Path) -> Tuple[List[Dict], List[Dict]]: + """ + Parse conversation.json to extract user and assistant turns. + + Args: + conversation_path: Path to conversation.json file + + Returns: + Tuple of (user_turns, assistant_turns) where: + - user_turns is a list of dicts with 'content' key + - assistant_turns is a list of dicts with 'content' and 'tool_calls' keys + """ + with open(conversation_path, 'r', encoding='utf-8') as f: + conversation = json.load(f) + + user_turns = [] + assistant_turns = [] + + for msg in conversation: + role = msg.get('role', '') + content = msg.get('content', '') + + if role in ['user', 'tool']: + user_turns.append({ + 'content': content + }) + elif role == 'assistant': + assistant_turns.append({ + 'content': content, + 'tool_calls': msg.get('tool_calls', []) + }) + + return user_turns, assistant_turns + + +def add_text_to_image(image: Image.Image, top_text: str = None, bottom_text: str = None, font_size: int = 15) -> Image.Image: + """ + Add text overlay to an image at the top and/or bottom. 
+ + Args: + image: PIL Image object + top_text: Text to overlay at the top of the image + bottom_text: Text to overlay at the bottom of the image + font_size: Font size for the text + + Returns: + Modified PIL Image with text overlay + """ + # Create a copy to avoid modifying the original + img_with_text = image.copy() + draw = ImageDraw.Draw(img_with_text) + + # Try to use a better font, fall back to default if not available + try: + font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size) + except: + try: + font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", font_size) + except: + # Fall back to default font + font = ImageFont.load_default() + + # Get image dimensions + img_width, img_height = img_with_text.size + + # Helper function to add text at a specific position + def add_text_overlay(text: str, position: str): + """Add text overlay at 'top' or 'bottom' position""" + # Wrap text to fit image width (with some padding) + max_chars_per_line = int(img_width / (font_size * 0.6)) # Rough estimate + wrapped_text = textwrap.fill(text, width=max_chars_per_line) + + # Calculate text bounding box + # For multi-line text, we need to calculate height manually + lines = wrapped_text.split('\n') + line_height = font_size + 5 # Add some spacing between lines + text_height = len(lines) * line_height + + # Create a semi-transparent black background for the text + padding = 10 + text_bg_height = text_height + 2 * padding + text_bg = Image.new('RGBA', (img_width, text_bg_height), (0, 0, 0, 180)) + + # Composite the background onto the image at the appropriate position + if position == 'top': + y_position = 0 + img_with_text.paste(text_bg, (0, y_position), text_bg) + y_offset = padding + else: # bottom + y_position = img_height - text_bg_height + img_with_text.paste(text_bg, (0, y_position), text_bg) + y_offset = y_position + padding + + # Draw each line of text + for line in lines: + # Center each line 
horizontally + line_width = draw.textlength(line, font=font) + x_position = (img_width - line_width) / 2 + draw.text((x_position, y_offset), line, fill=(255, 255, 255), font=font) + y_offset += line_height + + # Add top text if provided + if top_text: + add_text_overlay(top_text, 'top') + + # Add bottom text if provided + if bottom_text: + add_text_overlay(bottom_text, 'bottom') + + return img_with_text + + +def add_click_marker(image: Image.Image, x: float, y: float, tool_name: str = None, radius: int = 10, font_size: int = 14) -> Image.Image: + """ + Add a filled red circle at the specified coordinates with optional tool name label. + + Args: + image: PIL Image object + x: X coordinate (normalized 0-1, will be scaled to image width) + y: Y coordinate (normalized 0-1, will be scaled to image height) + tool_name: Name of the tool call to display above the circle + radius: Radius of the circle in pixels + font_size: Font size for the tool name text + + Returns: + Modified PIL Image with circle marker and optional label + """ + # Create a copy to avoid modifying the original + img_with_marker = image.copy() + draw = ImageDraw.Draw(img_with_marker) + + # Get image dimensions + img_width, img_height = img_with_marker.size + + # Convert normalized coordinates to pixel coordinates + pixel_x = x * img_width + pixel_y = y * img_height + + # Draw filled red circle + left_up = (pixel_x - radius, pixel_y - radius) + right_down = (pixel_x + radius, pixel_y + radius) + draw.ellipse([left_up, right_down], fill=(255, 0, 0, 255), outline=(255, 0, 0, 255)) + + # Draw tool name above the circle if provided + if tool_name: + # Try to use a better font, fall back to default if not available + try: + font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size) + except: + try: + font = ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", font_size) + except: + # Fall back to default font + font = ImageFont.load_default() + + 
# Calculate text position (centered above the circle) + text_width = draw.textlength(tool_name, font=font) + text_x = pixel_x - (text_width / 2) + text_y = pixel_y - radius - font_size - 5 # 5 pixels padding above circle + + # Draw text in red + draw.text((text_x, text_y), tool_name, fill=(255, 0, 0, 255), font=font) + + return img_with_marker + + +def create_video(work_dir: Path, output_path: Path = None, fps: float = 0.2) -> None: + """ + Create a video from conversation and PNG files. + + Args: + work_dir: Directory containing conversation.json and PNG files + output_path: Path for the output video (default: work_dir/output.mp4) + fps: Frames per second (default: 0.2 = 5 seconds per frame) + """ + work_dir = Path(work_dir) + conversation_path = work_dir / 'conversation.json' + + if not conversation_path.exists(): + raise FileNotFoundError(f"conversation.json not found in {work_dir}") + + # Parse conversation + print(f"Parsing conversation from {conversation_path}") + user_turns, assistant_turns = parse_conversation(conversation_path) + + print(f"Found {len(user_turns)} user turns and {len(assistant_turns)} assistant turns") + + # Find all PNG files, sorted numerically + png_files = sorted(work_dir.glob('*.png'), key=lambda x: int(x.stem) if x.stem.isdigit() else float('inf')) + + if not png_files: + raise FileNotFoundError(f"No PNG files found in {work_dir}") + + print(f"Found {len(png_files)} PNG files") + + # Process images and match with turns + processed_images = [] + + for i, png_path in enumerate(png_files): + # Match with assistant turn (0.png -> first assistant turn, etc.) 
+ if i < len(assistant_turns): + # Get the corresponding user turn and assistant turn + user_turn = user_turns[i] if i < len(user_turns) else {'content': "No user instruction available"} + assistant_turn = assistant_turns[i] + + user_text = user_turn['content'] + assistant_text = assistant_turn['content'] + + ## Truncate long text for better display + #if len(user_text) > 500: + # user_text = user_text[:500] + "..." + #if len(assistant_text) > 500: + # assistant_text = assistant_text[:500] + "..." + + print(f"Processing {png_path.name} with user turn {i} and assistant turn {i}") + + # Load image + img = Image.open(png_path) + + # Check if there are tool calls with x, y coordinates in the assistant turn + tool_calls = assistant_turn.get('tool_calls', []) + if tool_calls and len(tool_calls) > 0: + first_tool_call = tool_calls[0] + tool_name = first_tool_call.get('name', '') # Default to '' if no name + arguments = first_tool_call.get('arguments', {}) + + # Parse arguments if it's a string (JSON) + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except json.JSONDecodeError: + print(f" Warning: Could not parse tool_call arguments for {png_path.name}") + arguments = {} + + # Check if x and y coordinates exist + if 'x' in arguments and 'y' in arguments: + try: + x = float(arguments['x']) + y = float(arguments['y']) + print(f" Drawing click marker at ({x}, {y}) for tool '{tool_name}'") + img = add_click_marker(img, x, y, tool_name=tool_name) + except (ValueError, TypeError) as e: + print(f" Warning: Invalid x/y coordinates in {png_path.name}: {e}") + + # Add text (user at top, assistant at bottom) + if i == 0: + img_with_text = add_text_to_image(img, top_text=user_text, bottom_text=assistant_text) + else: + img_with_text = add_text_to_image(img, bottom_text=assistant_text) + + # Convert to RGB if necessary (OpenCV uses BGR, but we'll convert later) + if img_with_text.mode != 'RGB': + img_with_text = img_with_text.convert('RGB') + + 
processed_images.append(img_with_text) + else: + print(f"Warning: No assistant turn for {png_path.name}, skipping") + + if not processed_images: + raise ValueError("No images were processed") + + # Set output path + if output_path is None: + output_path = work_dir / 'output.mp4' + else: + output_path = Path(output_path) + + # Get dimensions from first image + first_img = processed_images[0] + width, height = first_img.size + + # Create video writer + print(f"Creating video at {output_path}") + fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Codec for MP4 + video_writer = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height)) + + if not video_writer.isOpened(): + raise RuntimeError(f"Failed to open video writer for {output_path}") + + # Write frames to video + for img_pil in processed_images: + # Convert PIL Image to numpy array + img_array = np.array(img_pil) + # Convert RGB to BGR (OpenCV uses BGR) + img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) + # Write frame + video_writer.write(img_bgr) + + video_writer.release() + + duration_per_frame = 1.0 / fps + total_duration = len(processed_images) * duration_per_frame + + print(f"✓ Video created successfully: {output_path}") + print(f" - {len(processed_images)} frames") + print(f" - {fps} FPS ({duration_per_frame:.1f}s per frame)") + print(f" - Total duration: {total_duration:.1f}s") + print(f" - Resolution: {width}x{height}") + + +def main(): + parser = argparse.ArgumentParser( + description='Generate videos from conversation data and screenshots' + ) + parser.add_argument( + 'work_dir', + type=str, + help='Directory containing subdirectories with conversation.json and PNG files' + ) + parser.add_argument( + '--fps', + type=float, + default=0.2, + help='Frames per second (default: 0.2, which is 5 seconds per frame)' + ) + parser.add_argument( + '--output-name', + type=str, + default='output.mp4', + help='Output video filename (default: output.mp4)' + ) + + args = parser.parse_args() + + work_dir = 
Path(args.work_dir) + + if not work_dir.exists(): + print(f"Error: Directory {work_dir} does not exist") + return 1 + + if not work_dir.is_dir(): + print(f"Error: {work_dir} is not a directory") + return 1 + + # Find all subdirectories that contain conversation.json + subdirs_to_process = [] + for subdir in work_dir.iterdir(): + if subdir.is_dir(): + conversation_file = subdir / 'conversation.json' + if conversation_file.exists(): + subdirs_to_process.append(subdir) + + if not subdirs_to_process: + print(f"No subdirectories with conversation.json found in {work_dir}") + return 1 + + print(f"Found {len(subdirs_to_process)} subdirectories to process") + print("=" * 80) + + # Process each subdirectory + successful = 0 + failed = 0 + errors = [] + + for i, subdir in enumerate(subdirs_to_process, 1): + print(f"\n[{i}/{len(subdirs_to_process)}] Processing: {subdir.name}") + print("-" * 80) + + try: + output_path = subdir / args.output_name + create_video( + work_dir=subdir, + output_path=output_path, + fps=args.fps + ) + successful += 1 + except Exception as e: + print(f"✗ Error processing {subdir.name}: {e}") + failed += 1 + errors.append((subdir.name, str(e))) + + # Print summary + print("\n" + "=" * 80) + print("SUMMARY") + print("=" * 80) + print(f"Total subdirectories: {len(subdirs_to_process)}") + print(f"Successful: {successful}") + print(f"Failed: {failed}") + + if errors: + print("\nFailed subdirectories:") + for subdir_name, error in errors: + print(f" - {subdir_name}: {error}") + + return 0 if failed == 0 else 1 + + +if __name__ == '__main__': + exit(main()) + diff --git a/scripts/eval/osworld.py b/scripts/eval/osworld.py index b4b48d674..06253c58a 100644 --- a/scripts/eval/osworld.py +++ b/scripts/eval/osworld.py @@ -186,8 +186,8 @@ def parse_args(): parser.add_argument( '--temperature', type=float, - default=0.0, - help='Sampling temperature (0.0 for deterministic)' + default=0.2, + help='Sampling temperature' ) parser.add_argument( '--max-iterations', @@ 
-218,7 +218,7 @@ def parse_args(): parser.add_argument( '--max-image-history', type=int, - default=4, + default=3, help='Maximum number of images to keep in history' ) diff --git a/scripts/eval/visualize/app.py b/scripts/eval/visualize/app.py new file mode 100644 index 000000000..7d3946f04 --- /dev/null +++ b/scripts/eval/visualize/app.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +""" +Trajectory Viewer - A web app to visualize agent trajectories with screenshots. +""" + +import argparse +import json +from pathlib import Path +from flask import Flask, render_template, send_from_directory, jsonify + +app = Flask(__name__) + + +def get_results_dir(): + """Get the results directory from app config.""" + return app.config.get("RESULTS_DIR", Path(__file__).parent / "results") + + +def get_trajectories(): + """Get list of all trajectory folders.""" + results_dir = get_results_dir() + if not results_dir.exists(): + return [] + + trajectories = [] + for folder in sorted(results_dir.iterdir()): + if folder.is_dir(): + result_file = folder / "result.txt" + result = None + if result_file.exists(): + result = result_file.read_text().strip().lower() == "true" + + trajectories.append({ + "id": folder.name, + "result": result + }) + + return trajectories + + +def parse_coordinates(arguments_str): + """Extract normalized coordinates (0-1) from tool call arguments.""" + try: + args = json.loads(arguments_str) + coords = [] + + # Handle x, y coordinates (normalized 0-1) + if "x" in args and "y" in args: + x = float(args["x"]) + y = float(args["y"]) + coords.append({"x": x, "y": y}) + + return coords + except (json.JSONDecodeError, KeyError, TypeError): + return [] + + +def load_conversation(traj_id): + """Load conversation.json for a trajectory.""" + conv_file = get_results_dir() / traj_id / "conversation.json" + if not conv_file.exists(): + return [], "" + + with open(conv_file, "r") as f: + conversation = json.load(f) + + # Extract task instruction from first user message + 
task_instruction = "" + for msg in conversation: + if msg.get("role") == "user": + task_instruction = msg.get("content", "") + break + + # Build list of steps: each step pairs a screenshot with the next assistant action + steps = [] + + for i, msg in enumerate(conversation): + role = msg.get("role", "unknown") + img = msg.get("img") + + # Only process user/tool turns that have screenshots + if role in ("user", "tool") and img: + # Find the next assistant turn to get tool calls + next_assistant = None + for j in range(i + 1, len(conversation)): + if conversation[j].get("role") == "assistant": + next_assistant = conversation[j] + break + + # Extract tool calls from next assistant turn + tool_calls = [] + assistant_content = "" + if next_assistant: + assistant_content = next_assistant.get("content", "") + if "tool_calls" in next_assistant and next_assistant["tool_calls"]: + for tc in next_assistant["tool_calls"]: + tool_call = { + "name": tc.get("name", "unknown"), + "arguments": tc.get("arguments", "{}"), + "coordinates": [] + } + coords = parse_coordinates(tc.get("arguments", "{}")) + tool_call["coordinates"] = coords + tool_calls.append(tool_call) + + steps.append({ + "step_num": len(steps), + "img": img, + "tool_calls": tool_calls, + "assistant_content": assistant_content + }) + + return steps, task_instruction + + +def get_screenshot_list(traj_id): + """Get list of screenshot files for a trajectory.""" + traj_dir = get_results_dir() / traj_id + if not traj_dir.exists(): + return [] + + screenshots = [] + for f in traj_dir.iterdir(): + if f.suffix.lower() == ".png" and f.stem.isdigit(): + screenshots.append(f.name) + + # Sort by numeric value + screenshots.sort(key=lambda x: int(Path(x).stem)) + return screenshots + + +@app.route("/") +def index(): + """Home page listing all trajectories.""" + trajectories = get_trajectories() + return render_template("index.html", trajectories=trajectories) + + +@app.route("/trajectory/") +def view_trajectory(traj_id): + """View 
a specific trajectory.""" + steps, task_instruction = load_conversation(traj_id) + + # Get result + result_file = get_results_dir() / traj_id / "result.txt" + result = None + if result_file.exists(): + result = result_file.read_text().strip().lower() == "true" + + return render_template( + "trajectory.html", + traj_id=traj_id, + steps=steps, + task_instruction=task_instruction, + result=result + ) + + +@app.route("/api/trajectory/") +def api_trajectory(traj_id): + """API endpoint to get trajectory data as JSON.""" + steps, task_instruction = load_conversation(traj_id) + + result_file = get_results_dir() / traj_id / "result.txt" + result = None + if result_file.exists(): + result = result_file.read_text().strip().lower() == "true" + + return jsonify({ + "id": traj_id, + "result": result, + "task_instruction": task_instruction, + "steps": steps + }) + + +@app.route("/results//") +def serve_screenshot(traj_id, filename): + """Serve screenshot files.""" + return send_from_directory(get_results_dir() / traj_id, filename) + + +def parse_args(): + parser = argparse.ArgumentParser(description="Trajectory Viewer - Visualize agent trajectories") + parser.add_argument( + "--results-dir", "-r", + type=str, + default="./results", + help="Path to the results folder containing trajectory directories (default: ./results)" + ) + parser.add_argument( + "--port", "-p", + type=int, + default=5000, + help="Port to run the server on (default: 5000)" + ) + parser.add_argument( + "--host", + type=str, + default="0.0.0.0", + help="Host to bind the server to (default: 0.0.0.0)" + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + results_dir = Path(args.results_dir).resolve() + + # Store in app config so it's accessible in routes + app.config["RESULTS_DIR"] = results_dir + + if not results_dir.exists(): + print(f"Warning: Results directory '{results_dir}' does not exist.") + else: + print(f"Serving trajectories from: {results_dir}") + + 
app.run(debug=True, host=args.host, port=args.port) diff --git a/scripts/eval/visualize/templates/index.html b/scripts/eval/visualize/templates/index.html new file mode 100644 index 000000000..1ecea9e09 --- /dev/null +++ b/scripts/eval/visualize/templates/index.html @@ -0,0 +1,128 @@ + + + + + + Trajectory Viewer + + + +
+

Trajectory Viewer

+ + {% if trajectories %} + + {% else %} +
+

No trajectories found

+

Add trajectory folders to the results/ directory to get started.

+
+ {% endif %} +
+ + diff --git a/scripts/eval/visualize/templates/trajectory.html b/scripts/eval/visualize/templates/trajectory.html new file mode 100644 index 000000000..d4ea60e56 --- /dev/null +++ b/scripts/eval/visualize/templates/trajectory.html @@ -0,0 +1,427 @@ + + + + + + Trajectory: {{ traj_id }} + + + +
+
+ ← Back to Trajectories + {{ traj_id }} + {% if result is none %} + Unknown + {% elif result %} + PASS + {% else %} + FAIL + {% endif %} +
+
+ +
+ {% if task_instruction %} +
+
Task Instruction
+
{{ task_instruction }}
+
+ {% endif %} + +
+ {% for step in steps %} +
+
+ Step {{ step.step_num }} + {{ step.img }} +
+
+
+
+ Screenshot {{ step.img }} +
+
+
+ +
+
Next Action
+ {% if step.tool_calls %} + {% for tc in step.tool_calls %} +
+ {{ tc.name }} + {{ tc.arguments }} +
+ {% endfor %} + {% else %} +
No action taken
+ {% endif %} + + {% if step.assistant_content %} +
+
Assistant Reasoning
+
{{ step.assistant_content }}
+
+ {% endif %} +
+
+
+ {% endfor %} +
+
+ + + + diff --git a/tests/unit/test_osworld_nvcf_runtime.py b/tests/unit/test_osworld_nvcf_runtime.py new file mode 100644 index 000000000..8057724a3 --- /dev/null +++ b/tests/unit/test_osworld_nvcf_runtime.py @@ -0,0 +1,237 @@ +"""Unit tests for OSWorld NVCF Runtime (openhands.runtime.impl.nvcf).""" + +import os +import unittest +from unittest.mock import MagicMock, Mock, patch + +from openhands.core.config import OpenHandsConfig +from openhands.events import EventStream +from openhands.events.action.os import OSWorldInteractiveAction +from openhands.runtime.impl.nvcf import NVCFRuntime, OSWorldNVCFRuntime + + +class TestNVCFRuntime(unittest.TestCase): + """Test base NVCFRuntime (ActionExecutionClient, deploy/close).""" + + def setUp(self): + self.config = OpenHandsConfig() + self.config.runtime = "osworld_nvcf" + self.event_stream = Mock(spec=EventStream) + self.event_stream.file_store = Mock() + self.event_stream.file_store.write = Mock() + self.event_stream.file_store.read = Mock(side_effect=FileNotFoundError) + self.event_stream.file_store.delete = Mock() + + def test_nvcf_runtime_extends_action_execution_client(self): + """NVCFRuntime must extend ActionExecutionClient, not SingularityRuntime.""" + from openhands.runtime.impl.action_execution.action_execution_client import ( + ActionExecutionClient, + ) + self.assertEqual(NVCFRuntime.__bases__[0], ActionExecutionClient) + self.assertTrue(issubclass(NVCFRuntime, ActionExecutionClient)) + + def test_nvcf_runtime_requires_api_key(self): + """Without NGC_API_KEY / nvcf_api_key, init raises.""" + with patch.dict(os.environ, {}, clear=False): + for k in ("NGC_API_KEY", "NVCF_FUNCTION_ID", "NGC_ORG"): + if k in os.environ: + del os.environ[k] + with self.assertRaises(ValueError) as ctx: + NVCFRuntime( + config=self.config, + event_stream=self.event_stream, + sid="test", + ) + self.assertIn("api key", str(ctx.exception).lower()) + + def test_nvcf_runtime_init_with_env(self): + """With NGC_API_KEY and 
NVCF_FUNCTION_ID, init succeeds.""" + with patch.dict( + os.environ, + {"NGC_API_KEY": "test-key", "NVCF_FUNCTION_ID": "test-fid"}, + clear=False, + ): + r = NVCFRuntime( + config=self.config, + event_stream=self.event_stream, + sid="test", + ) + self.assertEqual(r._nvcf_function_id, "test-fid") + self.assertEqual(r._nvcf_api_key, "test-key") + + +class TestOSWorldNVCFRuntime(unittest.TestCase): + """Test OSWorld NVCF runtime (same API as OSWorld Singularity, NVCF backend).""" + + def setUp(self): + self.config = OpenHandsConfig() + self.config.runtime = "osworld_nvcf" + self.event_stream = Mock(spec=EventStream) + self.event_stream.file_store = Mock() + self.event_stream.file_store.write = Mock() + self.event_stream.file_store.read = Mock(side_effect=FileNotFoundError) + self.event_stream.file_store.delete = Mock() + + def test_osworld_nvcf_extends_nvcf_runtime(self): + """OSWorldNVCFRuntime extends NVCFRuntime.""" + self.assertTrue(issubclass(OSWorldNVCFRuntime, NVCFRuntime)) + + def test_osworld_nvcf_init_os_type_and_screen_size(self): + """OS type and default screen_size are set.""" + with patch.dict( + os.environ, + {"NGC_API_KEY": "k", "NVCF_FUNCTION_ID": "f"}, + clear=False, + ): + r = OSWorldNVCFRuntime( + config=self.config, + event_stream=self.event_stream, + sid="test", + os_type="linux", + ) + self.assertEqual(r.os_type, "linux") + self.assertEqual(r.screen_size, (1920, 1080)) + + def test_osworld_vm_url(self): + """osworld_vm_url is NVCF API base.""" + with patch.dict( + os.environ, + {"NGC_API_KEY": "k", "NVCF_FUNCTION_ID": "f"}, + clear=False, + ): + r = OSWorldNVCFRuntime( + config=self.config, + event_stream=self.event_stream, + sid="test", + ) + self.assertEqual(r.osworld_vm_url, "https://grpc.nvcf.nvidia.com/api") + + def test_action_to_pyautogui_click(self): + """_action_to_pyautogui_command produces click command.""" + with patch.dict( + os.environ, + {"NGC_API_KEY": "k", "NVCF_FUNCTION_ID": "f"}, + clear=False, + ): + r = OSWorldNVCFRuntime( + 
config=self.config, + event_stream=self.event_stream, + sid="test", + ) + cmd = r._action_to_pyautogui_command( + "CLICK", + {"x": 100, "y": 200, "button": "left"}, + ) + self.assertIn("pyautogui.click", cmd) + self.assertIn("100", cmd) + self.assertIn("200", cmd) + + def test_action_to_pyautogui_typing(self): + """_action_to_pyautogui_command produces typewrite for TYPING.""" + with patch.dict( + os.environ, + {"NGC_API_KEY": "k", "NVCF_FUNCTION_ID": "f"}, + clear=False, + ): + r = OSWorldNVCFRuntime( + config=self.config, + event_stream=self.event_stream, + sid="test", + ) + cmd = r._action_to_pyautogui_command( + "TYPING", + {"text": "hello"}, + ) + self.assertIn("pyautogui.typewrite", cmd) + self.assertIn("hello", cmd) + + def test_run_action_get_screenshot_mocked_client(self): + """run_action(get_screenshot) uses _nvcf_client and returns observation.""" + with patch.dict( + os.environ, + {"NGC_API_KEY": "k", "NVCF_FUNCTION_ID": "f"}, + clear=False, + ): + r = OSWorldNVCFRuntime( + config=self.config, + event_stream=self.event_stream, + sid="test", + ) + mock_client = MagicMock() + mock_client.get.return_value = MagicMock(status_code=200, content=b"png") + r._nvcf_client = mock_client + r._runtime_initialized = True + + action = OSWorldInteractiveAction( + method="get_screenshot", + params={}, + thought="test", + ) + obs = r.run_action(action) + self.assertIsNotNone(obs) + self.assertEqual(obs.content, "Screenshot captured") + mock_client.get.assert_called() + + def test_run_action_execute_action_mocked_client(self): + """run_action(execute_action) calls execute_vm_action and returns observation.""" + with patch.dict( + os.environ, + {"NGC_API_KEY": "k", "NVCF_FUNCTION_ID": "f"}, + clear=False, + ): + r = OSWorldNVCFRuntime( + config=self.config, + event_stream=self.event_stream, + sid="test", + ) + mock_client = MagicMock() + mock_client.post.return_value = MagicMock( + status_code=200, + json=lambda: {"status": "success", "output": "ok"}, + ) + r._nvcf_client 
= mock_client + r._runtime_initialized = True + + action = OSWorldInteractiveAction( + method="execute_action", + params={ + "action": { + "action_type": "CLICK", + "parameters": {"x": 10, "y": 10}, + } + }, + thought="test", + ) + obs = r.run_action(action) + self.assertIsNotNone(obs) + self.assertEqual(obs.exit_code, 0) + mock_client.post.assert_called() + + def test_run_action_unknown_method_returns_error_observation(self): + """Unknown method returns ErrorObservation.""" + with patch.dict( + os.environ, + {"NGC_API_KEY": "k", "NVCF_FUNCTION_ID": "f"}, + clear=False, + ): + r = OSWorldNVCFRuntime( + config=self.config, + event_stream=self.event_stream, + sid="test", + ) + r._nvcf_client = MagicMock() + r._runtime_initialized = True + + action = OSWorldInteractiveAction( + method="no_such_method", + params={}, + thought="test", + ) + obs = r.run_action(action) + from openhands.events.observation import ErrorObservation + self.assertIsInstance(obs, ErrorObservation) + self.assertIn("Unknown", obs.content) + + +if __name__ == "__main__": + unittest.main()