/**
 * CDP-backed `browser_close_window` tool.
 *
 * Walks every target reported by `context.browser.targets()`, asks the
 * browser which window owns it (`Browser.getWindowForTarget`) and closes each
 * target that belongs to `windowId` with `Target.closeTarget`.
 * NOTE(review): the tool description states that this path bypasses
 * beforeunload dialogs; that is a property of the browser's CDP
 * implementation and is not verified here.
 *
 * Params:
 *   - windowId: id of the window to close. Coerced to a number so ids that
 *     arrive as strings are accepted.
 *
 * Throws:
 *   - `Failed to close window …` when targets of the window were found but
 *     none of them could be closed (the underlying CDP errors are included).
 *   - `No targets found for window …` when no target belongs to the window.
 */
export const closeWindow = defineTool({
  name: 'browser_close_window',
  description:
    'Close a browser window by its windowId. Bypasses beforeunload dialogs.',
  annotations: {
    category: ToolCategories.TAB_MANAGEMENT,
    readOnlyHint: false,
  },
  schema: {
    // Coerce rather than require a strict number: callers may send the id as a string.
    windowId: z.coerce.number().describe('The ID of the window to close'),
  },
  handler: async (request, response, context) => {
    const { windowId } = request.params
    let closedCount = 0
    // Messages from close attempts that failed on targets that DO belong to the window.
    const closeFailures: string[] = []

    for (const target of context.browser.targets()) {
      try {
        const session = await target.createCDPSession()
        try {
          // Ask the protocol for the target id instead of reading a private field off the target object.
          const { targetInfo } = await session.send('Target.getTargetInfo')
          const { targetId } = targetInfo

          const owner = await session.send('Browser.getWindowForTarget', {
            targetId,
          })
          if (owner.windowId !== windowId) continue

          try {
            await session.send('Target.closeTarget', { targetId })
            closedCount++
          } catch (error: unknown) {
            // Keep going with the remaining targets, but remember why this one failed.
            closeFailures.push(
              error instanceof Error ? error.message : String(error),
            )
          }
        } finally {
          // The session may already be gone once its target has been closed.
          await session.detach().catch(() => {})
        }
      } catch {
        // Target may already be closed, may not support a CDP session,
        // or may not be backed by a browser window at all.
      }
    }

    if (closedCount === 0) {
      if (closeFailures.length > 0) {
        throw new Error(
          `Failed to close window ${windowId}: ${closeFailures.join('; ')}`,
        )
      }
      throw new Error(`No targets found for window ${windowId}`)
    }

    response.appendResponseLine(`Closed window ${windowId}`)
    response.setIncludePages(true)
  },
})
/**
 * Polls the browser's HTTP CDP endpoint (`/json/version` on 127.0.0.1) until
 * it answers with a successful status, or until `maxAttempts` probes have
 * been made. Each probe is aborted after one second.
 *
 * Never throws: probe failures are treated as "not ready yet". When the
 * endpoint never becomes available a warning is logged and `false` is
 * returned, so callers can tell a real "ready" from a timeout.
 *
 * @param cdpPort - Local port the browser serves CDP over HTTP on.
 * @param maxAttempts - Maximum number of probes before giving up.
 * @param pollIntervalMs - Pause between two probes, in milliseconds.
 * @returns `true` once the endpoint responded OK, `false` after giving up.
 */
async function waitForCdp(
  cdpPort: number,
  maxAttempts = 60,
  pollIntervalMs = 500,
): Promise<boolean> {
  const versionUrl = `http://127.0.0.1:${cdpPort}/json/version`
  const startedAt = Date.now()

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const response = await fetch(versionUrl, {
        signal: AbortSignal.timeout(1000),
      })
      const ready = response.ok
      // Only the status matters; release the unread body so the connection is not left open.
      await response.body?.cancel().catch(() => {})
      if (ready) {
        return true
      }
    } catch {
      // CDP not ready yet (connection refused or the probe timed out)
    }

    // Sleeping after the final probe would only delay the warning below.
    if (attempt < maxAttempts) {
      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs))
    }
  }

  // Report the time actually spent: probes can take up to 1s each on top of the pauses.
  const waitedSeconds = Math.round((Date.now() - startedAt) / 1000)
  log(
    'server',
    COLORS.server,
    `Warning: CDP not available after ${waitedSeconds}s, starting server anyway`,
  )
  return false
}