Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions apps/agent/web-ext.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ if (useBrowserOS) {
}
if (env.BROWSEROS_CDP_PORT) {
chromiumArgs.push(`--browseros-cdp-port=${env.BROWSEROS_CDP_PORT}`)
// Enable HTTP-based CDP so the server can connect
chromiumArgs.push(`--remote-debugging-port=${env.BROWSEROS_CDP_PORT}`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not needed. Chromium already starts the CDP port on BROWSEROS_CDP_PORT.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is not working, there might be a bug in Chromium; I'll take a look. Let me know.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it was not working before that. After adding it, the CDP tools are added.

}
if (env.BROWSEROS_SERVER_PORT) {
chromiumArgs.push(`--browseros-mcp-port=${env.BROWSEROS_SERVER_PORT}`)
Expand Down
47 changes: 47 additions & 0 deletions apps/server/src/tools/cdp-based/pages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,50 @@ export const handleDialog = defineTool({
response.setIncludePages(true)
},
})

/**
 * CDP-based `browser_close_window` tool.
 *
 * Walks every target reported by `context.browser.targets()`, asks CDP which
 * window owns it (`Browser.getWindowForTarget`), and closes each target whose
 * owning window matches the requested `windowId` (`Target.closeTarget`).
 *
 * Failures on an individual target are ignored so the remaining targets are
 * still examined. If no target was closed at all, the handler throws an
 * `Error` naming the window.
 */
export const closeWindow = defineTool({
  name: 'browser_close_window',
  description: `Close a browser window by its windowId. Bypasses beforeunload dialogs.`,
  annotations: {
    category: ToolCategories.TAB_MANAGEMENT,
    readOnlyHint: false,
  },
  schema: {
    windowId: z.number().describe('The ID of the window to close'),
  },
  handler: async (request, response, context) => {
    const requestedWindowId = request.params.windowId
    // IDs of the targets that were successfully closed for this window.
    const closedTargetIds: string[] = []

    for (const candidate of context.browser.targets()) {
      try {
        // The target ID is read from the `_targetId` field through a cast;
        // targets that do not carry one are skipped.
        const { _targetId: targetId } = candidate as unknown as {
          _targetId?: string
        }

        if (targetId) {
          const cdp = await candidate.createCDPSession()
          try {
            const owner = await cdp.send('Browser.getWindowForTarget', {
              targetId,
            })

            if (owner.windowId === requestedWindowId) {
              await cdp.send('Target.closeTarget', { targetId })
              closedTargetIds.push(targetId)
            }
          } finally {
            // Always release the session; a failed detach is not an error here.
            await cdp.detach().catch(() => {})
          }
        }
      } catch {
        // Target may already be closed or not support CDP session
      }
    }

    if (closedTargetIds.length === 0) {
      throw new Error(`No targets found for window ${requestedWindowId}`)
    }

    response.appendResponseLine(`Closed window ${requestedWindowId}`)
    response.setIncludePages(true)
  },
})
4 changes: 3 additions & 1 deletion apps/server/src/tools/cdp-based/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ import type { ToolDefinition } from '../types/tool-definition'

import * as consoleTools from './console'
import * as networkTools from './network'
import { closeWindow } from './pages'

/**
* All available CDP-based browser automation tools
*/
// biome-ignore lint/suspicious/noExplicitAny: heterogeneous tool collection requires any
export const allCdpTools: Array<ToolDefinition<any>> = [
// FIXME: nikhil - figure out a better way to enable/disable tools
...Object.values(consoleTools),
// ...Object.values(emulationTools),
// ...Object.values(inputTools),
Expand All @@ -23,6 +23,8 @@ export const allCdpTools: Array<ToolDefinition<any>> = [
// ...Object.values(screenshotTools),
// ...Object.values(scriptTools),
// ...Object.values(snapshotTools),
// CDP-based window close (bypasses beforeunload)
closeWindow,
]

// Re-export individual tool modules for selective imports
Expand Down
9 changes: 4 additions & 5 deletions apps/server/src/tools/controller-based/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ export {
ungroupTabs,
updateTabGroup,
} from './tools/tab-management'
// Window Management
export { closeWindow, createWindow } from './tools/window-management'
// Window Management (createWindow uses chrome.windows.create for actual windows)
export { createWindow } from './tools/window-management'
// Types
export type { Context } from './types/context'
export type { ImageContentData, Response } from './types/response'
Expand Down Expand Up @@ -104,9 +104,9 @@ import {
ungroupTabs,
updateTabGroup,
} from './tools/tab-management'
import { closeWindow, createWindow } from './tools/window-management'
import { createWindow } from './tools/window-management'

// Array export for convenience (37 tools total)
// Array export for convenience (36 tools total)
export const allControllerTools = [
getActiveTab,
listTabs,
Expand Down Expand Up @@ -145,5 +145,4 @@ export const allControllerTools = [
searchHistory,
getRecentHistory,
createWindow,
closeWindow,
]
19 changes: 0 additions & 19 deletions apps/server/src/tools/controller-based/tools/window-management.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,3 @@ export const createWindow = defineTool<z.ZodRawShape, Context, Response>({
response.addStructuredContent('tabId', data.tabId)
},
})

/**
 * Controller-based `browser_close_window` tool.
 *
 * Forwards the requested `windowId` to the controller through
 * `context.executeAction('closeWindow', ...)` and, once that resolves,
 * appends a confirmation line to the response.
 */
export const closeWindow = defineTool<z.ZodRawShape, Context, Response>({
  name: 'browser_close_window',
  description: 'Close a browser window by its windowId.',
  annotations: {
    category: ToolCategories.TAB_MANAGEMENT,
    readOnlyHint: false,
  },
  schema: {
    windowId: z.coerce.number().describe('The ID of the window to close'),
  },
  handler: async (request, response, context) => {
    // Params are typed loosely by the generic schema shape, so narrow them here.
    const params = request.params as { windowId: number }
    const targetWindowId = params.windowId

    await context.executeAction('closeWindow', { windowId: targetWindowId })
    response.appendResponseLine(`Closed window ${targetWindowId}`)
  },
})
38 changes: 32 additions & 6 deletions scripts/build/start-all.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,27 @@ function killPort(port: number): void {
})
}

/**
 * Polls the CDP HTTP endpoint (`/json/version` on 127.0.0.1) until it answers
 * with a successful status, or until `maxAttempts` probes have failed.
 *
 * A timeout is not treated as fatal: a warning is logged with the time
 * actually spent waiting and the function resolves so the caller can continue.
 *
 * @param cdpPort - Port on which the CDP HTTP endpoint is expected.
 * @param maxAttempts - Maximum number of probes. Each probe is capped at 1s
 *   and consecutive probes are separated by a 500ms pause.
 */
async function waitForCdp(cdpPort: number, maxAttempts = 60): Promise<void> {
  const probeTimeoutMs = 1000
  const retryDelayMs = 500
  const startedAt = Date.now()

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const response = await fetch(`http://127.0.0.1:${cdpPort}/json/version`, {
        signal: AbortSignal.timeout(probeTimeoutMs),
      })
      if (response.ok) {
        return
      }
    } catch {
      // CDP not ready yet (request failed or the probe timed out)
    }

    // Only pause between probes; sleeping after the last one would just
    // delay the warning below without another attempt following it.
    if (attempt < maxAttempts) {
      await new Promise((resolve) => setTimeout(resolve, retryDelayMs))
    }
  }

  // Report measured wall-clock time: a probe can block for up to its own
  // timeout, so `maxAttempts * retryDelay` would understate the real wait.
  const elapsedSeconds = ((Date.now() - startedAt) / 1000).toFixed(1)
  log(
    'server',
    COLORS.server,
    `Warning: CDP not available after ${elapsedSeconds}s, starting server anyway`,
  )
}

function isPortAvailable(port: number): Promise<boolean> {
return new Promise((resolve) => {
const server = createNetServer()
Expand Down Expand Up @@ -185,19 +206,24 @@ async function main() {

const env = createEnvWithMutablePorts(ports, userDataDir)

log('server', COLORS.server, 'Starting server...')
log('agent', COLORS.agent, 'Starting agent...\n')
// Start agent first (launches browser)
log('agent', COLORS.agent, 'Starting agent (browser)...\n')

const serverProc = spawn({
cmd: ['bun', 'run', '--filter', '@browseros/server', 'start'],
const agentProc = spawn({
cmd: ['bun', 'run', '--filter', '@browseros/agent', 'dev'],
cwd: MONOREPO_ROOT,
stdout: 'pipe',
stderr: 'pipe',
env,
})

const agentProc = spawn({
cmd: ['bun', 'run', '--filter', '@browseros/agent', 'dev'],
// Wait for CDP to be available before starting server
log('server', COLORS.server, 'Waiting for CDP to be ready...')
await waitForCdp(ports.cdp)
log('server', COLORS.server, 'CDP ready, starting server...\n')

const serverProc = spawn({
cmd: ['bun', 'run', '--filter', '@browseros/server', 'start'],
cwd: MONOREPO_ROOT,
stdout: 'pipe',
stderr: 'pipe',
Expand Down