Skip to content

Commit dcd4e47

Browse files
quanru and claude authored
feat(playground): add device options configuration for Android/iOS (#1485)
* feat(playground): add device options configuration for Android/iOS

  This commit implements device-specific configuration options in the playground UI, allowing users to customize device behavior such as keyboard handling and IME strategy.

  Changes:
  - Add device options state management with localStorage persistence
  - Create UI controls for Android-specific options (imeStrategy, autoDismissKeyboard, keyboardDismissStrategy, alwaysRefreshScreenInfo)
  - Create UI controls for iOS-specific options (autoDismissKeyboard)
  - Extend execution pipeline to pass deviceOptions from frontend to backend
  - Update agent.interface.options on the server side when deviceOptions are received
  - Optimize parameter flattening to avoid delete operator performance issues

  Technical implementation:
  - Frontend: Store device options in Zustand with localStorage sync
  - SDK: Include deviceOptions in remote execution adapter payload
  - Server: Update agent.interface.options to apply settings globally
  - This ensures all actions (including those called by aiAct) use the updated options

  Fixes #1282

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

* feat(playground): add dynamic device type detection for iOS/web playground

  The universal playground app now detects the device type from the connected server's /interface-info API and displays device-specific configuration options accordingly. This ensures that iOS playground users can see and configure iOS device options (autoDismissKeyboard), while web users see no device-specific options.

  Related to #1282

* fix(ios): improve keyboard dismissal to prevent accidental UI interactions

  The previous implementation used a swipe down gesture at a fixed screen position (1/3 from top) which could accidentally click on search results or other UI elements that appeared after text input.

  Changes:
  - Use WDA's dismissKeyboard API as the primary method (more reliable)
  - Fall back to safer swipe gesture (from bottom up) if API fails
  - Increase wait time from 300ms to 500ms for UI stability
  - Update autoDismissKeyboard documentation to reflect default behavior

  Technical details:
  - WDA API tries common keyboard button names: return, done, go, search, etc.
  - Swipe fallback uses safer coordinates: from 90% height to 50% height
  - This prevents accidental taps on UI elements in the upper portion of screen

  Related to #1282

* feat(screenshot-viewer): add screenshot viewer component with styles and functionality

* fix(tests): enhance keyboard dismissal tests to simulate failure scenarios

---------

Co-authored-by: Claude <[email protected]>
1 parent 8b2ff8b commit dcd4e47

File tree

18 files changed

+332
-32
lines changed

18 files changed

+332
-32
lines changed

apps/android-playground/src/components/playground-panel/index.tsx

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@ export default function PlaygroundPanel() {
7777
enableScrollToBottom: true,
7878
serverMode: true,
7979
showEnvConfigReminder: true,
80+
deviceType: 'android',
8081
}}
8182
branding={{
8283
title: 'Android Playground',

apps/playground/src/App.tsx

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ export default function App() {
2323
const [serverOnline, setServerOnline] = useState(false);
2424
const [isUserOperating, setIsUserOperating] = useState(false);
2525
const [isNarrowScreen, setIsNarrowScreen] = useState(false);
26+
const [deviceType, setDeviceType] = useState<'web' | 'android' | 'ios'>(
27+
'web',
28+
);
2629

2730
// Create PlaygroundSDK and storage provider
2831
const playgroundSDK = useMemo(() => {
@@ -50,6 +53,21 @@ export default function App() {
5053
try {
5154
const online = await playgroundSDK.checkStatus();
5255
setServerOnline(online);
56+
57+
// Get device type from server if online
58+
if (online) {
59+
try {
60+
const interfaceInfo = await playgroundSDK.getInterfaceInfo();
61+
if (interfaceInfo?.type) {
62+
const type = interfaceInfo.type.toLowerCase();
63+
if (type === 'android' || type === 'ios' || type === 'web') {
64+
setDeviceType(type as 'web' | 'android' | 'ios');
65+
}
66+
}
67+
} catch (error) {
68+
console.warn('Failed to get interface info:', error);
69+
}
70+
}
5371
} catch (error) {
5472
console.error('Failed to check server status:', error);
5573
setServerOnline(false);
@@ -145,6 +163,7 @@ export default function App() {
145163
enableScrollToBottom: true,
146164
serverMode: true,
147165
showEnvConfigReminder: true,
166+
deviceType,
148167
}}
149168
branding={{
150169
title: 'Playground',

packages/ios/src/device.ts

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ export class IOSDevice implements AbstractInterface {
8080
.boolean()
8181
.optional()
8282
.describe(
83-
'If true, the keyboard will be dismissed after the input is completed. Do not set it unless the user asks you to do so.',
83+
'Whether to dismiss the keyboard after input. Defaults to true if not specified. Set to false to keep the keyboard visible after input.',
8484
),
8585
mode: z
8686
.enum(['replace', 'clear', 'append'])
@@ -763,32 +763,42 @@ ScreenSize: ${size.width}x${size.height} (DPR: ${size.scale})
763763

764764
async hideKeyboard(keyNames?: string[]): Promise<boolean> {
765765
try {
766-
// If keyNames are provided, use them instead of manual swipe down
767-
if (keyNames && keyNames.length > 0) {
766+
// Always try WDA's dismissKeyboard API first (most reliable)
767+
// Use common keyboard button names if not specified
768+
const dismissKeys =
769+
keyNames && keyNames.length > 0
770+
? keyNames
771+
: ['return', 'done', 'go', 'search', 'next', 'send'];
772+
773+
debugDevice(
774+
`Attempting to dismiss keyboard using WDA API with keys: ${dismissKeys.join(', ')}`,
775+
);
776+
777+
try {
778+
await this.wdaBackend.dismissKeyboard(dismissKeys);
779+
debugDevice('Successfully dismissed keyboard using WDA API');
780+
await sleep(500); // Wait longer to ensure UI is stable
781+
return true;
782+
} catch (wdaError) {
768783
debugDevice(
769-
`Using keyNames to dismiss keyboard: ${keyNames.join(', ')}`,
784+
`WDA dismissKeyboard failed, falling back to swipe gesture: ${wdaError}`,
770785
);
771-
await this.wdaBackend.dismissKeyboard(keyNames);
772-
debugDevice('Dismissed keyboard using provided keyNames');
773-
await sleep(300);
774-
return true;
775786
}
776787

777-
// Default behavior: Get window size for swipe coordinates
788+
// Fallback: Use swipe gesture if WDA API fails
789+
// Use safer coordinates: swipe up from bottom of screen
778790
const windowSize = await this.wdaBackend.getWindowSize();
779-
780-
// Calculate swipe coordinates at one-third position of the screen
781791
const centerX = Math.round(windowSize.width / 2);
782-
const startY = Math.round(windowSize.height * 0.33); // Start at one-third from top
783-
const endY = Math.round(windowSize.height * 0.33 + 10); // Swipe down
792+
const startY = Math.round(windowSize.height * 0.9); // Start near bottom
793+
const endY = Math.round(windowSize.height * 0.5); // Swipe up to middle
784794

785-
// Perform swipe down gesture to dismiss keyboard
786-
await this.swipe(centerX, startY, centerX, endY, 50);
795+
// Perform swipe up gesture to dismiss keyboard
796+
await this.swipe(centerX, startY, centerX, endY, 300);
787797
debugDevice(
788-
'Dismissed keyboard with swipe down gesture at screen one-third position',
798+
'Dismissed keyboard with swipe up gesture from bottom of screen',
789799
);
790800

791-
await sleep(300);
801+
await sleep(500); // Wait longer to ensure UI is stable
792802
return true;
793803
} catch (error) {
794804
debugDevice(`Failed to hide keyboard: ${error}`);

packages/ios/tests/unit-test/device.test.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,10 @@ describe('IOSDevice', () => {
424424
});
425425

426426
it('should handle keyboard dismissal with default strategy', async () => {
427-
// Mock getWindowSize and swipe methods since hideKeyboard uses swipe gesture by default
427+
// Mock dismissKeyboard to fail so it falls back to swipe gesture
428+
mockWdaClient.dismissKeyboard = vi
429+
.fn()
430+
.mockRejectedValue(new Error('dismissKeyboard not available'));
428431
mockWdaClient.getWindowSize = vi
429432
.fn()
430433
.mockResolvedValue({ width: 375, height: 812 });
@@ -436,7 +439,10 @@ describe('IOSDevice', () => {
436439
});
437440

438441
it('should handle keyboard dismissal failure', async () => {
439-
// Mock swipe to throw an error to simulate failure
442+
// Mock both dismissKeyboard and swipe to fail to simulate total failure
443+
mockWdaClient.dismissKeyboard = vi
444+
.fn()
445+
.mockRejectedValue(new Error('dismissKeyboard failed'));
440446
mockWdaClient.getWindowSize = vi
441447
.fn()
442448
.mockResolvedValue({ width: 375, height: 812 });
@@ -454,6 +460,9 @@ describe('IOSDevice', () => {
454460
...mockWdaClient,
455461
createSession: vi.fn().mockResolvedValue({ sessionId: 'test-session' }),
456462
typeText: vi.fn().mockResolvedValue(undefined),
463+
dismissKeyboard: vi
464+
.fn()
465+
.mockRejectedValue(new Error('dismissKeyboard not available')),
457466
getWindowSize: vi.fn().mockResolvedValue({ width: 375, height: 812 }),
458467
getScreenScale: vi.fn().mockResolvedValue(2),
459468
swipe: vi.fn().mockResolvedValue(undefined),

packages/playground/src/adapters/remote-execution.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ export class RemoteExecutionAdapter extends BasePlaygroundAdapter {
191191
{ key: 'deepThink', value: options.deepThink },
192192
{ key: 'screenshotIncluded', value: options.screenshotIncluded },
193193
{ key: 'domIncluded', value: options.domIncluded },
194+
{ key: 'deviceOptions', value: options.deviceOptions },
194195
{ key: 'params', value: value.params },
195196
] as const;
196197

packages/playground/src/common.ts

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,13 @@ export async function parseStructuredParams(
5151
? Object.keys((schema as { shape: Record<string, unknown> }).shape)
5252
: [];
5353

54-
const paramObj: Record<string, unknown> = { ...options };
54+
// Start with options and merge deviceOptions into the same level
55+
// Destructure to exclude deviceOptions from the final object
56+
const { deviceOptions: _, ...optionsWithoutDeviceOptions } = options;
57+
const paramObj: Record<string, unknown> = {
58+
...optionsWithoutDeviceOptions,
59+
...(options.deviceOptions || {}),
60+
};
5561

5662
keys.forEach((key) => {
5763
if (
@@ -186,10 +192,19 @@ export async function executeAction(
186192
})
187193
: undefined;
188194

189-
return await activeAgent.callActionInActionSpace(action.name, {
195+
// Flatten deviceOptions into the params
196+
// Destructure to exclude deviceOptions from the final object
197+
const { deviceOptions: _, ...optionsWithoutDeviceOptions } = options;
198+
const actionParams = {
190199
locate: detailedLocateParam,
191-
...options,
192-
});
200+
...optionsWithoutDeviceOptions,
201+
...(options.deviceOptions || {}),
202+
};
203+
204+
return await activeAgent.callActionInActionSpace(
205+
action.name,
206+
actionParams,
207+
);
193208
}
194209
} else {
195210
const prompt = value.prompt;

packages/playground/src/server.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ class PlaygroundServer {
315315
deepThink,
316316
screenshotIncluded,
317317
domIncluded,
318+
deviceOptions,
318319
} = req.body;
319320

320321
if (!type) {
@@ -323,6 +324,18 @@ class PlaygroundServer {
323324
});
324325
}
325326

327+
// Update device options if provided
328+
if (
329+
deviceOptions &&
330+
this.agent.interface &&
331+
'options' in this.agent.interface
332+
) {
333+
this.agent.interface.options = {
334+
...(this.agent.interface.options || {}),
335+
...deviceOptions,
336+
};
337+
}
338+
326339
// Check if another task is running
327340
if (this.currentTaskId) {
328341
return res.status(409).json({
@@ -376,6 +389,7 @@ class PlaygroundServer {
376389
deepThink,
377390
screenshotIncluded,
378391
domIncluded,
392+
deviceOptions,
379393
},
380394
);
381395
} catch (error: unknown) {

packages/playground/src/types.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,20 @@ export interface ServerResponse {
2323
error?: string;
2424
}
2525

26+
export interface DeviceOptions {
27+
imeStrategy?: 'always-yadb' | 'yadb-for-non-ascii';
28+
autoDismissKeyboard?: boolean;
29+
keyboardDismissStrategy?: 'esc-first' | 'back-first';
30+
alwaysRefreshScreenInfo?: boolean;
31+
}
32+
2633
export interface ExecutionOptions {
2734
deepThink?: boolean;
2835
screenshotIncluded?: boolean;
2936
domIncluded?: boolean | 'visible-only';
3037
context?: any;
3138
requestId?: string;
39+
deviceOptions?: DeviceOptions;
3240
}
3341

3442
// Extended web types for playground

0 commit comments

Comments
 (0)