diff --git a/.gitignore b/.gitignore index 16a3ba29..856dd790 100644 --- a/.gitignore +++ b/.gitignore @@ -60,4 +60,7 @@ call_model.py app_package_name.py .claude/ -.venv \ No newline at end of file +.venv + +*test* +results/ \ No newline at end of file diff --git a/README_UPDATE.md b/README_UPDATE.md new file mode 100644 index 00000000..2291a68a --- /dev/null +++ b/README_UPDATE.md @@ -0,0 +1,486 @@ +# 更新日志 - 新功能说明 + +本文档记录了 Open-AutoGLM 项目的最新功能更新和使用说明。 + +--- + +## 🆕 新增功能 + +### 1. 结果输出功能 (`--output`) + +#### 功能说明 +新增了 `--output`(或 `-o`)参数,可以将任务执行结果保存到指定的文件中。结果统一以 **JSON 数组** 格式保存。 + +#### 使用方法 + +**命令行方式:** +```bash +# 保存结果到 results/result.json 文件 +python main.py --output ./results/result.json "打开微信并发送消息" + +# 使用短参数 +python main.py -o ./results/result.json "打开微信" +``` + +**编程方式:** +```python +from main import main_params + +main_params( + task="打开微信", + output="./results/result.json" +) +``` + +#### 输出说明 +- 结果会保存为指定路径的 JSON 文件 +- 如果父级文件夹不存在,会自动创建 +- 任务完成后会显示:`result保存到{output}文件` + +#### 示例 +```bash +$ python main.py --output ./results/result.json "打开微信" +Task: 打开微信 + +[执行过程...] + +Result: 任务完成 + +result保存到./results/result.json文件 +``` + +--- + +### 2. 结构化 JSON 输出与全量步骤保存 (`--all`) + +#### 功能说明 +现在结果统一以 **JSON 数组** 格式保存。 +新增了 `--all` 参数,允许用户控制是仅保存最终结果,还是保存执行过程中的所有步骤结果。 + +#### 使用方法 + +**命令行方式:** +```bash +# 仅保存最后结果 (默认,输出为单元素 JSON 数组) +python main.py --output ./results/result.json "打开微信" + +# 保存所有步骤的结果 +python main.py --output ./results/all_steps.json --all "打开微信" +``` + +**编程方式:** +```python +from main import main_params + +# 保存所有步骤的结果 +main_params( + task="打开微信", + output="./results/all_steps.json", + save_all=True +) +``` + +#### 输出格式示例 + +**仅保存最后结果时:** +```json +[ + "任务完成" +] +``` + +**保存所有步骤时 (`--all`):** +```json +[ + "正在打开微信", + "已进入微信主界面", + "任务完成" +] +``` + +--- + +### 3. 
编程接口 `main_params()` 函数 + +#### 功能说明 +新增了 `main_params()` 函数,支持通过函数参数的方式调用主程序,方便从其他 Python 脚本中集成使用。 + +#### 函数签名 +```python +def main_params( + base_url: str = None, + model: str = None, + apikey: str = None, + max_steps: int = None, + device_id: str = None, + connect: str = None, + disconnect: str = None, + list_devices: bool = False, + enable_tcpip: int = None, + wda_url: str = None, + pair: bool = False, + wda_status: bool = False, + quiet: bool = False, + list_apps: bool = False, + lang: str = None, + device_type: str = None, + output: str = None, + task: str = None, + allow_all_apps: bool = False, + save_all: bool = False, +) -> None +``` + +#### 使用方法 + +**基本示例:** +```python +from main import main_params + +# 使用默认配置 +main_params(task="打开微信") + +# 自定义配置 +main_params( + base_url="http://localhost:8000/v1", + model="autoglm-phone-9b", + task="打开微信并发送消息", + output="./results/result.json", + device_type="adb" +) +``` + +**完整示例:** +```python +from main import main_params + +# iOS 设备示例 +main_params( + base_url="http://localhost:8000/v1", + model="autoglm-phone-9b", + apikey="your-api-key", + device_type="ios", + wda_url="http://localhost:8100", + task="打开Safari并搜索", + output="./ios_results/search_res.json", + max_steps=50, + lang="cn", + save_all=True +) +``` + +#### 参数说明 + +| 参数 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| `base_url` | str | 环境变量或默认值 | 模型API基础URL | +| `model` | str | 环境变量或默认值 | 模型名称 | +| `apikey` | str | 环境变量或默认值 | API密钥 | +| `max_steps` | int | 100 | 最大执行步数 | +| `device_id` | str | None | 设备ID | +| `device_type` | str | "adb" | 设备类型:adb/hdc/ios | +| `output` | str | None | 输出文件路径 (JSON) | +| `task` | str | None | 要执行的任务 | +| `allow_all_apps` | bool | False | 是否允许启动所有应用 | +| `save_all` | bool | False | 是否保存所有中间步骤结果 | +| `lang` | str | "cn" | 语言:cn/en | +| `quiet` | bool | False | 是否静默模式 | +| `wda_url` | str | None | iOS WebDriverAgent URL | +| ... | ... | ... | 其他参数见函数文档 | + +--- + +### 4. 
允许所有应用功能 (`--allow-all-apps`) + +#### 功能说明 +新增了 `--allow-all-apps` 参数,允许启动任何应用,不再限制在配置的应用列表中。当启用此选项时,可以直接使用应用的包名(Android)、Bundle ID(iOS)或 Bundle Name(HarmonyOS)来启动应用。 + +#### 使用方法 + +**命令行方式:** +```bash +# 限制在应用列表中(默认行为) +python main.py "打开微信" + +# 允许所有应用,直接使用包名 +python main.py --allow-all-apps "打开com.example.myapp" +``` + +**编程方式:** +```python +from main import main_params + +# 限制在应用列表中 +main_params(task="打开微信", allow_all_apps=False) + +# 允许所有应用 +main_params(task="打开com.example.myapp", allow_all_apps=True) +``` + +#### 使用场景 + +1. **测试未配置的应用** + ```bash + python main.py --allow-all-apps "打开com.example.testapp" + ``` + +2. **使用包名直接启动** + ```bash + # Android + python main.py --allow-all-apps "打开com.android.chrome" + + # iOS + python main.py --device-type ios --allow-all-apps "打开com.apple.Safari" + ``` + +3. **动态应用管理** + - 不需要修改配置文件即可启动新应用 + - 适合开发和测试环境 + +#### 注意事项 + +- 当 `allow_all_apps=True` 时,应用名称会被直接当作包名/Bundle ID使用 +- 确保包名/Bundle ID正确,否则可能无法启动应用 +- 建议在已知包名的情况下使用此功能 + +--- + +### 5. 应用包名查询工具 (`scripts/get_package_name.py`) + +#### 功能说明 +新增了一个实用的 Python 脚本工具,用于查询 Android 应用的包名。支持多种查询方式,方便开发者查找和添加新应用到配置中。 + +#### 安装要求 +- 已安装 ADB 工具 +- 设备已连接并启用 USB 调试 + +#### 使用方法 + +**1. 列出所有第三方应用** +```bash +python scripts/get_package_name.py list +``` + +**2. 列出所有应用(包括系统应用)** +```bash +python scripts/get_package_name.py list-all +``` + +**3. 查看当前前台应用的包名** +```bash +# 先打开你想查询的应用,然后运行: +python scripts/get_package_name.py current +``` + +**4. 搜索包含关键词的包名** +```bash +# 搜索微信相关应用 +python scripts/get_package_name.py search wechat + +# 搜索腾讯相关应用 +python scripts/get_package_name.py search tencent +``` + +**5. 查看应用的详细信息** +```bash +python scripts/get_package_name.py info com.tencent.mm +``` + +**6. 
指定设备ID(多设备时)** +```bash +python scripts/get_package_name.py device <设备ID> current +python scripts/get_package_name.py device emulator-5554 list +``` + +#### 使用示例 + +**示例1:查找微信包名** +```bash +$ python scripts/get_package_name.py search tencent +搜索包含 'tencent' 的包名: +------------------------------------------------------------ + com.tencent.mm + com.tencent.mobileqq + com.tencent.qqmusic + com.tencent.qqlive + com.tencent.androidqqmail + com.tencent.news + +找到 6 个匹配的应用 +``` + +**示例2:查看当前应用** +```bash +$ python scripts/get_package_name.py current +当前前台应用包名: com.tencent.mm +应用名称: 微信 +``` + +**示例3:获取应用详细信息** +```bash +$ python scripts/get_package_name.py info com.tencent.mm +应用信息: com.tencent.mm +------------------------------------------------------------ +包名: com.tencent.mm +应用名称: 微信 +版本: 8.0.xx +``` + +#### 添加到配置文件 + +找到包名后,可以添加到 `phone_agent/config/apps.py`: + +```python +APP_PACKAGES: dict[str, str] = { + # ... 现有应用 ... + "新应用名称": "com.example.newapp", # 添加新应用 + "新应用英文名": "com.example.newapp", # 支持多个名称映射到同一包名 +} +``` + +#### 其他查询方法 + +**使用 ADB 命令直接查询:** +```bash +# 列出所有第三方应用 +adb shell pm list packages -3 + +# 搜索特定应用 +adb shell pm list packages | grep wechat + +# 查看当前前台应用 +adb shell dumpsys window | grep -E 'mCurrentFocus|mFocusedApp' +``` + +--- + +## 📝 配置说明 + +### 环境变量支持 + +所有参数都支持通过环境变量设置: + +```bash +# 设置模型API地址 +export PHONE_AGENT_BASE_URL="http://localhost:8000/v1" + +# 设置模型名称 +export PHONE_AGENT_MODEL="autoglm-phone-9b" + +# 设置API密钥 +export PHONE_AGENT_API_KEY="your-api-key" + +# 设置最大步数 +export PHONE_AGENT_MAX_STEPS="100" + +# 设置设备ID +export PHONE_AGENT_DEVICE_ID="emulator-5554" + +# 设置设备类型 +export PHONE_AGENT_DEVICE_TYPE="adb" + +# 设置语言 +export PHONE_AGENT_LANG="cn" + +# iOS WebDriverAgent URL +export PHONE_AGENT_WDA_URL="http://localhost:8100" +``` + +--- + +## 🔧 完整使用示例 + +### 示例1:基本使用 +```bash +python main.py "打开微信并发送消息给张三" +``` + +### 示例2:保存结果到文件 +```bash +python main.py --output ./results/result.json "打开微信" +``` + +### 示例3:允许所有应用 +```bash +python main.py 
--allow-all-apps "打开com.example.myapp" +``` + +### 示例4:iOS设备使用 +```bash +python main.py \ + --device-type ios \ + --wda-url http://localhost:8100 \ + --output ./ios_results/res.json \ + "打开Safari并搜索" +``` + +### 示例5:编程集成 +```python +from main import main_params + +def my_automation_task(): + main_params( + base_url="http://localhost:8000/v1", + model="autoglm-phone-9b", + task="打开微信并发送消息", + output="./results/result.json", + allow_all_apps=False, + max_steps=50 + ) + # main_params() 没有返回值;结果会写入 output 指定的文件 + +if __name__ == "__main__": + my_automation_task() +``` + +--- + +## 🐛 故障排除 + +### 问题1:无法保存结果文件 +**解决方案:** +- 确保有写入权限 +- 检查输出路径是否正确 +- 确保磁盘空间充足 + +### 问题2:无法启动未配置的应用 +**解决方案:** +- 使用 `--allow-all-apps` 参数 +- 或先使用 `scripts/get_package_name.py` 查找包名,然后添加到配置 + +### 问题3:包名查询工具无法使用 +**解决方案:** +- 确保 ADB 已安装并在 PATH 中 +- 确保设备已连接:`adb devices` +- 确保已启用 USB 调试 + +--- + +## 📚 相关文件 + +- `main.py` - 主程序文件,包含所有新功能 +- `phone_agent/config/apps.py` - 应用配置映射 +- `scripts/get_package_name.py` - 包名查询工具 +- `phone_agent/agent.py` - Android/HarmonyOS Agent +- `phone_agent/agent_ios.py` - iOS Agent + +--- + +## 🔄 更新历史 + +### 最新更新 +- ✅ 修改 `output` 参数为具体文件路径,支持 JSON 数组格式 +- ✅ 添加 `--output` 参数支持结果保存 +- ✅ 新增 `main_params()` 编程接口 +- ✅ 添加 `--allow-all-apps` 参数支持所有应用 +- ✅ 创建包名查询工具脚本 + +--- + +## 💡 提示 + +1. **结果保存**:现在支持指定具体 JSON 文件路径 +2. **应用配置**:优先使用配置列表中的应用,更稳定可靠 +3. **包名查询**:使用工具脚本可以快速找到应用的包名 +4. 
**编程集成**:使用 `main_params()` 可以更好地集成到其他项目中 + diff --git a/main.py b/main.py index 8cdc34b2..865bc111 100755 --- a/main.py +++ b/main.py @@ -514,6 +514,25 @@ def parse_args() -> argparse.Namespace: help="Device type: adb for Android, hdc for HarmonyOS, ios for iPhone (default: adb)", ) + parser.add_argument( + "--output", + "-o", + type=str, + help="Output file path to save result (e.g., results/result.json)", + ) + + parser.add_argument( + "--all", + action="store_true", + help="Save all intermediate step results to the output file, not just the final result", + ) + + parser.add_argument( + "--allow-all-apps", + action="store_true", + help="Allow launching any app, not limited to the configured app list", + ) + parser.add_argument( "task", nargs="?", @@ -681,9 +700,8 @@ def handle_device_commands(args) -> bool: return False -def main(): - """Main entry point.""" - args = parse_args() +def _run_main(args): + """Internal function to run main logic with args object.""" # Set device type globally based on args if args.device_type == "adb": @@ -760,6 +778,7 @@ def main(): device_id=args.device_id, verbose=not args.quiet, lang=args.lang, + allow_all_apps=args.allow_all_apps, ) agent = IOSPhoneAgent( @@ -773,6 +792,7 @@ def main(): device_id=args.device_id, verbose=not args.quiet, lang=args.lang, + allow_all_apps=args.allow_all_apps, ) agent = PhoneAgent( @@ -822,6 +842,30 @@ def main(): print(f"\nTask: {args.task}\n") result = agent.run(args.task) print(f"\nResult: {result}") + + # Save result to file if output is specified + if args.output: + try: + import json + # Create parent directory if it doesn't exist + output_dir = os.path.dirname(args.output) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + + # Prepare data to save + if getattr(args, "all", False): + # Save all steps + save_data = [res.message or res.thinking for res in agent.history] + else: + # Save only final result + save_data = [result] + + # Save to the specified output file + with 
open(args.output, "w", encoding="utf-8") as f: + json.dump(save_data, f, ensure_ascii=False, indent=4) + print(f"\nresult保存到{args.output}文件") + except Exception as e: + print(f"\n保存结果到文件失败: {e}") else: # Interactive mode print("\nEntering interactive mode. Type 'quit' to exit.\n") @@ -840,6 +884,31 @@ def main(): print() result = agent.run(task) print(f"\nResult: {result}\n") + + # Save result to file if output is specified + if args.output: + try: + import json + # Create parent directory if it doesn't exist + output_dir = os.path.dirname(args.output) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + + # Prepare data to save + if getattr(args, "all", False): + # Save all steps + save_data = [res.message or res.thinking for res in agent.history] + else: + # Save only final result + save_data = [result] + + # Save to the specified output file + with open(args.output, "w", encoding="utf-8") as f: + json.dump(save_data, f, ensure_ascii=False, indent=4) + print(f"result保存到{args.output}文件\n") + except Exception as e: + print(f"保存结果到文件失败: {e}\n") + agent.reset() except KeyboardInterrupt: @@ -849,5 +918,89 @@ def main(): print(f"\nError: {e}\n") +def main(): + """Main entry point.""" + args = parse_args() + _run_main(args) + + +def main_params( + base_url: str = None, + model: str = None, + apikey: str = None, + max_steps: int = None, + device_id: str = None, + connect: str = None, + disconnect: str = None, + list_devices: bool = False, + enable_tcpip: int = None, + wda_url: str = None, + pair: bool = False, + wda_status: bool = False, + quiet: bool = False, + list_apps: bool = False, + lang: str = None, + device_type: str = None, + output: str = None, + task: str = None, + allow_all_apps: bool = False, + save_all: bool = False, +): + """ + Main entry point with parameters. 
+ + Args: + base_url: Model API base URL + model: Model name + apikey: API key for model authentication + max_steps: Maximum steps per task + device_id: Device ID + connect: Connect to remote device (e.g., "192.168.1.100:5555") + disconnect: Disconnect from remote device (or "all" to disconnect all) + list_devices: List connected devices and exit + enable_tcpip: Enable TCP/IP debugging on USB device (port number) + wda_url: WebDriverAgent URL for iOS + pair: Pair with iOS device + wda_status: Show WebDriverAgent status and exit (iOS only) + quiet: Suppress verbose output + list_apps: List supported apps and exit + lang: Language for system prompt (cn or en) + device_type: Device type (adb, hdc, or ios) + output: Output file path to save result (e.g., results/result.json) + task: Task to execute + allow_all_apps: Allow launching any app, not limited to the configured app list + save_all: Save all intermediate step results to the output file, not just the final result + """ + # Create a namespace object similar to argparse.Namespace + class Args: + pass + + args = Args() + + # Set default values from environment or defaults + args.base_url = base_url if base_url is not None else os.getenv("PHONE_AGENT_BASE_URL", "http://localhost:8000/v1") + args.model = model if model is not None else os.getenv("PHONE_AGENT_MODEL", "autoglm-phone-9b") + args.apikey = apikey if apikey is not None else os.getenv("PHONE_AGENT_API_KEY", "EMPTY") + args.max_steps = max_steps if max_steps is not None else int(os.getenv("PHONE_AGENT_MAX_STEPS", "100")) + args.device_id = device_id if device_id is not None else os.getenv("PHONE_AGENT_DEVICE_ID") + args.connect = connect + args.disconnect = disconnect + args.list_devices = list_devices + args.enable_tcpip = enable_tcpip + args.wda_url = wda_url if wda_url is not None else os.getenv("PHONE_AGENT_WDA_URL", "http://localhost:8100") + args.pair = pair + args.wda_status = wda_status + args.quiet = quiet + args.list_apps = list_apps + args.lang = 
lang if lang is not None else os.getenv("PHONE_AGENT_LANG", "cn") + args.device_type = device_type if device_type is not None else os.getenv("PHONE_AGENT_DEVICE_TYPE", "adb") + args.output = output + args.task = task + args.allow_all_apps = allow_all_apps + args.all = save_all + + _run_main(args) + + if __name__ == "__main__": main() diff --git a/phone_agent/actions/handler.py b/phone_agent/actions/handler.py index 0bef1c3a..f8a0afb7 100644 --- a/phone_agent/actions/handler.py +++ b/phone_agent/actions/handler.py @@ -37,10 +37,12 @@ def __init__( device_id: str | None = None, confirmation_callback: Callable[[str], bool] | None = None, takeover_callback: Callable[[str], None] | None = None, + allow_all_apps: bool = False, ): self.device_id = device_id self.confirmation_callback = confirmation_callback or self._default_confirmation self.takeover_callback = takeover_callback or self._default_takeover + self.allow_all_apps = allow_all_apps def execute( self, action: dict[str, Any], screen_width: int, screen_height: int @@ -122,7 +124,7 @@ def _handle_launch(self, action: dict, width: int, height: int) -> ActionResult: return ActionResult(False, False, "No app name specified") device_factory = get_device_factory() - success = device_factory.launch_app(app_name, self.device_id) + success = device_factory.launch_app(app_name, self.device_id, allow_all_apps=self.allow_all_apps) if success: return ActionResult(True, False) return ActionResult(False, False, f"App not found: {app_name}") diff --git a/phone_agent/actions/handler_ios.py b/phone_agent/actions/handler_ios.py index c37f50d9..9354af36 100644 --- a/phone_agent/actions/handler_ios.py +++ b/phone_agent/actions/handler_ios.py @@ -44,11 +44,13 @@ def __init__( session_id: str | None = None, confirmation_callback: Callable[[str], bool] | None = None, takeover_callback: Callable[[str], None] | None = None, + allow_all_apps: bool = False, ): self.wda_url = wda_url self.session_id = session_id self.confirmation_callback = 
confirmation_callback or self._default_confirmation self.takeover_callback = takeover_callback or self._default_takeover + self.allow_all_apps = allow_all_apps def execute( self, action: dict[str, Any], screen_width: int, screen_height: int @@ -130,7 +132,7 @@ def _handle_launch(self, action: dict, width: int, height: int) -> ActionResult: return ActionResult(False, False, "No app name specified") success = launch_app( - app_name, wda_url=self.wda_url, session_id=self.session_id + app_name, wda_url=self.wda_url, session_id=self.session_id, allow_all_apps=self.allow_all_apps ) if success: return ActionResult(True, False) diff --git a/phone_agent/adb/device.py b/phone_agent/adb/device.py index 995336a1..59624893 100644 --- a/phone_agent/adb/device.py +++ b/phone_agent/adb/device.py @@ -206,15 +206,16 @@ def home(device_id: str | None = None, delay: float | None = None) -> None: def launch_app( - app_name: str, device_id: str | None = None, delay: float | None = None + app_name: str, device_id: str | None = None, delay: float | None = None, allow_all_apps: bool = False ) -> bool: """ Launch an app by name. Args: - app_name: The app name (must be in APP_PACKAGES). + app_name: The app name (must be in APP_PACKAGES if allow_all_apps is False). device_id: Optional ADB device ID. delay: Delay in seconds after launching. If None, uses configured default. + allow_all_apps: If True, allow launching any app by package name, not limited to APP_PACKAGES. Returns: True if app was launched, False if app not found. 
@@ -222,11 +223,16 @@ def launch_app( if delay is None: delay = TIMING_CONFIG.device.default_launch_delay - if app_name not in APP_PACKAGES: - return False - adb_prefix = _get_adb_prefix(device_id) - package = APP_PACKAGES[app_name] + + # If allow_all_apps is True, use app_name directly as package name + # Otherwise, check if app_name is in APP_PACKAGES + if allow_all_apps: + package = app_name # Use app_name directly as package name + else: + if app_name not in APP_PACKAGES: + return False + package = APP_PACKAGES[app_name] subprocess.run( adb_prefix diff --git a/phone_agent/agent.py b/phone_agent/agent.py index 36427917..1e716550 100644 --- a/phone_agent/agent.py +++ b/phone_agent/agent.py @@ -22,6 +22,7 @@ class AgentConfig: lang: str = "cn" system_prompt: str | None = None verbose: bool = True + allow_all_apps: bool = False def __post_init__(self): if self.system_prompt is None: @@ -80,6 +81,7 @@ def __init__( self._context: list[dict[str, Any]] = [] self._step_count = 0 + self.history: list[StepResult] = [] def run(self, task: str) -> str: """ @@ -93,9 +95,11 @@ def run(self, task: str) -> str: """ self._context = [] self._step_count = 0 + self.history = [] # First step with user prompt result = self._execute_step(task, is_first=True) + self.history.append(result) if result.finished: return result.message or "Task completed" @@ -103,6 +107,7 @@ def run(self, task: str) -> str: # Continue until finished or max steps reached while self._step_count < self.agent_config.max_steps: result = self._execute_step(is_first=False) + self.history.append(result) if result.finished: return result.message or "Task completed" diff --git a/phone_agent/agent_ios.py b/phone_agent/agent_ios.py index a3b20d9f..c87d7170 100644 --- a/phone_agent/agent_ios.py +++ b/phone_agent/agent_ios.py @@ -24,6 +24,7 @@ class IOSAgentConfig: lang: str = "cn" system_prompt: str | None = None verbose: bool = True + allow_all_apps: bool = False def __post_init__(self): if self.system_prompt is None: 
@@ -94,10 +95,12 @@ def __init__( session_id=self.agent_config.session_id, confirmation_callback=confirmation_callback, takeover_callback=takeover_callback, + allow_all_apps=self.agent_config.allow_all_apps, ) self._context: list[dict[str, Any]] = [] self._step_count = 0 + self.history: list[StepResult] = [] def run(self, task: str) -> str: """ @@ -111,9 +114,11 @@ def run(self, task: str) -> str: """ self._context = [] self._step_count = 0 + self.history = [] # First step with user prompt result = self._execute_step(task, is_first=True) + self.history.append(result) if result.finished: return result.message or "Task completed" @@ -121,6 +126,7 @@ def run(self, task: str) -> str: # Continue until finished or max steps reached while self._step_count < self.agent_config.max_steps: result = self._execute_step(is_first=False) + self.history.append(result) if result.finished: return result.message or "Task completed" diff --git a/phone_agent/device_factory.py b/phone_agent/device_factory.py index 915ff52b..1a30c181 100644 --- a/phone_agent/device_factory.py +++ b/phone_agent/device_factory.py @@ -100,10 +100,10 @@ def home(self, device_id: str | None = None, delay: float | None = None): return self.module.home(device_id, delay) def launch_app( - self, app_name: str, device_id: str | None = None, delay: float | None = None + self, app_name: str, device_id: str | None = None, delay: float | None = None, allow_all_apps: bool = False ) -> bool: """Launch an app.""" - return self.module.launch_app(app_name, device_id, delay) + return self.module.launch_app(app_name, device_id, delay, allow_all_apps) def type_text(self, text: str, device_id: str | None = None): """Type text.""" diff --git a/phone_agent/hdc/device.py b/phone_agent/hdc/device.py index 63f23c3a..7e268164 100644 --- a/phone_agent/hdc/device.py +++ b/phone_agent/hdc/device.py @@ -218,15 +218,16 @@ def home(device_id: str | None = None, delay: float | None = None) -> None: def launch_app( - app_name: str, 
device_id: str | None = None, delay: float | None = None + app_name: str, device_id: str | None = None, delay: float | None = None, allow_all_apps: bool = False ) -> bool: """ Launch an app by name. Args: - app_name: The app name (must be in APP_PACKAGES). + app_name: The app name (must be in APP_PACKAGES if allow_all_apps is False). device_id: Optional HDC device ID. delay: Delay in seconds after launching. If None, uses configured default. + allow_all_apps: If True, allow launching any app by bundle name, not limited to APP_PACKAGES. Returns: True if app was launched, False if app not found. @@ -234,17 +235,23 @@ def launch_app( if delay is None: delay = TIMING_CONFIG.device.default_launch_delay - if app_name not in APP_PACKAGES: - print(f"[HDC] App '{app_name}' not found in HarmonyOS app list") - print(f"[HDC] Available apps: {', '.join(sorted(APP_PACKAGES.keys())[:10])}...") - return False - hdc_prefix = _get_hdc_prefix(device_id) - bundle = APP_PACKAGES[app_name] - - # Get the ability name for this bundle - # Default to "EntryAbility" if not specified in APP_ABILITIES - ability = APP_ABILITIES.get(bundle, "EntryAbility") + + # If allow_all_apps is True, use app_name directly as bundle name + # Otherwise, check if app_name is in APP_PACKAGES + if allow_all_apps: + bundle = app_name # Use app_name directly as bundle name + # Default to "EntryAbility" if not specified in APP_ABILITIES + ability = APP_ABILITIES.get(bundle, "EntryAbility") + else: + if app_name not in APP_PACKAGES: + print(f"[HDC] App '{app_name}' not found in HarmonyOS app list") + print(f"[HDC] Available apps: {', '.join(sorted(APP_PACKAGES.keys())[:10])}...") + return False + bundle = APP_PACKAGES[app_name] + # Get the ability name for this bundle + # Default to "EntryAbility" if not specified in APP_ABILITIES + ability = APP_ABILITIES.get(bundle, "EntryAbility") # HarmonyOS uses 'aa start' command to launch apps # Format: aa start -b {bundle} -a {ability} diff --git 
a/phone_agent/xctest/device.py b/phone_agent/xctest/device.py
index 49fc379c..c867fad5 100644
--- a/phone_agent/xctest/device.py
+++ b/phone_agent/xctest/device.py
@@ -355,26 +355,33 @@ def launch_app(
     wda_url: str = "http://localhost:8100",
     session_id: str | None = None,
     delay: float = 1.0,
+    allow_all_apps: bool = False,
 ) -> bool:
     """
     Launch an app by name.
 
     Args:
-        app_name: The app name (must be in APP_PACKAGES).
+        app_name: The app name (must be in APP_PACKAGES if allow_all_apps is False).
         wda_url: WebDriverAgent URL.
         session_id: Optional WDA session ID.
         delay: Delay in seconds after launching.
+        allow_all_apps: If True, allow launching any app by bundle ID, not limited to APP_PACKAGES.
 
     Returns:
         True if app was launched, False if app not found.
     """
-    if app_name not in APP_PACKAGES:
-        return False
-
     try:
         import requests
 
-        bundle_id = APP_PACKAGES[app_name]
+        # If allow_all_apps is True, use app_name directly as bundle ID
+        # Otherwise, check if app_name is in APP_PACKAGES
+        if allow_all_apps:
+            bundle_id = app_name  # Use app_name directly as bundle ID
+        else:
+            if app_name not in APP_PACKAGES:
+                return False
+            bundle_id = APP_PACKAGES[app_name]
+
         url = _get_wda_session_url(wda_url, session_id, "wda/apps/launch")
 
         response = requests.post(
diff --git a/scripts/get_package_name.py b/scripts/get_package_name.py
new file mode 100644
index 00000000..e305325a
--- /dev/null
+++ b/scripts/get_package_name.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python3
+"""
+Utility script: look up the package names of Android apps through ADB.
+"""
+
+import re
+import subprocess
+import sys
+
+# Dotted Android package name. Segments may contain upper-case letters,
+# so the character classes must not be restricted to lower case.
+_PACKAGE_RE = re.compile(r"([A-Za-z][A-Za-z0-9_]*\.)+[A-Za-z][A-Za-z0-9_]*")
+
+
+def _adb_prefix(device_id: str | None = None) -> list[str]:
+    """Return the adb command prefix, targeting ``device_id`` when given."""
+    if device_id:
+        return ["adb", "-s", device_id]
+    return ["adb"]
+
+
+def _run_adb(
+    args: list[str], device_id: str | None = None
+) -> subprocess.CompletedProcess | None:
+    """
+    Run an adb command and capture its output as text.
+
+    Args:
+        args: Arguments placed after the adb prefix.
+        device_id: Optional ADB device ID.
+
+    Returns:
+        The completed process, or None if the adb executable is not on PATH.
+    """
+    try:
+        return subprocess.run(
+            _adb_prefix(device_id) + args,
+            capture_output=True,
+            text=True,
+        )
+    except FileNotFoundError:
+        print("错误: 未找到 adb,请确认已安装 ADB 并已加入 PATH")
+        return None
+
+
+def list_all_packages(device_id: str | None = None, third_party_only: bool = False) -> list[str]:
+    """
+    List the package names of installed apps.
+
+    Args:
+        device_id: Optional ADB device ID.
+        third_party_only: If True, list third-party apps only.
+
+    Returns:
+        Package names; empty if adb is missing or the command fails.
+    """
+    cmd = ["shell", "pm", "list", "packages"]
+    if third_party_only:
+        cmd.append("-3")
+
+    result = _run_adb(cmd, device_id)
+    if result is None:
+        return []
+    if result.returncode != 0:
+        print(f"错误: {result.stderr}")
+        return []
+
+    # Each output line looks like "package:<name>"; strip only that prefix.
+    return [
+        line.removeprefix("package:").strip()
+        for line in result.stdout.splitlines()
+        if line.startswith("package:")
+    ]
+
+
+def get_current_package(device_id: str | None = None) -> str | None:
+    """
+    Return the package name of the current foreground app.
+
+    Args:
+        device_id: Optional ADB device ID.
+
+    Returns:
+        The package name, or None if it cannot be determined.
+    """
+    result = _run_adb(["shell", "dumpsys", "window"], device_id)
+    if result is None or result.returncode != 0:
+        return None
+
+    # The focused window is reported on the mCurrentFocus / mFocusedApp lines,
+    # usually in the form "com.package.name/ActivityName".
+    for line in result.stdout.splitlines():
+        if "mCurrentFocus" in line or "mFocusedApp" in line:
+            match = _PACKAGE_RE.search(line)
+            if match:
+                return match.group(0)
+
+    return None
+
+
+def search_packages(keyword: str, device_id: str | None = None) -> list[str]:
+    """
+    Find installed packages whose name contains a keyword (case-insensitive).
+
+    Args:
+        keyword: Search keyword.
+        device_id: Optional ADB device ID.
+
+    Returns:
+        Matching package names.
+    """
+    all_packages = list_all_packages(device_id, third_party_only=False)
+    keyword_lower = keyword.lower()
+    return [pkg for pkg in all_packages if keyword_lower in pkg.lower()]
+
+
+def get_app_info(package_name: str, device_id: str | None = None) -> dict:
+    """
+    Collect basic information about an app.
+
+    Args:
+        package_name: Package name to inspect.
+        device_id: Optional ADB device ID.
+
+    Returns:
+        Dict with the keys "package", "installed", "version" and "label".
+    """
+    info = {
+        "package": package_name,
+        "installed": False,
+        "version": None,
+        "label": None,
+    }
+
+    result = _run_adb(["shell", "dumpsys", "package", package_name], device_id)
+    if result is None or result.returncode != 0:
+        return info
+
+    output = result.stdout
+    version_match = re.search(r"versionName=([^\s]+)", output)
+    if version_match:
+        info["version"] = version_match.group(1)
+
+    # dumpsys normally exits with 0 even for an unknown package, so the exit
+    # code alone does not prove the app exists; require a package record.
+    info["installed"] = bool(version_match) or f"Package [{package_name}]" in output
+    if not info["installed"]:
+        return info
+
+    # The label is read from a separate "pm dump" call.
+    label_result = _run_adb(["shell", "pm", "dump", package_name], device_id)
+    if label_result is not None and label_result.returncode == 0:
+        label_match = re.search(r"label=([^\s]+)", label_result.stdout)
+        if label_match:
+            info["label"] = label_match.group(1)
+
+    return info
+
+
+def _print_usage() -> None:
+    """Print the command-line usage summary."""
+    print("用法:")
+    print(" python get_package_name.py list # 列出所有第三方应用")
+    print(" python get_package_name.py list-all # 列出所有应用(包括系统应用)")
+    print(" python get_package_name.py current # 显示当前前台应用的包名")
+    print(" python get_package_name.py search <关键词> # 搜索包含关键词的包名")
+    print(" python get_package_name.py info <包名> # 显示应用的详细信息")
+    print(" python get_package_name.py device <设备ID> <命令> # 指定设备ID")
+
+
+def _print_package_list(title: str, device_id: str | None, third_party_only: bool) -> None:
+    """Print a titled, sorted package listing followed by the total count."""
+    print(title)
+    print("-" * 60)
+    packages = list_all_packages(device_id, third_party_only=third_party_only)
+    for pkg in sorted(packages):
+        print(f" {pkg}")
+    print(f"\n共 {len(packages)} 个应用")
+
+
+def main():
+    """Command-line entry point: parse ``sys.argv`` and run the sub-command."""
+    if len(sys.argv) < 2:
+        _print_usage()
+        sys.exit(1)
+
+    command = sys.argv[1]
+    device_id = None
+
+    # "device <id> <command> [args...]" selects the target device first.
+    if command == "device":
+        if len(sys.argv) < 4:
+            # Report the real problem instead of "unknown command 'device'".
+            print("错误: 请提供设备ID和命令")
+            sys.exit(1)
+        device_id = sys.argv[2]
+        command = sys.argv[3]
+        args = sys.argv[4:]
+    else:
+        args = sys.argv[2:]
+
+    if command == "list":
+        _print_package_list("第三方应用包名列表:", device_id, third_party_only=True)
+
+    elif command == "list-all":
+        _print_package_list("所有应用包名列表:", device_id, third_party_only=False)
+
+    elif command == "current":
+        package = get_current_package(device_id)
+        if package:
+            print(f"当前前台应用包名: {package}")
+            info = get_app_info(package, device_id)
+            if info.get("label"):
+                print(f"应用名称: {info['label']}")
+        else:
+            print("无法获取当前应用的包名")
+
+    elif command == "search":
+        if not args:
+            print("错误: 请提供搜索关键词")
+            sys.exit(1)
+        keyword = args[0]
+        print(f"搜索包含 '{keyword}' 的包名:")
+        print("-" * 60)
+        packages = search_packages(keyword, device_id)
+        if packages:
+            for pkg in sorted(packages):
+                print(f" {pkg}")
+            print(f"\n找到 {len(packages)} 个匹配的应用")
+        else:
+            print("未找到匹配的应用")
+
+    elif command == "info":
+        if not args:
+            print("错误: 请提供包名")
+            sys.exit(1)
+        package = args[0]
+        print(f"应用信息: {package}")
+        print("-" * 60)
+        info = get_app_info(package, device_id)
+        if info["installed"]:
+            print(f"包名: {info['package']}")
+            if info.get("label"):
+                print(f"应用名称: {info['label']}")
+            if info.get("version"):
+                print(f"版本: {info['version']}")
+        else:
+            print(f"错误: 未找到包名为 '{package}' 的应用")
+
+    else:
+        print(f"错误: 未知命令 '{command}'")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
+