def _install_skill():
    """Install Agent Reach as an agent skill (OpenClaw / Claude Code / .agents).

    The packaged ``agent_reach/skill`` directory (``SKILL.md`` plus the
    Markdown files under ``references/``) is copied to ``agent-reach`` inside
    every known skill directory that already exists.  When none of them
    exists, ``~/.agents/skills/agent-reach`` is created as the default.

    The install is best-effort: failures are reported on stdout and are never
    raised to the caller.
    """
    import importlib.resources
    import os
    import shutil

    def _copy_skill_dir(target: str) -> bool:
        """Copy the packaged skill directory (SKILL.md + references/) to *target*.

        The files are first written to a staging directory next to *target*
        and only swapped in once every file was copied, so a failed copy never
        destroys an installation that is already in place.

        Returns True on success, False (after printing a warning) on failure.
        """
        staging = f"{target}.tmp-{os.getpid()}"
        try:
            os.makedirs(os.path.dirname(target), exist_ok=True)
            # Drop leftovers of an earlier, interrupted run of this process id.
            if os.path.exists(staging):
                shutil.rmtree(staging)
            os.makedirs(staging)

            skill_pkg = importlib.resources.files("agent_reach").joinpath("skill")

            # The skill files contain non-ASCII text; never rely on the
            # locale's default encoding when reading or writing them.
            skill_md = skill_pkg.joinpath("SKILL.md").read_text(encoding="utf-8")
            with open(os.path.join(staging, "SKILL.md"), "w", encoding="utf-8") as f:
                f.write(skill_md)

            refs_target = os.path.join(staging, "references")
            os.makedirs(refs_target, exist_ok=True)
            refs_pkg = skill_pkg.joinpath("references")
            if refs_pkg.is_dir():
                for ref_file in refs_pkg.iterdir():
                    # Traversable only guarantees ``name`` (no ``suffix``).
                    if not ref_file.name.endswith(".md"):
                        continue
                    content = ref_file.read_text(encoding="utf-8")
                    ref_path = os.path.join(refs_target, ref_file.name)
                    with open(ref_path, "w", encoding="utf-8") as f:
                        f.write(content)

            # Everything was copied: replace the previous installation.
            if os.path.exists(target):
                shutil.rmtree(target)
            os.replace(staging, target)
            return True
        except Exception as e:
            print(f" Warning: Could not install skill: {e}")
            return False
        finally:
            # No-op after a successful swap; removes the partial copy otherwise.
            shutil.rmtree(staging, ignore_errors=True)

    # Known skill directories in priority order (.agents > openclaw > claude).
    # Labels are paired with paths instead of being guessed from substrings of
    # the expanded path, which breaks when the home directory itself contains
    # e.g. "openclaw".
    skill_dirs = [
        ("Agent", os.path.expanduser("~/.agents/skills")),  # Generic agents
        ("OpenClaw", os.path.expanduser("~/.openclaw/skills")),  # OpenClaw
        ("Claude Code", os.path.expanduser("~/.claude/skills")),  # Claude Code
    ]

    installed = False
    for platform_name, skill_dir in skill_dirs:
        if not os.path.isdir(skill_dir):
            continue
        target = os.path.join(skill_dir, "agent-reach")
        if _copy_skill_dir(target):
            print(f"Skill installed for {platform_name}: {target}")
            installed = True

    if not installed:
        # No known skill directory found — create for .agents by default.
        # The parent directory is created inside _copy_skill_dir so that an
        # OSError there is reported instead of aborting the installer.
        target = os.path.expanduser("~/.agents/skills/agent-reach")
        if _copy_skill_dir(target):
            print(f"Skill installed: {target}")
        else:
            print(" -- Could not install agent skill (optional)")
- Triggers: "搜推特", "搜小红书", "看视频", "搜一下", "上网搜", "帮我查", - "search twitter", "youtube transcript", "search reddit", "read this link", - "B站", "bilibili", "抖音视频", "微信文章", "公众号", "微博", "V2EX", - "小宇宙", "播客", "podcast", "web search", "research", "帮我安装". + 17 platforms via CLI, MCP, curl, and Python scripts. + Zero config for 8 channels. + + 【路由方式】SKILL.md 包含路由表和常用命令,复杂场景需按需阅读对应分类的 references/*.md。 + 分类:search / social (小红书/抖音/微博/推特/B站/V2EX/Reddit) / career(LinkedIn) / dev(github) / web(网页/文章/公众号/RSS) / video(YouTube/B站/播客). + + Use when user asks to search, read, or interact on any supported platform, + shares a URL, or asks to search the web. +triggers: + - search: 搜/查/找/search/搜索/查一下/帮我搜 + - social: + - 小红书: xiaohongshu/xhs/小红书/红书 + - 抖音: douyin/抖音 + - Twitter: twitter/推特/x.com/推文 + - 微博: weibo/微博 + - B站: bilibili/b站/哔哩哔哩 + - V2EX: v2ex + - Reddit: reddit + - career: 招聘/职位/求职/linkedin/领英/找工作 + - dev: github/代码/仓库/gh/issue/pr/分支/commit + - web: 网页/链接/文章/公众号/微信文章/rss/读一下/打开这个 + - video: youtube/视频/播客/字幕/小宇宙/转录/yt metadata: openclaw: homepage: https://github.com/Panniantong/Agent-Reach --- -# Agent Reach — Usage Guide +# Agent Reach — 路由器 -Upstream tools for 13+ platforms. Call them directly. +17 平台工具集合。根据用户意图选择对应分类。 -Run `agent-reach doctor` to check which channels are available. +## 路由表 -## ⚠️ Workspace Rules +| 用户意图 | 分类 | 详细文档 | +|---------|------|---------| +| 网页搜索/代码搜索 | search | [references/search.md](references/search.md) | +| 小红书/抖音/微博/推特/B站/V2EX/Reddit | social | [references/social.md](references/social.md) | +| 招聘/职位/LinkedIn | career | [references/career.md](references/career.md) | +| GitHub/代码 | dev | [references/dev.md](references/dev.md) | +| 网页/文章/公众号/RSS | web | [references/web.md](references/web.md) | +| YouTube/B站/播客字幕 | video | [references/video.md](references/video.md) | -**Never create files in the agent workspace.** Use `/tmp/` for temporary output and `~/.agent-reach/` for persistent data. 
- -## Web — Any URL - -```bash -curl -s "https://r.jina.ai/URL" -``` - -## Web Search (Exa) +## 零配置快速命令 ```bash +# Exa 网页搜索 mcporter call 'exa.web_search_exa(query: "query", numResults: 5)' -mcporter call 'exa.get_code_context_exa(query: "code question", tokensNum: 3000)' -``` - -## Twitter/X (xreach) - -```bash -xreach search "query" -n 10 --json # search -xreach tweet URL_OR_ID --json # read tweet (supports /status/ and /article/ URLs) -xreach tweets @username -n 20 --json # user timeline -xreach thread URL_OR_ID --json # full thread -``` - -## YouTube (yt-dlp) - -```bash -yt-dlp --dump-json "URL" # video metadata -yt-dlp --write-sub --write-auto-sub --sub-lang "zh-Hans,zh,en" --skip-download -o "/tmp/%(id)s" "URL" - # download subtitles, then read the .vtt file -yt-dlp --dump-json "ytsearch5:query" # search -``` - -## Bilibili (yt-dlp) -```bash -yt-dlp --dump-json "https://www.bilibili.com/video/BVxxx" -yt-dlp --write-sub --write-auto-sub --sub-lang "zh-Hans,zh,en" --convert-subs vtt --skip-download -o "/tmp/%(id)s" "URL" -``` - -> Server IPs may get 412. Use `--cookies-from-browser chrome` or configure proxy. - -## Reddit - -```bash -curl -s "https://www.reddit.com/r/SUBREDDIT/hot.json?limit=10" -H "User-Agent: agent-reach/1.0" -curl -s "https://www.reddit.com/search.json?q=QUERY&limit=10" -H "User-Agent: agent-reach/1.0" -``` - -> Server IPs may get 403. Search via Exa instead, or configure proxy. 
- -## GitHub (gh CLI) +# 通用网页阅读 +curl -s "https://r.jina.ai/URL" -```bash +# GitHub 搜索 gh search repos "query" --sort stars --limit 10 -gh repo view owner/repo -gh search code "query" --language python -gh issue list -R owner/repo --state open -gh issue view 123 -R owner/repo -``` -## 小红书 / XiaoHongShu (mcporter) - -```bash -mcporter call 'xiaohongshu.search_feeds(keyword: "query")' -mcporter call 'xiaohongshu.get_feed_detail(feed_id: "xxx", xsec_token: "yyy")' -mcporter call 'xiaohongshu.get_feed_detail(feed_id: "xxx", xsec_token: "yyy", load_all_comments: true)' -mcporter call 'xiaohongshu.publish_content(title: "标题", content: "正文", images: ["/path/img.jpg"], tags: ["tag"])' -``` +# Twitter 搜索 +xreach search "query" -n 10 --json -> Requires login. Use Cookie-Editor to import cookies. +# YouTube/B站字幕 +yt-dlp --write-sub --skip-download -o "/tmp/%(id)s" "URL" -## 抖音 / Douyin (mcporter) - -```bash -mcporter call 'douyin.parse_douyin_video_info(share_link: "https://v.douyin.com/xxx/")' -mcporter call 'douyin.get_douyin_download_link(share_link: "https://v.douyin.com/xxx/")' -``` - -> No login needed. - -## 微信公众号 / WeChat Articles - -**Search** (miku_ai): -```python -python3 -c " -import asyncio -from miku_ai import get_wexin_article -async def s(): - for a in await get_wexin_article('query', 5): - print(f'{a[\"title\"]} | {a[\"url\"]}') -asyncio.run(s()) -" -``` - -**Read** (Camoufox — bypasses WeChat anti-bot): -```bash -cd ~/.agent-reach/tools/wechat-article-for-ai && python3 main.py "https://mp.weixin.qq.com/s/ARTICLE_ID" -``` - -> WeChat articles cannot be read with Jina Reader or curl. Must use Camoufox. 
- -## 小宇宙播客 / Xiaoyuzhou Podcast (groq-whisper + ffmpeg) - -```bash -# 转录单集播客(输出文本到 /tmp/) -~/.agent-reach/tools/xiaoyuzhou/transcribe.sh "https://www.xiaoyuzhoufm.com/episode/EPISODE_ID" -``` - -> 需要 ffmpeg + Groq API Key(免费)。 -> 配置 Key:`agent-reach configure groq-key YOUR_KEY` -> 首次运行需安装工具:`agent-reach install --env=auto` -> 运行 `agent-reach doctor` 检查状态。 -> 输出 Markdown 文件默认保存到 `/tmp/`。 - - -## LinkedIn (mcporter) - -```bash -mcporter call 'linkedin.get_person_profile(linkedin_url: "https://linkedin.com/in/username")' -mcporter call 'linkedin.search_people(keyword: "AI engineer", limit: 10)' +# V2EX 热门 +curl -s "https://www.v2ex.com/api/topics/hot.json" -H "User-Agent: agent-reach/1.0" ``` -Fallback: `curl -s "https://r.jina.ai/https://linkedin.com/in/username"` - -## V2EX (public API) +## 环境检查 ```bash -# 热门主题 -curl -s "https://www.v2ex.com/api/topics/hot.json" -H "User-Agent: agent-reach/1.0" +# 检查可用 channel +agent-reach doctor -# 节点主题(node_name 如 python、tech、jobs、qna) -curl -s "https://www.v2ex.com/api/topics/show.json?node_name=python&page=1" -H "User-Agent: agent-reach/1.0" - -# 主题详情(topic_id 从 URL 获取,如 https://www.v2ex.com/t/1234567) -curl -s "https://www.v2ex.com/api/topics/show.json?id=TOPIC_ID" -H "User-Agent: agent-reach/1.0" - -# 主题回复 -curl -s "https://www.v2ex.com/api/replies/show.json?topic_id=TOPIC_ID&page=1" -H "User-Agent: agent-reach/1.0" - -# 用户信息 -curl -s "https://www.v2ex.com/api/members/show.json?username=USERNAME" -H "User-Agent: agent-reach/1.0" +# 查看所有 MCP 服务 +mcporter list ``` -Python 调用示例(V2EXChannel): - -```python -from agent_reach.channels.v2ex import V2EXChannel - -ch = V2EXChannel() - -# 获取热门帖子(默认 20 条) -# 返回字段:id, title, url, replies, node_name, node_title, content(前200字), created -topics = ch.get_hot_topics(limit=10) -for t in topics: - print(f"[{t['node_title']}] {t['title']} ({t['replies']} 回复) {t['url']}") - print(f" id={t['id']} created={t['created']}") - -# 获取指定节点的最新帖子 -# 返回字段:id, title, url, replies, node_name, 
node_title, content(前200字), created -node_topics = ch.get_node_topics("python", limit=5) -for t in node_topics: - print(t["id"], t["title"], t["url"]) - -# 获取单个帖子详情 + 回复列表 -# 返回字段:id, title, url, content, replies_count, node_name, node_title, -# author, created, replies (list of {author, content, created}) -topic = ch.get_topic(1234567) -print(topic["title"], "—", topic["author"]) -for r in topic["replies"]: - print(f" {r['author']}: {r['content'][:80]}") - -# 获取用户信息 -# 返回字段:id, username, url, website, twitter, psn, github, btc, location, bio, avatar, created -user = ch.get_user("Livid") -print(user["username"], user["bio"], user["github"]) - -# 搜索(V2EX 公开 API 不支持,会返回说明信息) -result = ch.search("asyncio") -print(result[0]["error"]) # 提示使用站内搜索或 Exa channel -``` +## 工作区规则 -> No auth required. Results are public JSON. V2EX 节点名见 https://www.v2ex.com/planes +**不要在 agent workspace 创建文件。** 使用 `/tmp/` 存放临时输出,`~/.agent-reach/` 存放持久数据。 -## RSS (feedparser) - -## RSS - -```python -python3 -c " -import feedparser -for e in feedparser.parse('FEED_URL').entries[:5]: - print(f'{e.title} — {e.link}') -" -``` +## 详细文档 -## Troubleshooting +根据用户需求,阅读对应的详细文档: -- **Channel not working?** Run `agent-reach doctor` — shows status and fix instructions. -- **Twitter fetch failed?** Ensure `undici` is installed: `npm install -g undici`. Configure proxy: `agent-reach configure proxy URL`. +- [搜索工具](references/search.md) — Exa AI 搜索 +- [社交媒体](references/social.md) — 小红书, 抖音, Twitter, B站, V2EX, Reddit +- [职场招聘](references/career.md) — LinkedIn +- [开发工具](references/dev.md) — GitHub CLI +- [网页阅读](references/web.md) — Jina Reader, 微信公众号, RSS +- [视频播客](references/video.md) — YouTube, B站, 小宇宙 -## Setting Up a Channel ("帮我配 XXX") +## 配置渠道 -If a channel needs setup (cookies, Docker, etc.), fetch the install guide: +如果某个 channel 需要配置,获取安装指南: https://raw.githubusercontent.com/Panniantong/agent-reach/main/docs/install.md -User only provides cookies. Everything else is your job. 
+用户只需提供 cookies,其他配置由 agent 完成。 diff --git a/agent_reach/skill/references/career.md b/agent_reach/skill/references/career.md new file mode 100644 index 0000000..f1b46fe --- /dev/null +++ b/agent_reach/skill/references/career.md @@ -0,0 +1,29 @@ +# 职场招聘 + +LinkedIn。 + +## LinkedIn + +```bash +# 获取个人资料 +mcporter call 'linkedin-scraper.get_person_profile(linkedin_url: "https://linkedin.com/in/username")' + +# 搜索人才 +mcporter call 'linkedin-scraper.search_people(keyword: "AI engineer", limit: 10)' + +# 获取公司资料 +mcporter call 'linkedin-scraper.get_company_profile(linkedin_url: "https://linkedin.com/company/xxx")' + +# 搜索职位 +mcporter call 'linkedin-scraper.search_jobs(keyword: "software engineer", limit: 10)' +``` + +> **需要登录**: LinkedIn scraper 需要有效的登录态。 + +### Fallback 方案 + +如果 MCP 不可用,可以用 Jina Reader: + +```bash +curl -s "https://r.jina.ai/https://linkedin.com/in/username" +``` diff --git a/agent_reach/skill/references/dev.md b/agent_reach/skill/references/dev.md new file mode 100644 index 0000000..ca24762 --- /dev/null +++ b/agent_reach/skill/references/dev.md @@ -0,0 +1,62 @@ +# 开发工具 + +GitHub CLI + +## GitHub (gh CLI) + +GitHub 官方命令行工具,用于仓库、Issue、PR、Actions、Release 以及 API 访问。 + +```bash +# 认证 +gh auth login +gh auth status + +# 搜索 +gh search repos "query" --sort stars --limit 10 +gh search code "query" --language python + +# 仓库 +gh repo view owner/repo +gh repo clone owner/repo +gh repo create my-repo --private +gh repo fork owner/repo +gh repo fork owner/repo --clone +gh repo sync owner/repo + +# Issues +gh issue list -R owner/repo --state open +gh issue view 123 -R owner/repo +gh issue create -R owner/repo --title "Title" --body "Body" + +# Pull Requests +gh pr list -R owner/repo --state open +gh pr view 123 -R owner/repo +gh pr create -R owner/repo --title "Title" --body "Body" +gh pr checks 123 --repo owner/repo + +# Actions / CI +gh run list --repo owner/repo --limit 10 +gh run view RUN_ID --repo owner/repo +gh run view RUN_ID --repo owner/repo --log-failed +gh workflow list 
--repo owner/repo + +# Releases +gh release list -R owner/repo +gh release create v1.0.0 + +# API +gh api /user +gh api repos/owner/repo + +# JSON 输出 +gh issue list --repo owner/repo --json number,title --jq '.[] | "\(.number): \(.title)"' +``` + + +## 选择指南 + +| 工具 | 来源 | 用途 | +|-----|------|------| +| gh CLI | agent-reach | GitHub 操作(仓库/Issue/PR/Actions) | +| zread | my-mcp-tools | 读仓库内容 | +| context7 | my-mcp-tools | 查技术文档 | diff --git a/agent_reach/skill/references/search.md b/agent_reach/skill/references/search.md new file mode 100644 index 0000000..4a0fa4c --- /dev/null +++ b/agent_reach/skill/references/search.md @@ -0,0 +1,33 @@ +# 搜索工具 + +Exa AI 搜索引擎。 + +## Exa AI 搜索 + +高质量 AI 搜索引擎,擅长技术和代码搜索。 + +```bash +mcporter call 'exa.web_search_exa(query: "query", numResults: 5)' +mcporter call 'exa.get_code_context_exa(query: "code question", tokensNum: 3000)' +``` + +### 使用场景 + +| 场景 | 参数 | +|-----|------| +| 网页搜索 | `web_search_exa(query: "...", numResults: 5)` | +| 代码搜索 | `get_code_context_exa(query: "...", tokensNum: 3000)` | + +### 特点 + +- 擅长英文内容和技术文档 +- 支持代码上下文搜索 +- 结果质量高 + +## 与其他搜索工具对比 + +| 工具 | 来源 | 适用场景 | +|-----|------|---------| +| Exa | agent-reach | 英文/技术/代码搜索 | +| 智谱搜索 | my-mcp-tools | 中文搜索 | +| GitHub 搜索 | agent-reach (dev.md) | 仓库/代码搜索 | diff --git a/agent_reach/skill/references/social.md b/agent_reach/skill/references/social.md new file mode 100644 index 0000000..79ad98c --- /dev/null +++ b/agent_reach/skill/references/social.md @@ -0,0 +1,149 @@ +# 社交媒体 & 社区 + +小红书、抖音、Twitter/X、微博、B站、V2EX、Reddit。 + +## 小红书 / XiaoHongShu + +```bash +# 搜索笔记 +mcporter call 'xiaohongshu.search_feeds(keyword: "query")' + +# 获取笔记详情 +mcporter call 'xiaohongshu.get_feed_detail(feed_id: "xxx", xsec_token: "yyy")' + +# 获取笔记详情 + 评论 +mcporter call 'xiaohongshu.get_feed_detail(feed_id: "xxx", xsec_token: "yyy", load_all_comments: true)' + +# 发布内容 +mcporter call 'xiaohongshu.publish_content(title: "标题", content: "正文", images: ["/path/img.jpg"], tags: ["tag"])' +``` + +> **需要登录**: 使用 Cookie-Editor 
浏览器插件导出 cookies。运行 `agent-reach doctor` 检查状态。 + +## 抖音 / Douyin + +```bash +# 解析视频信息 +mcporter call 'douyin.parse_douyin_video_info(share_link: "https://v.douyin.com/xxx/")' + +# 获取无水印下载链接 +mcporter call 'douyin.get_douyin_download_link(share_link: "https://v.douyin.com/xxx/")' + +# 提取视频文案 +mcporter call 'douyin.extract_douyin_text(share_link: "https://v.douyin.com/xxx/")' +``` + +> **无需登录** + +## Twitter/X (xreach CLI) + +```bash +# 搜索推文 +xreach search "query" -n 10 --json + +# 读取单条推文 (支持 /status/ 和 /article/ URL) +xreach tweet URL_OR_ID --json + +# 用户时间线 +xreach tweets @username -n 20 --json + +# 读取完整 thread +xreach thread URL_OR_ID --json +``` + +> **需要配置**: `agent-reach configure twitter-auth ...` 或通过环境变量配置。 +> 如果 fetch 失败,确保安装了 undici: `npm install -g undici` + +## 微博 / Weibo + +```bash +# 使用 Jina Reader 读取 +curl -s "https://r.jina.ai/https://weibo.com/USER_ID/POST_ID" +``` + +> 微博主要通过网页抓取,推荐使用通用网页读取方式。 + +## B站 / Bilibili + +```bash +# 获取视频元数据 +yt-dlp --dump-json "https://www.bilibili.com/video/BVxxx" + +# 下载字幕 +yt-dlp --write-sub --write-auto-sub --sub-lang "zh-Hans,zh,en" --convert-subs vtt --skip-download -o "/tmp/%(id)s" "URL" +``` + +> **注意**: 服务器 IP 可能遇到 412 错误。使用 `--cookies-from-browser chrome` 或配置代理。 + +## V2EX (公开 API) + +无需认证,直接调用公开 API。 + +### 热门主题 + +```bash +curl -s "https://www.v2ex.com/api/topics/hot.json" -H "User-Agent: agent-reach/1.0" +``` + +### 节点主题 + +```bash +# node_name 如: python, tech, jobs, qna, programmers +curl -s "https://www.v2ex.com/api/topics/show.json?node_name=python&page=1" -H "User-Agent: agent-reach/1.0" +``` + +### 主题详情 + +```bash +# topic_id 从 URL 获取,如 https://www.v2ex.com/t/1234567 +curl -s "https://www.v2ex.com/api/topics/show.json?id=TOPIC_ID" -H "User-Agent: agent-reach/1.0" +``` + +### 主题回复 + +```bash +curl -s "https://www.v2ex.com/api/replies/show.json?topic_id=TOPIC_ID&page=1" -H "User-Agent: agent-reach/1.0" +``` + +### 用户信息 + +```bash +curl -s "https://www.v2ex.com/api/members/show.json?username=USERNAME" -H 
"User-Agent: agent-reach/1.0" +``` + +### Python 调用示例 + +```python +from agent_reach.channels.v2ex import V2EXChannel + +ch = V2EXChannel() + +# 获取热门帖子 +topics = ch.get_hot_topics(limit=10) +for t in topics: + print(f"[{t['node_title']}] {t['title']} ({t['replies']} 回复)") + +# 获取节点帖子 +node_topics = ch.get_node_topics("python", limit=5) + +# 获取帖子详情 + 回复 +topic = ch.get_topic(1234567) +print(topic["title"], "—", topic["author"]) + +# 获取用户信息 +user = ch.get_user("Livid") +``` + +> **节点列表**: https://www.v2ex.com/planes + +## Reddit (公开 API) + +```bash +# 获取 subreddit 热门帖子 +curl -s "https://www.reddit.com/r/SUBREDDIT/hot.json?limit=10" -H "User-Agent: agent-reach/1.0" + +# 搜索 +curl -s "https://www.reddit.com/search.json?q=QUERY&limit=10" -H "User-Agent: agent-reach/1.0" +``` + +> **注意**: 服务器 IP 可能遇到 403 错误。搜索建议使用 Exa 代替,或配置代理。 diff --git a/agent_reach/skill/references/video.md b/agent_reach/skill/references/video.md new file mode 100644 index 0000000..c79c71e --- /dev/null +++ b/agent_reach/skill/references/video.md @@ -0,0 +1,88 @@ +# 视频/播客 + +YouTube、B站、小宇宙播客的字幕和转录。 + +## YouTube (yt-dlp) + +### 获取视频元数据 + +```bash +yt-dlp --dump-json "URL" +``` + +### 下载字幕 + +```bash +# 下载字幕 (不下载视频) +yt-dlp --write-sub --write-auto-sub --sub-lang "zh-Hans,zh,en" --skip-download -o "/tmp/%(id)s" "URL" + +# 然后读取 .vtt 文件 +cat /tmp/VIDEO_ID.*.vtt +``` + +### 搜索视频 + +```bash +yt-dlp --dump-json "ytsearch5:query" +``` + +## B站 / Bilibili (yt-dlp) + +### 获取视频元数据 + +```bash +yt-dlp --dump-json "https://www.bilibili.com/video/BVxxx" +``` + +### 下载字幕 + +```bash +yt-dlp --write-sub --write-auto-sub --sub-lang "zh-Hans,zh,en" --convert-subs vtt --skip-download -o "/tmp/%(id)s" "URL" +``` + +> **注意**: 服务器 IP 可能遇到 412 错误。使用 `--cookies-from-browser chrome` 或配置代理。 + +## 小宇宙播客 / Xiaoyuzhou Podcast + +### 转录单集播客 + +```bash +# 输出 Markdown 文件到 /tmp/ +~/.agent-reach/tools/xiaoyuzhou/transcribe.sh "https://www.xiaoyuzhoufm.com/episode/EPISODE_ID" +``` + +### 前置要求 + +1. **ffmpeg**: `brew install ffmpeg` +2. 
**Groq API Key** (免费): https://console.groq.com/keys +3. **配置 Key**: `agent-reach configure groq-key YOUR_KEY` +4. **首次运行**: `agent-reach install --env=auto` 安装工具 + +### 检查状态 + +```bash +agent-reach doctor +``` + +> 输出 Markdown 文件默认保存到 `/tmp/`。 + +## 抖音视频解析 + +```bash +# 解析视频信息 +mcporter call 'douyin.parse_douyin_video_info(share_link: "https://v.douyin.com/xxx/")' + +# 获取无水印下载链接 +mcporter call 'douyin.get_douyin_download_link(share_link: "https://v.douyin.com/xxx/")' +``` + +> 详见 [social.md](social.md#抖音--douyin) + +## 选择指南 + +| 场景 | 推荐工具 | +|-----|---------| +| YouTube 字幕 | yt-dlp | +| B站字幕 | yt-dlp | +| 播客转录 | 小宇宙 transcribe.sh | +| 抖音视频解析 | douyin MCP | diff --git a/agent_reach/skill/references/web.md b/agent_reach/skill/references/web.md new file mode 100644 index 0000000..ae5ed02 --- /dev/null +++ b/agent_reach/skill/references/web.md @@ -0,0 +1,77 @@ +# 网页阅读 + +通用网页、微信公众号、RSS。 + +## 通用网页 (Jina Reader) + +```bash +# 读取任意网页内容 +curl -s "https://r.jina.ai/URL" + +# 示例 +curl -s "https://r.jina.ai/https://example.com/article" +``` + +**适用场景**: 大多数网页可以直接用 Jina Reader 读取。 + +## Web Reader (MCP) + +```bash +# 读取网页内容 (Markdown 格式) +mcporter call 'web-reader.webReader(url: "https://example.com")' + +# 保留图片 +mcporter call 'web-reader.webReader(url: "https://example.com", retain_images: true)' + +# 纯文本格式 +mcporter call 'web-reader.webReader(url: "https://example.com", return_format: "text")' +``` + +**适用场景**: 需要更精确控制输出格式时使用。 + +## 微信公众号 / WeChat Articles + +**注意**: 微信公众号文章无法用 Jina Reader 或 curl 直接读取,必须使用专用工具。 + +### 搜索文章 (miku_ai) + +```python +python3 -c " +import asyncio +from miku_ai import get_wexin_article +async def s(): + for a in await get_wexin_article('query', 5): + print(f'{a[\"title\"]} | {a[\"url\"]}') +asyncio.run(s()) +" +``` + +### 读取文章 (Camoufox - 绕过微信反爬) + +```bash +cd ~/.agent-reach/tools/wechat-article-for-ai && python3 main.py "https://mp.weixin.qq.com/s/ARTICLE_ID" +``` + +> **重要**: 微信文章必须用 Camoufox 读取,其他方法会失败。 + +## RSS (feedparser) + +```python 
+python3 -c " +import feedparser +for e in feedparser.parse('FEED_URL').entries[:5]: + print(f'{e.title} — {e.link}') +" +``` + +**适用场景**: 订阅博客、新闻源、播客等 RSS feed。 + +## 选择指南 + +| 场景 | 推荐工具 | +|-----|---------| +| 通用网页 | Jina Reader (`curl r.jina.ai`) | +| 需要图片/格式控制 | web-reader MCP | +| 微信公众号 | Camoufox (读取) + miku_ai (搜索) | +| RSS 订阅 | feedparser | +| 微博/知乎等 | Jina Reader |