diff --git a/.github/required-checks.txt b/.github/required-checks.txt
index c9cbf6eab7..780e743db5 100644
--- a/.github/required-checks.txt
+++ b/.github/required-checks.txt
@@ -1,16 +1,2 @@
-# workflow_file|job_name
-pr-test-build.yml|go-ci
-pr-test-build.yml|quality-ci
-pr-test-build.yml|quality-staged-check
-pr-test-build.yml|fmt-check
-pr-test-build.yml|golangci-lint
-pr-test-build.yml|route-lifecycle
-pr-test-build.yml|provider-smoke-matrix
-pr-test-build.yml|provider-smoke-matrix-cheapest
-pr-test-build.yml|test-smoke
-pr-test-build.yml|pre-release-config-compat-smoke
-pr-test-build.yml|distributed-critical-paths
-pr-test-build.yml|changelog-scope-classifier
-pr-test-build.yml|docs-build
-pr-test-build.yml|ci-summary
+pr-test-build.yml|build
 pr-path-guard.yml|ensure-no-translator-changes
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 7609a68b9b..6c99b21b5d 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -1,14 +1,13 @@
 name: docker-image
 
 on:
-  workflow_dispatch:
   push:
     tags:
       - v*
 
 env:
   APP_NAME: CLIProxyAPI
-  DOCKERHUB_REPO: ${{ secrets.DOCKERHUB_USERNAME }}/cli-proxy-api-plus
+  DOCKERHUB_REPO: eceasy/cli-proxy-api
 
 jobs:
   docker_amd64:
diff --git a/.github/workflows/pr-path-guard.yml b/.github/workflows/pr-path-guard.yml
index 4fe3d93881..450fda144f 100644
--- a/.github/workflows/pr-path-guard.yml
+++ b/.github/workflows/pr-path-guard.yml
@@ -9,6 +9,7 @@ on:
 
 jobs:
   ensure-no-translator-changes:
+    name: ensure-no-translator-changes
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -21,7 +22,7 @@ jobs:
           files: |
             internal/translator/**
       - name: Fail when restricted paths change
-        if: steps.changed-files.outputs.any_changed == 'true'
+        if: steps.changed-files.outputs.any_changed == 'true' && !(startsWith(github.head_ref, 'feature/koosh-migrate') || startsWith(github.head_ref, 'feature/migrate-') || startsWith(github.head_ref, 'migrated/'))
         run: |
           echo "Changes under internal/translator are not allowed in pull requests."
           echo "You need to create an issue for our maintenance team to make the necessary changes."
diff --git a/.github/workflows/pr-test-build.yml b/.github/workflows/pr-test-build.yml
index 477ff0498e..2fe1994b84 100644
--- a/.github/workflows/pr-test-build.yml
+++ b/.github/workflows/pr-test-build.yml
@@ -8,6 +8,7 @@ permissions:
 
 jobs:
   build:
+    name: build
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 04ec21a9a5..4bb5e63b3a 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -19,12 +19,11 @@ jobs:
       - run: git fetch --force --tags
       - uses: actions/setup-go@v4
         with:
-          go-version: '>=1.26.0'
+          go-version: '>=1.24.0'
           cache: true
       - name: Generate Build Metadata
         run: |
-          VERSION=$(git describe --tags --always --dirty)
-          echo "VERSION=${VERSION}" >> $GITHUB_ENV
+          echo VERSION=`git describe --tags --always --dirty` >> $GITHUB_ENV
           echo COMMIT=`git rev-parse --short HEAD` >> $GITHUB_ENV
           echo BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` >> $GITHUB_ENV
       - uses: goreleaser/goreleaser-action@v4
diff --git a/.gitignore b/.gitignore
index ead9772081..183138f96c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,20 +1,17 @@
 # Binaries
 cli-proxy-api
-cliproxy
 *.exe
 
-
 # Configuration
 config.yaml
 .env
-.mcp.json
+
 # Generated content
 bin/*
 logs/*
 conv/*
 temp/*
 refs/*
-tmp/*
 
 # Storage backends
 pgstore/*
@@ -47,10 +44,7 @@ GEMINI.md
 .bmad/*
 _bmad/*
 _bmad-output/*
-.mcp/cache/
 
 # macOS
 .DS_Store
 ._*
-*.bak
-server
diff --git a/.goreleaser.yml b/.goreleaser.yml
index 6e1829ed51..31d05e6d38 100644
--- a/.goreleaser.yml
+++ b/.goreleaser.yml
@@ -1,5 +1,5 @@
 builds:
-  - id: "cli-proxy-api-plus"
+  - id: "cli-proxy-api"
     env:
       - CGO_ENABLED=0
     goos:
@@ -10,11 +10,11 @@ builds:
       - amd64
       - arm64
     main: ./cmd/server/
-    binary: cli-proxy-api-plus
+    binary: cli-proxy-api
     ldflags:
-      - -s -w -X 'main.Version={{.Version}}-plus' -X 'main.Commit={{.ShortCommit}}' -X 'main.BuildDate={{.Date}}'
+      - -s -w -X 'main.Version={{.Version}}' -X 'main.Commit={{.ShortCommit}}' -X 'main.BuildDate={{.Date}}'
 archives:
-  - id: "cli-proxy-api-plus"
+  - id: "cli-proxy-api"
     format: tar.gz
     format_overrides:
       - goos: windows
diff --git a/Dockerfile b/Dockerfile
index cde6205a81..8623dc5e43 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM golang:1.26-alpine AS builder
+FROM golang:1.24-alpine AS builder
 
 WORKDIR /app
 
@@ -12,7 +12,7 @@ ARG VERSION=dev
 ARG COMMIT=none
 ARG BUILD_DATE=unknown
 
-RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w -X 'main.Version=${VERSION}-plus' -X 'main.Commit=${COMMIT}' -X 'main.BuildDate=${BUILD_DATE}'" -o ./CLIProxyAPIPlus ./cmd/server/
+RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w -X 'main.Version=${VERSION}' -X 'main.Commit=${COMMIT}' -X 'main.BuildDate=${BUILD_DATE}'" -o ./CLIProxyAPI ./cmd/server/
 
 FROM alpine:3.22.0
 
@@ -20,7 +20,7 @@ RUN apk add --no-cache tzdata
 
 RUN mkdir /CLIProxyAPI
 
-COPY --from=builder ./app/CLIProxyAPIPlus /CLIProxyAPI/CLIProxyAPIPlus
+COPY --from=builder ./app/CLIProxyAPI /CLIProxyAPI/CLIProxyAPI
 
 COPY config.example.yaml /CLIProxyAPI/config.example.yaml
 
@@ -32,4 +32,4 @@ ENV TZ=Asia/Shanghai
 
 RUN cp /usr/share/zoneinfo/${TZ} /etc/localtime && echo "${TZ}" > /etc/timezone
 
-CMD ["./CLIProxyAPIPlus"]
\ No newline at end of file
+CMD ["./CLIProxyAPI"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 67169fe1ea..5c7d0ce6a3 100644
--- a/README.md
+++ b/README.md
@@ -1,181 +1,161 @@
-# CLIProxyAPI++ (KooshaPari Fork)
+# CLI Proxy API
 
-**Forked and enhanced from [CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI)**
+English | [中文](README_CN.md)
 
-Multi-provider LLM proxy with unified OpenAI-compatible API, third-party auth, SDK generation, and enterprise features.
+A proxy server that provides OpenAI/Gemini/Claude/Codex compatible API interfaces for CLI.
+
+It now also supports OpenAI Codex (GPT models) and Claude Code via OAuth.
+
+So you can use local or multi-account CLI access with OpenAI(include Responses)/Gemini/Claude-compatible clients and SDKs.
+
+## Sponsor
+
+[![z.ai](https://assets.router-for.me/english-4.7.png)](https://z.ai/subscribe?ic=8JVLJQFSKB)
+
+This project is sponsored by Z.ai, supporting us with their GLM CODING PLAN.
+
+GLM CODING PLAN is a subscription service designed for AI coding, starting at just $3/month. It provides access to their flagship GLM-4.7 model across 10+ popular AI coding tools (Claude Code, Cline, Roo Code, etc.), offering developers top-tier, fast, and stable coding experiences.
+
+Get 10% OFF GLM CODING PLAN：https://z.ai/subscribe?ic=8JVLJQFSKB
+
+---
+
+<table>
+<tbody>
+<tr>
+<td width="180"><a href="https://www.packyapi.com/register?aff=cliproxyapi"><img src="./assets/packycode.png" alt="PackyCode" width="150"></a></td>
+<td>Thanks to PackyCode for sponsoring this project! PackyCode is a reliable and efficient API relay service provider, offering relay services for Claude Code, Codex, Gemini, and more. PackyCode provides special discounts for our software users: register using <a href="https://www.packyapi.com/register?aff=cliproxyapi">this link</a> and enter the "cliproxyapi" promo code during recharge to get 10% off.</td>
+</tr>
+<tr>
+<td width="180"><a href="https://cubence.com/signup?code=CLIPROXYAPI&source=cpa"><img src="./assets/cubence.png" alt="Cubence" width="150"></a></td>
+<td>Thanks to Cubence for sponsoring this project! Cubence is a reliable and efficient API relay service provider, offering relay services for Claude Code, Codex, Gemini, and more. Cubence provides special discounts for our software users: register using <a href="https://cubence.com/signup?code=CLIPROXYAPI&source=cpa">this link</a> and enter the "CLIPROXYAPI" promo code during recharge to get 10% off.</td>
+</tr>
+</tbody>
+</table>
 
 ## Overview
 
-CLIProxyAPI++ provides a unified API gateway for multiple LLM providers with:
-- OpenAI-compatible endpoints
-- Third-party provider support (Kiro, GitHub Copilot, Ollama)
-- OAuth authentication flows
-- Built-in rate limiting and metrics
-- SDK auto-generation
-
-## Architecture
-
-```
-┌──────────────┐     ┌─────────────────┐     ┌────────────┐
-│   Clients    │────▶│   CLIProxy++     │────▶│  Providers │
-│ (thegent,   │     │  (this repo)    │     │ (OpenAI,   │
-│  agentapi)   │     │                 │     │  Anthropic,│
-└──────────────┘     └─────────────────┘     │  AWS, etc) │
-                         │                   └────────────┘
-                         ▼
-                  ┌─────────────────┐
-                  │   SDK Gen      │
-                  │ (Python, Go)   │
-                  └─────────────────┘
-```
-
-## Quick Start
-
-### Docker
-
-```bash
-mkdir -p ~/cli-proxy && cd ~/cli-proxy
-
-cat > docker-compose.yml << 'EOF'
-services:
-  cli-proxy-api:
-    image: eceasy/cli-proxy-api-plus:latest
-    ports:
-      - "8317:8317"
-    volumes:
-      - ./config.yaml:/CLIProxyAPI/config.yaml
-    restart: unless-stopped
-EOF
-
-curl -o config.yaml https://raw.githubusercontent.com/KooshaPari/cliproxyapi-plusplus/main/config.example.yaml
-docker compose up -d
-```
-
-### From Source
-
-```bash
-# Build
-go build -o cliproxy ./cmd/cliproxy
-
-# Run
-./cliproxy --config config.yaml
-```
-
-## Configuration
-
-```yaml
-server:
-  port: 8317
-
-providers:
-  openai:
-    api_key: ${OPENAI_API_KEY}
-  anthropic:
-    api_key: ${ANTHROPIC_API_KEY}
-  kiro:
-    enabled: true
-  github_copilot:
-    enabled: true
+- OpenAI/Gemini/Claude compatible API endpoints for CLI models
+- OpenAI Codex support (GPT models) via OAuth login
+- Claude Code support via OAuth login
+- Qwen Code support via OAuth login
+- iFlow support via OAuth login
+- Amp CLI and IDE extensions support with provider routing
+- Streaming and non-streaming responses
+- Function calling/tools support
+- Multimodal input support (text and images)
+- Multiple accounts with round-robin load balancing (Gemini, OpenAI, Claude, Qwen and iFlow)
+- Simple CLI authentication flows (Gemini, OpenAI, Claude, Qwen and iFlow)
+- Generative Language API Key support
+- AI Studio Build multi-account load balancing
+- Gemini CLI multi-account load balancing
+- Claude Code multi-account load balancing
+- Qwen Code multi-account load balancing
+- iFlow multi-account load balancing
+- OpenAI Codex multi-account load balancing
+- OpenAI-compatible upstream providers via config (e.g., OpenRouter)
+- Reusable Go SDK for embedding the proxy (see `docs/sdk-usage.md`)
+
+## Getting Started
+
+CLIProxyAPI Guides: [https://help.router-for.me/](https://help.router-for.me/)
+
+## Management API
+
+see [MANAGEMENT_API.md](https://help.router-for.me/management/api)
+
+## Amp CLI Support
+
+CLIProxyAPI includes integrated support for [Amp CLI](https://ampcode.com) and Amp IDE extensions, enabling you to use your Google/ChatGPT/Claude OAuth subscriptions with Amp's coding tools:
+
+- Provider route aliases for Amp's API patterns (`/api/provider/{provider}/v1...`)
+- Management proxy for OAuth authentication and account features
+- Smart model fallback with automatic routing
+- **Model mapping** to route unavailable models to alternatives (e.g., `claude-opus-4.5` → `claude-sonnet-4`)
+- Security-first design with localhost-only management endpoints
+
+**→ [Complete Amp CLI Integration Guide](https://help.router-for.me/agent-client/amp-cli.html)**
+
+## SDK Docs
+
+- Usage: [docs/sdk-usage.md](docs/sdk-usage.md)
+- Advanced (executors & translators): [docs/sdk-advanced.md](docs/sdk-advanced.md)
+- Access: [docs/sdk-access.md](docs/sdk-access.md)
+- Watcher: [docs/sdk-watcher.md](docs/sdk-watcher.md)
+- Custom Provider Example: `examples/custom-provider`
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a Pull Request.
+
+1. Fork the repository
+2. Create your feature branch (`git checkout -b feature/amazing-feature`)
+3. Commit your changes (`git commit -m 'Add some amazing feature'`)
+4. Push to the branch (`git push origin feature/amazing-feature`)
+5. Open a Pull Request
+
+## Who is with us?
+
+Those projects are based on CLIProxyAPI:
+
+### [vibeproxy](https://github.com/automazeio/vibeproxy)
+
+Native macOS menu bar app to use your Claude Code & ChatGPT subscriptions with AI coding tools - no API keys needed
 
-rate_limit:
-  requests_per_minute: 60
-  tokens_per_minute: 100000
-```
+### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
 
-## Features
+Browser-based tool to translate SRT subtitles using your Gemini subscription via CLIProxyAPI with automatic validation/error correction - no API keys needed
 
-### Provider Support
+### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
 
-| Provider | Auth | Status |
-|----------|------|--------|
-| OpenAI | API Key | ✅ |
-| Anthropic | API Key | ✅ |
-| Azure OpenAI | API Key/OAuth | ✅ |
-| Google Gemini | API Key | ✅ |
-| AWS Bedrock | IAM | ✅ |
-| Kiro (CodeWhisperer) | OAuth | ✅ |
-| GitHub Copilot | OAuth | ✅ |
-| Ollama | Local | ✅ |
+CLI wrapper for instant switching between multiple Claude accounts and alternative models (Gemini, Codex, Antigravity) via CLIProxyAPI OAuth - no API keys needed
 
-### Authentication
+### [ProxyPal](https://github.com/heyhuynhgiabuu/proxypal)
 
-- **API Key** - Standard OpenAI-style
-- **OAuth** - Kiro, GitHub Copilot via web flow
-- **AWS IAM** - Bedrock credentials
+Native macOS GUI for managing CLIProxyAPI: configure providers, model mappings, and endpoints via OAuth - no API keys needed.
 
-### Rate Limiting
+### [Quotio](https://github.com/nguyenphutrong/quotio)
 
-- Token bucket algorithm
-- Per-provider limits
-- Cooldown management
-- Usage quotas
+Native macOS menu bar app that unifies Claude, Gemini, OpenAI, Qwen, and Antigravity subscriptions with real-time quota tracking and smart auto-failover for AI coding tools like Claude Code, OpenCode, and Droid - no API keys needed.
 
-### Observability
+### [CodMate](https://github.com/loocor/CodMate)
 
-- Request/response logging
-- Cost tracking
-- Latency metrics
-- Error rate monitoring
-
-## Endpoints
+Native macOS SwiftUI app for managing CLI AI sessions (Codex, Claude Code, Gemini CLI) with unified provider management, Git review, project organization, global search, and terminal integration. Integrates CLIProxyAPI to provide OAuth authentication for Codex, Claude, Gemini, Antigravity, and Qwen Code, with built-in and third-party provider rerouting through a single proxy endpoint - no API keys needed for OAuth providers.
 
-| Endpoint | Description |
-|----------|-------------|
-| `POST /v1/chat/completions` | Chat completions |
-| `POST /v1/completions` | Text completions |
-| `GET /v1/models` | List models |
-| `GET /health` | Health check |
-| `GET /metrics` | Prometheus metrics |
+### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
 
-## SDKs
+Windows-native CLIProxyAPI fork with TUI, system tray, and multi-provider OAuth for AI coding tools - no API keys needed.
 
-Auto-generated SDKs for:
+### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
 
-- **Python** - `pip install cliproxy-sdk`
-- **Go** - `go get github.com/KooshaPari/cliproxy-sdk-go`
+VSCode extension for quick switching between Claude Code models, featuring integrated CLIProxyAPI as its backend with automatic background lifecycle management.
 
-## Integration
+### [ZeroLimit](https://github.com/0xtbug/zero-limit)
 
-### With thegent
+Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas via CLIProxyAPI. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with real-time dashboard, system tray integration, and one-click proxy control - no API keys needed.
 
-```yaml
-# thegent config
-llm:
-  provider: cliproxy
-  base_url: http://localhost:8317/v1
-  api_key: ${CLIPROXY_API_KEY}
-```
+### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
 
-### With agentapi
+A lightweight web admin panel for CLIProxyAPI with health checks, resource monitoring, real-time logs, auto-update, request statistics and pricing display. Supports one-click installation and systemd service.
 
-```bash
-agentapi --cliproxy http://localhost:8317
-```
+### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
 
-## Development
+A Windows tray application implemented using PowerShell scripts, without relying on any third-party libraries. The main features include: automatic creation of shortcuts, silent running, password management, channel switching (Main / Plus), and automatic downloading and updating.
 
-```bash
-# Lint
-go fmt ./...
-go vet ./...
+> [!NOTE]  
+> If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
 
-# Test
-go test ./...
+## More choices
 
-# Generate SDKs
-./scripts/generate_sdks.sh
-```
+Those projects are ports of CLIProxyAPI or inspired by it:
 
-## Fork Differences
+### [9Router](https://github.com/decolua/9router)
 
-This fork includes:
+A Next.js implementation inspired by CLIProxyAPI, easy to install and use, built from scratch with format translation (OpenAI/Claude/Gemini/Ollama), combo system with auto-fallback, multi-account management with exponential backoff, a Next.js web dashboard, and support for CLI tools (Cursor, Claude Code, Cline, RooCode) - no API keys needed.
 
-- ✅ SDK auto-generation workflow
-- ✅ Enhanced OpenAPI spec
-- ✅ Python client SDK (`pkg/sdk/python`)
-- ✅ Go client SDK (`pkg/sdk/go`)
-- ✅ Integration with tokenledger for cost tracking
+> [!NOTE]  
+> If you have developed a port of CLIProxyAPI or a project inspired by it, please open a PR to add it to this list.
 
 ## License
 
-MIT License - see LICENSE file
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
diff --git a/README_CN.md b/README_CN.md
index 79b5203f02..dbaf5f1314 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -1,100 +1,168 @@
-# CLIProxyAPI Plus
+# CLI 代理 API
 
 [English](README.md) | 中文
 
-这是 [CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI) 的 Plus 版本，在原有基础上增加了第三方供应商的支持。
+一个为 CLI 提供 OpenAI/Gemini/Claude/Codex 兼容 API 接口的代理服务器。
 
-所有的第三方供应商支持都由第三方社区维护者提供，CLIProxyAPI 不提供技术支持。如需取得支持，请与对应的社区维护者联系。
+现已支持通过 OAuth 登录接入 OpenAI Codex（GPT 系列）和 Claude Code。
 
-该 Plus 版本的主线功能与主线功能强制同步。
+您可以使用本地或多账户的CLI方式，通过任何与 OpenAI（包括Responses）/Gemini/Claude 兼容的客户端和SDK进行访问。
 
-## 与主线版本版本差异
+## 赞助商
 
-- 新增 GitHub Copilot 支持（OAuth 登录），由[em4go](https://github.com/em4go/CLIProxyAPI/tree/feature/github-copilot-auth)提供
-- 新增 Kiro (AWS CodeWhisperer) 支持 (OAuth 登录), 由[fuko2935](https://github.com/fuko2935/CLIProxyAPI/tree/feature/kiro-integration)、[Ravens2121](https://github.com/Ravens2121/CLIProxyAPIPlus/)提供
+[![bigmodel.cn](https://assets.router-for.me/chinese-4.7.png)](https://www.bigmodel.cn/claude-code?ic=RRVJPB5SII)
 
-## 新增功能 (Plus 增强版)
+本项目由 Z智谱 提供赞助, 他们通过 GLM CODING PLAN 对本项目提供技术支持。
 
-- **OAuth Web 认证**: 基于浏览器的 Kiro OAuth 登录，提供美观的 Web UI
-- **请求限流器**: 内置请求限流，防止 API 滥用
-- **后台令牌刷新**: 过期前 10 分钟自动刷新令牌
-- **监控指标**: 请求指标收集，用于监控和调试
-- **设备指纹**: 设备指纹生成，增强安全性
-- **冷却管理**: 智能冷却机制，应对 API 速率限制
-- **用量检查器**: 实时用量监控和配额管理
-- **模型转换器**: 跨供应商的统一模型名称转换
-- **UTF-8 流处理**: 改进的流式响应处理
+GLM CODING PLAN 是专为AI编码打造的订阅套餐，每月最低仅需20元，即可在十余款主流AI编码工具如 Claude Code、Cline、Roo Code 中畅享智谱旗舰模型GLM-4.7，为开发者提供顶尖的编码体验。
 
-## Kiro 认证
+智谱AI为本软件提供了特别优惠，使用以下链接购买可以享受九折优惠：https://www.bigmodel.cn/claude-code?ic=RRVJPB5SII
 
-### 网页端 OAuth 登录
+---
 
-访问 Kiro OAuth 网页认证界面：
+<table>
+<tbody>
+<tr>
+<td width="180"><a href="https://www.packyapi.com/register?aff=cliproxyapi"><img src="./assets/packycode.png" alt="PackyCode" width="150"></a></td>
+<td>感谢 PackyCode 对本项目的赞助！PackyCode 是一家可靠高效的 API 中转服务商，提供 Claude Code、Codex、Gemini 等多种服务的中转。PackyCode 为本软件用户提供了特别优惠：使用<a href="https://www.packyapi.com/register?aff=cliproxyapi">此链接</a>注册，并在充值时输入 "cliproxyapi" 优惠码即可享受九折优惠。</td>
+</tr>
+<tr>
+<td width="180"><a href="https://cubence.com/signup?code=CLIPROXYAPI&source=cpa"><img src="./assets/cubence.png" alt="Cubence" width="150"></a></td>
+<td>感谢 Cubence 对本项目的赞助！Cubence 是一家可靠高效的 API 中转服务商，提供 Claude Code、Codex、Gemini 等多种服务的中转。Cubence 为本软件用户提供了特别优惠：使用<a href="https://cubence.com/signup?code=CLIPROXYAPI&source=cpa">此链接</a>注册，并在充值时输入 "CLIPROXYAPI" 优惠码即可享受九折优惠。</td>
+</tr>
+</tbody>
+</table>
 
-```
-http://your-server:8080/v0/oauth/kiro
-```
 
-提供基于浏览器的 Kiro (AWS CodeWhisperer) OAuth 认证流程，支持：
-- AWS Builder ID 登录
-- AWS Identity Center (IDC) 登录
-- 从 Kiro IDE 导入令牌
+## 功能特性
 
-## Docker 快速部署
+- 为 CLI 模型提供 OpenAI/Gemini/Claude/Codex 兼容的 API 端点
+- 新增 OpenAI Codex（GPT 系列）支持（OAuth 登录）
+- 新增 Claude Code 支持（OAuth 登录）
+- 新增 Qwen Code 支持（OAuth 登录）
+- 新增 iFlow 支持（OAuth 登录）
+- 支持流式与非流式响应
+- 函数调用/工具支持
+- 多模态输入（文本、图片）
+- 多账户支持与轮询负载均衡（Gemini、OpenAI、Claude、Qwen 与 iFlow）
+- 简单的 CLI 身份验证流程（Gemini、OpenAI、Claude、Qwen 与 iFlow）
+- 支持 Gemini AIStudio API 密钥
+- 支持 AI Studio Build 多账户轮询
+- 支持 Gemini CLI 多账户轮询
+- 支持 Claude Code 多账户轮询
+- 支持 Qwen Code 多账户轮询
+- 支持 iFlow 多账户轮询
+- 支持 OpenAI Codex 多账户轮询
+- 通过配置接入上游 OpenAI 兼容提供商（例如 OpenRouter）
+- 可复用的 Go SDK（见 `docs/sdk-usage_CN.md`）
 
-### 一键部署
+## 新手入门
 
-```bash
-# 创建部署目录
-mkdir -p ~/cli-proxy && cd ~/cli-proxy
+CLIProxyAPI 用户手册： [https://help.router-for.me/](https://help.router-for.me/cn/)
 
-# 创建 docker-compose.yml
-cat > docker-compose.yml << 'EOF'
-services:
-  cli-proxy-api:
-    image: eceasy/cli-proxy-api-plus:latest
-    container_name: cli-proxy-api-plus
-    ports:
-      - "8317:8317"
-    volumes:
-      - ./config.yaml:/CLIProxyAPI/config.yaml
-      - ./auths:/root/.cli-proxy-api
-      - ./logs:/CLIProxyAPI/logs
-    restart: unless-stopped
-EOF
+## 管理 API 文档
 
-# 下载示例配置
-curl -o config.yaml https://raw.githubusercontent.com/router-for-me/CLIProxyAPIPlus/main/config.example.yaml
+请参见 [MANAGEMENT_API_CN.md](https://help.router-for.me/cn/management/api)
 
-# 拉取并启动
-docker compose pull && docker compose up -d
-```
+## Amp CLI 支持
 
-### 配置说明
+CLIProxyAPI 已内置对 [Amp CLI](https://ampcode.com) 和 Amp IDE 扩展的支持，可让你使用自己的 Google/ChatGPT/Claude OAuth 订阅来配合 Amp 编码工具：
 
-启动前请编辑 `config.yaml`：
+- 提供商路由别名，兼容 Amp 的 API 路径模式（`/api/provider/{provider}/v1...`）
+- 管理代理，处理 OAuth 认证和账号功能
+- 智能模型回退与自动路由
+- 以安全为先的设计，管理端点仅限 localhost
 
-```yaml
-# 基本配置示例
-server:
-  port: 8317
+**→ [Amp CLI 完整集成指南](https://help.router-for.me/cn/agent-client/amp-cli.html)**
 
-# 在此添加你的供应商配置
-```
+## SDK 文档
 
-### 更新到最新版本
-
-```bash
-cd ~/cli-proxy
-docker compose pull && docker compose up -d
-```
+- 使用文档：[docs/sdk-usage_CN.md](docs/sdk-usage_CN.md)
+- 高级（执行器与翻译器）：[docs/sdk-advanced_CN.md](docs/sdk-advanced_CN.md)
+- 认证: [docs/sdk-access_CN.md](docs/sdk-access_CN.md)
+- 凭据加载/更新: [docs/sdk-watcher_CN.md](docs/sdk-watcher_CN.md)
+- 自定义 Provider 示例：`examples/custom-provider`
 
 ## 贡献
 
-该项目仅接受第三方供应商支持的 Pull Request。任何非第三方供应商支持的 Pull Request 都将被拒绝。
+欢迎贡献！请随时提交 Pull Request。
+
+1. Fork 仓库
+2. 创建您的功能分支（`git checkout -b feature/amazing-feature`）
+3. 提交您的更改（`git commit -m 'Add some amazing feature'`）
+4. 推送到分支（`git push origin feature/amazing-feature`）
+5. 打开 Pull Request
+
+## 谁与我们在一起？
+
+这些项目基于 CLIProxyAPI:
+
+### [vibeproxy](https://github.com/automazeio/vibeproxy)
+
+一个原生 macOS 菜单栏应用，让您可以使用 Claude Code & ChatGPT 订阅服务和 AI 编程工具，无需 API 密钥。
+
+### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
+
+一款基于浏览器的 SRT 字幕翻译工具，可通过 CLI 代理 API 使用您的 Gemini 订阅。内置自动验证与错误修正功能，无需 API 密钥。
+
+### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
+
+CLI 封装器，用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户和替代模型（Gemini, Codex, Antigravity），无需 API 密钥。
+
+### [ProxyPal](https://github.com/heyhuynhgiabuu/proxypal)
+
+基于 macOS 平台的原生 CLIProxyAPI GUI：配置供应商、模型映射以及OAuth端点，无需 API 密钥。
+
+### [Quotio](https://github.com/nguyenphutrong/quotio)
+
+原生 macOS 菜单栏应用，统一管理 Claude、Gemini、OpenAI、Qwen 和 Antigravity 订阅，提供实时配额追踪和智能自动故障转移，支持 Claude Code、OpenCode 和 Droid 等 AI 编程工具，无需 API 密钥。
+
+### [CodMate](https://github.com/loocor/CodMate)
+
+原生 macOS SwiftUI 应用，用于管理 CLI AI 会话（Claude Code、Codex、Gemini CLI），提供统一的提供商管理、Git 审查、项目组织、全局搜索和终端集成。集成 CLIProxyAPI 为 Codex、Claude、Gemini、Antigravity 和 Qwen Code 提供统一的 OAuth 认证，支持内置和第三方提供商通过单一代理端点重路由 - OAuth 提供商无需 API 密钥。
+
+### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
+
+原生 Windows CLIProxyAPI 分支，集成 TUI、系统托盘及多服务商 OAuth 认证，专为 AI 编程工具打造，无需 API 密钥。
 
-如果需要提交任何非第三方供应商支持的 Pull Request，请提交到[主线](https://github.com/router-for-me/CLIProxyAPI)版本。
+### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
+
+一款 VSCode 扩展，提供了在 VSCode 中快速切换 Claude Code 模型的功能，内置 CLIProxyAPI 作为其后端，支持后台自动启动和关闭。
+
+### [ZeroLimit](https://github.com/0xtbug/zero-limit)
+
+Windows 桌面应用，基于 Tauri + React 构建，用于通过 CLIProxyAPI 监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪，提供实时仪表盘、系统托盘集成和一键代理控制，无需 API 密钥。
+
+### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
+
+面向 CLIProxyAPI 的 Web 管理面板，提供健康检查、资源监控、日志查看、自动更新、请求统计与定价展示，支持一键安装与 systemd 服务。
+
+> [!NOTE]  
+> 如果你开发了基于 CLIProxyAPI 的项目，请提交一个 PR（拉取请求）将其添加到此列表中。
+
+## 更多选择
+
+以下项目是 CLIProxyAPI 的移植版或受其启发：
+
+### [9Router](https://github.com/decolua/9router)
+
+基于 Next.js 的实现，灵感来自 CLIProxyAPI，易于安装使用；自研格式转换（OpenAI/Claude/Gemini/Ollama）、组合系统与自动回退、多账户管理（指数退避）、Next.js Web 控制台，并支持 Cursor、Claude Code、Cline、RooCode 等 CLI 工具，无需 API 密钥。
+
+### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
+
+Windows 托盘应用，基于 PowerShell 脚本实现，不依赖任何第三方库。主要功能包括：自动创建快捷方式、静默运行、密码管理、通道切换（Main / Plus）以及自动下载与更新。
+
+> [!NOTE]  
+> 如果你开发了 CLIProxyAPI 的移植或衍生项目，请提交 PR 将其添加到此列表中。
 
 ## 许可证
 
-此项目根据 MIT 许可证授权 - 有关详细信息，请参阅 [LICENSE](LICENSE) 文件。
\ No newline at end of file
+此项目根据 MIT 许可证授权 - 有关详细信息，请参阅 [LICENSE](LICENSE) 文件。
+
+## 写给所有中国网友的
+
+QQ 群：188637136
+
+或
+
+Telegram 群：https://t.me/CLIProxyAPI
diff --git a/assets/cubence.png b/assets/cubence.png
new file mode 100644
index 0000000000..c61f12f61e
Binary files /dev/null and b/assets/cubence.png differ
diff --git a/cmd/server/main.go b/cmd/server/main.go
index 2ef8c33913..385d7cfadf 100644
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -8,7 +8,6 @@ import (
 	"errors"
 	"flag"
 	"fmt"
-	"io"
 	"io/fs"
 	"net/url"
 	"os"
@@ -18,7 +17,6 @@ import (
 
 	"github.com/joho/godotenv"
 	configaccess "github.com/router-for-me/CLIProxyAPI/v6/internal/access/config_access"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/buildinfo"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/cmd"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -27,7 +25,6 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/store"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/tui"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
@@ -50,19 +47,6 @@ func init() {
 	buildinfo.BuildDate = BuildDate
 }
 
-// setKiroIncognitoMode sets the incognito browser mode for Kiro authentication.
-// Kiro defaults to incognito mode for multi-account support.
-// Users can explicitly override with --incognito or --no-incognito flags.
-func setKiroIncognitoMode(cfg *config.Config, useIncognito, noIncognito bool) {
-	if useIncognito {
-		cfg.IncognitoBrowser = true
-	} else if noIncognito {
-		cfg.IncognitoBrowser = false
-	} else {
-		cfg.IncognitoBrowser = true // Kiro default
-	}
-}
-
 // main is the entry point of the application.
 // It parses command-line flags, loads configuration, and starts the appropriate
 // service based on the provided flags (login, codex-login, or server mode).
@@ -74,54 +58,30 @@ func main() {
 	var codexLogin bool
 	var claudeLogin bool
 	var qwenLogin bool
-	var kiloLogin bool
 	var iflowLogin bool
 	var iflowCookie bool
 	var noBrowser bool
 	var oauthCallbackPort int
 	var antigravityLogin bool
-	var kimiLogin bool
-	var kiroLogin bool
-	var kiroGoogleLogin bool
-	var kiroAWSLogin bool
-	var kiroAWSAuthCode bool
-	var kiroImport bool
-	var githubCopilotLogin bool
 	var projectID string
 	var vertexImport string
 	var configPath string
 	var password string
-	var tuiMode bool
-	var standalone bool
-	var noIncognito bool
-	var useIncognito bool
 
 	// Define command-line flags for different operation modes.
 	flag.BoolVar(&login, "login", false, "Login Google Account")
 	flag.BoolVar(&codexLogin, "codex-login", false, "Login to Codex using OAuth")
 	flag.BoolVar(&claudeLogin, "claude-login", false, "Login to Claude using OAuth")
 	flag.BoolVar(&qwenLogin, "qwen-login", false, "Login to Qwen using OAuth")
-	flag.BoolVar(&kiloLogin, "kilo-login", false, "Login to Kilo AI using device flow")
 	flag.BoolVar(&iflowLogin, "iflow-login", false, "Login to iFlow using OAuth")
 	flag.BoolVar(&iflowCookie, "iflow-cookie", false, "Login to iFlow using Cookie")
 	flag.BoolVar(&noBrowser, "no-browser", false, "Don't open browser automatically for OAuth")
 	flag.IntVar(&oauthCallbackPort, "oauth-callback-port", 0, "Override OAuth callback port (defaults to provider-specific port)")
-	flag.BoolVar(&useIncognito, "incognito", false, "Open browser in incognito/private mode for OAuth (useful for multiple accounts)")
-	flag.BoolVar(&noIncognito, "no-incognito", false, "Force disable incognito mode (uses existing browser session)")
 	flag.BoolVar(&antigravityLogin, "antigravity-login", false, "Login to Antigravity using OAuth")
-	flag.BoolVar(&kimiLogin, "kimi-login", false, "Login to Kimi using OAuth")
-	flag.BoolVar(&kiroLogin, "kiro-login", false, "Login to Kiro using Google OAuth")
-	flag.BoolVar(&kiroGoogleLogin, "kiro-google-login", false, "Login to Kiro using Google OAuth (same as --kiro-login)")
-	flag.BoolVar(&kiroAWSLogin, "kiro-aws-login", false, "Login to Kiro using AWS Builder ID (device code flow)")
-	flag.BoolVar(&kiroAWSAuthCode, "kiro-aws-authcode", false, "Login to Kiro using AWS Builder ID (authorization code flow, better UX)")
-	flag.BoolVar(&kiroImport, "kiro-import", false, "Import Kiro token from Kiro IDE (~/.aws/sso/cache/kiro-auth-token.json)")
-	flag.BoolVar(&githubCopilotLogin, "github-copilot-login", false, "Login to GitHub Copilot using device flow")
 	flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
 	flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
 	flag.StringVar(&vertexImport, "vertex-import", "", "Import Vertex service account key JSON file")
 	flag.StringVar(&password, "password", "", "")
-	flag.BoolVar(&tuiMode, "tui", false, "Start with terminal management UI")
-	flag.BoolVar(&standalone, "standalone", false, "In TUI mode, start an embedded local server")
 
 	flag.CommandLine.Usage = func() {
 		out := flag.CommandLine.Output()
@@ -483,7 +443,7 @@ func main() {
 	}
 
 	// Register built-in access providers before constructing services.
-	configaccess.Register(&cfg.SDKConfig)
+	configaccess.Register()
 
 	// Handle different command modes based on the provided flags.
 
@@ -496,9 +456,6 @@ func main() {
 	} else if antigravityLogin {
 		// Handle Antigravity login
 		cmd.DoAntigravityLogin(cfg, options)
-	} else if githubCopilotLogin {
-		// Handle GitHub Copilot login
-		cmd.DoGitHubCopilotLogin(cfg, options)
 	} else if codexLogin {
 		// Handle Codex login
 		cmd.DoCodexLogin(cfg, options)
@@ -507,38 +464,10 @@ func main() {
 		cmd.DoClaudeLogin(cfg, options)
 	} else if qwenLogin {
 		cmd.DoQwenLogin(cfg, options)
-	} else if kiloLogin {
-		cmd.DoKiloLogin(cfg, options)
 	} else if iflowLogin {
 		cmd.DoIFlowLogin(cfg, options)
 	} else if iflowCookie {
 		cmd.DoIFlowCookieAuth(cfg, options)
-	} else if kimiLogin {
-		cmd.DoKimiLogin(cfg, options)
-	} else if kiroLogin {
-		// For Kiro auth, default to incognito mode for multi-account support
-		// Users can explicitly override with --no-incognito
-		// Note: This config mutation is safe - auth commands exit after completion
-		// and don't share config with StartService (which is in the else branch)
-		setKiroIncognitoMode(cfg, useIncognito, noIncognito)
-		cmd.DoKiroLogin(cfg, options)
-	} else if kiroGoogleLogin {
-		// For Kiro auth, default to incognito mode for multi-account support
-		// Users can explicitly override with --no-incognito
-		// Note: This config mutation is safe - auth commands exit after completion
-		setKiroIncognitoMode(cfg, useIncognito, noIncognito)
-		cmd.DoKiroGoogleLogin(cfg, options)
-	} else if kiroAWSLogin {
-		// For Kiro auth, default to incognito mode for multi-account support
-		// Users can explicitly override with --no-incognito
-		setKiroIncognitoMode(cfg, useIncognito, noIncognito)
-		cmd.DoKiroAWSLogin(cfg, options)
-	} else if kiroAWSAuthCode {
-		// For Kiro auth with authorization code flow (better UX)
-		setKiroIncognitoMode(cfg, useIncognito, noIncognito)
-		cmd.DoKiroAWSAuthCodeLogin(cfg, options)
-	} else if kiroImport {
-		cmd.DoKiroImport(cfg, options)
 	} else {
 		// In cloud deploy mode without config file, just wait for shutdown signals
 		if isCloudDeploy && !configFileExists {
@@ -546,89 +475,8 @@ func main() {
 			cmd.WaitForCloudDeploy()
 			return
 		}
-		if tuiMode {
-			if standalone {
-				// Standalone mode: start an embedded local server and connect TUI client to it.
-				managementasset.StartAutoUpdater(context.Background(), configFilePath)
-				hook := tui.NewLogHook(2000)
-				hook.SetFormatter(&logging.LogFormatter{})
-				log.AddHook(hook)
-
-				origStdout := os.Stdout
-				origStderr := os.Stderr
-				origLogOutput := log.StandardLogger().Out
-				log.SetOutput(io.Discard)
-
-				devNull, errOpenDevNull := os.Open(os.DevNull)
-				if errOpenDevNull == nil {
-					os.Stdout = devNull
-					os.Stderr = devNull
-				}
-
-				restoreIO := func() {
-					os.Stdout = origStdout
-					os.Stderr = origStderr
-					log.SetOutput(origLogOutput)
-					if devNull != nil {
-						_ = devNull.Close()
-					}
-				}
-
-				localMgmtPassword := fmt.Sprintf("tui-%d-%d", os.Getpid(), time.Now().UnixNano())
-				if password == "" {
-					password = localMgmtPassword
-				}
-
-				cancel, done := cmd.StartServiceBackground(cfg, configFilePath, password)
-
-				client := tui.NewClient(cfg.Port, password)
-				ready := false
-				backoff := 100 * time.Millisecond
-				for i := 0; i < 30; i++ {
-					if _, errGetConfig := client.GetConfig(); errGetConfig == nil {
-						ready = true
-						break
-					}
-					time.Sleep(backoff)
-					if backoff < time.Second {
-						backoff = time.Duration(float64(backoff) * 1.5)
-					}
-				}
-
-				if !ready {
-					restoreIO()
-					cancel()
-					<-done
-					fmt.Fprintf(os.Stderr, "TUI error: embedded server is not ready\n")
-					return
-				}
-
-				if errRun := tui.Run(cfg.Port, password, hook, origStdout); errRun != nil {
-					restoreIO()
-					fmt.Fprintf(os.Stderr, "TUI error: %v\n", errRun)
-				} else {
-					restoreIO()
-				}
-
-				cancel()
-				<-done
-			} else {
-				// Default TUI mode: pure management client.
-				// The proxy server must already be running.
-				if errRun := tui.Run(cfg.Port, password, nil, os.Stdout); errRun != nil {
-					fmt.Fprintf(os.Stderr, "TUI error: %v\n", errRun)
-				}
-			}
-		} else {
-      // Start the main proxy service
-      managementasset.StartAutoUpdater(context.Background(), configFilePath)
-
-      if cfg.AuthDir != "" {
-        kiro.InitializeAndStart(cfg.AuthDir, cfg)
-        defer kiro.StopGlobalRefreshManager()
-      }
-
-      cmd.StartService(cfg, configFilePath, password)
-		}
+		// Start the main proxy service
+		managementasset.StartAutoUpdater(context.Background(), configFilePath)
+		cmd.StartService(cfg, configFilePath, password)
 	}
 }
diff --git a/config.example.yaml b/config.example.yaml
index b513eb60ac..76c9e15e65 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -1,6 +1,6 @@
 # Server host/interface to bind to. Default is empty ("") to bind all interfaces (IPv4 + IPv6).
 # Use "127.0.0.1" or "localhost" to restrict access to local machine only.
-host: ''
+host: ""
 
 # Server port
 port: 8317
@@ -8,8 +8,8 @@ port: 8317
 # TLS settings for HTTPS. When enabled, the server listens with the provided certificate and key.
 tls:
   enable: false
-  cert: ''
-  key: ''
+  cert: ""
+  key: ""
 
 # Management API settings
 remote-management:
@@ -20,39 +20,29 @@ remote-management:
   # Management key. If a plaintext value is provided here, it will be hashed on startup.
   # All management requests (even from localhost) require this key.
   # Leave empty to disable the Management API entirely (404 for all /v0/management routes).
-  secret-key: ''
+  secret-key: ""
 
   # Disable the bundled management control panel asset download and HTTP route when true.
   disable-control-panel: false
 
   # GitHub repository for the management control panel. Accepts a repository URL or releases API URL.
-  panel-github-repository: 'https://github.com/router-for-me/Cli-Proxy-API-Management-Center'
+  panel-github-repository: "https://github.com/router-for-me/Cli-Proxy-API-Management-Center"
 
 # Authentication directory (supports ~ for home directory)
-auth-dir: '~/.cli-proxy-api'
+auth-dir: "~/.cli-proxy-api"
 
 # API keys for authentication
 api-keys:
-  - 'your-api-key-1'
-  - 'your-api-key-2'
-  - 'your-api-key-3'
+  - "your-api-key-1"
+  - "your-api-key-2"
+  - "your-api-key-3"
 
 # Enable debug logging
 debug: false
 
-# Enable pprof HTTP debug server (host:port). Keep it bound to localhost for safety.
-pprof:
-  enable: false
-  addr: '127.0.0.1:8316'
-
 # When true, disable high-overhead HTTP middleware features to reduce per-request memory usage under high concurrency.
 commercial-mode: false
 
-# Open OAuth URLs in incognito/private browser mode.
-# Useful when you want to login with a different account without logging out from your current session.
-# Default: false (but Kiro auth defaults to true for multi-account support)
-incognito-browser: true
-
 # When true, write application logs to rotating files instead of stdout
 logging-to-file: false
 
@@ -68,15 +58,11 @@ error-logs-max-files: 10
 usage-statistics-enabled: false
 
 # Proxy URL. Supports socks5/http/https protocols. Example: socks5://user:pass@192.168.1.1:1080/
-proxy-url: ''
+proxy-url: ""
 
 # When true, unprefixed model requests only use credentials without a prefix (except when prefix == model name).
 force-model-prefix: false
 
-# When true, forward filtered upstream response headers to downstream clients.
-# Default is false (disabled).
-passthrough-headers: false
-
 # Number of times to retry a request. Retries will occur if the HTTP response code is 403, 408, 500, 502, 503, or 504.
 request-retry: 3
 
@@ -90,7 +76,7 @@ quota-exceeded:
 
 # Routing strategy for selecting credentials when multiple match.
 routing:
-  strategy: 'round-robin' # round-robin (default), fill-first
+  strategy: "round-robin" # round-robin (default), fill-first
 
 # When true, enable authentication for the WebSocket API (/v1/ws).
 ws-auth: false
@@ -164,40 +150,6 @@ nonstream-keepalive-interval: 0
 #       sensitive-words:             # optional: words to obfuscate with zero-width characters
 #         - "API"
 #         - "proxy"
-#       cache-user-id: true          # optional: default is false; set true to reuse cached user_id per API key instead of generating a random one each request
-
-# Default headers for Claude API requests. Update when Claude Code releases new versions.
-# These are used as fallbacks when the client does not send its own headers.
-# claude-header-defaults:
-#   user-agent: "claude-cli/2.1.44 (external, sdk-cli)"
-#   package-version: "0.74.0"
-#   runtime-version: "v24.3.0"
-#   timeout: "600"
-
-# Kiro (AWS CodeWhisperer) configuration
-# Note: Kiro API currently only operates in us-east-1 region
-#kiro:
-#  - token-file: "~/.aws/sso/cache/kiro-auth-token.json" # path to Kiro token file
-#    agent-task-type: "" # optional: "vibe" or empty (API default)
-#  - access-token: "aoaAAAAA..." # or provide tokens directly
-#    refresh-token: "aorAAAAA..."
-#    profile-arn: "arn:aws:codewhisperer:us-east-1:..."
-#    proxy-url: "socks5://proxy.example.com:1080" # optional: proxy override
-
-# Kilocode (OAuth-based code assistant)
-# Note: Kilocode uses OAuth device flow authentication.
-# Use the CLI command: ./server --kilo-login
-# This will save credentials to the auth directory (default: ~/.cli-proxy-api/)
-# oauth-model-alias:
-#   kilo:
-#     - name: "minimax/minimax-m2.5:free"
-#       alias: "minimax-m2.5"
-#     - name: "z-ai/glm-5:free"
-#       alias: "glm-5"
-# oauth-excluded-models:
-#   kilo:
-#     - "kilo-claude-opus-4-6"  # exclude specific models (exact match)
-#     - "*:free"                # wildcard matching suffix (e.g. all free models)
 
 # OpenAI compatibility providers
 # openai-compatibility:
@@ -264,25 +216,25 @@ nonstream-keepalive-interval: 0
 
 # Global OAuth model name aliases (per channel)
 # These aliases rename model IDs for both model listing and request routing.
-# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot, kimi.
+# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
 # NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
 # You can repeat the same name with different aliases to expose multiple client model names.
-# oauth-model-alias:
-#  antigravity:
-#    - name: "rev19-uic3-1p"
-#      alias: "gemini-2.5-computer-use-preview-10-2025"
-#    - name: "gemini-3-pro-image"
-#      alias: "gemini-3-pro-image-preview"
-#    - name: "gemini-3-pro-high"
-#      alias: "gemini-3-pro-preview"
-#    - name: "gemini-3-flash"
-#      alias: "gemini-3-flash-preview"
-#    - name: "claude-sonnet-4-5"
-#      alias: "gemini-claude-sonnet-4-5"
-#    - name: "claude-sonnet-4-5-thinking"
-#      alias: "gemini-claude-sonnet-4-5-thinking"
-#    - name: "claude-opus-4-5-thinking"
-#      alias: "gemini-claude-opus-4-5-thinking"
+oauth-model-alias:
+  antigravity:
+    - name: "rev19-uic3-1p"
+      alias: "gemini-2.5-computer-use-preview-10-2025"
+    - name: "gemini-3-pro-image"
+      alias: "gemini-3-pro-image-preview"
+    - name: "gemini-3-pro-high"
+      alias: "gemini-3-pro-preview"
+    - name: "gemini-3-flash"
+      alias: "gemini-3-flash-preview"
+    - name: "claude-sonnet-4-5"
+      alias: "gemini-claude-sonnet-4-5"
+    - name: "claude-sonnet-4-5-thinking"
+      alias: "gemini-claude-sonnet-4-5-thinking"
+    - name: "claude-opus-4-5-thinking"
+      alias: "gemini-claude-opus-4-5-thinking"
 #   gemini-cli:
 #     - name: "gemini-2.5-pro"          # original model name under this channel
 #       alias: "g2.5p"                  # client-visible alias
@@ -305,18 +257,8 @@ nonstream-keepalive-interval: 0
 #   iflow:
 #     - name: "glm-4.7"
 #       alias: "glm-god"
-#   kimi:
-#     - name: "kimi-k2.5"
-#       alias: "k2.5"
-#   kiro:
-#     - name: "kiro-claude-opus-4-5"
-#       alias: "op45"
-#   github-copilot:
-#     - name: "gpt-5"
-#       alias: "copilot-gpt5"
 
 # OAuth provider excluded models
-# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
 # oauth-excluded-models:
 #   gemini-cli:
 #     - "gemini-2.5-pro"     # exclude specific models (exact match)
@@ -337,12 +279,6 @@ nonstream-keepalive-interval: 0
 #     - "vision-model"
 #   iflow:
 #     - "tstars2.0"
-#   kimi:
-#     - "kimi-k2-thinking"
-#   kiro:
-#     - "kiro-claude-haiku-4-5"
-#   github-copilot:
-#     - "raptor-mini"
 
 # Optional payload configuration
 # payload:
diff --git a/docker-compose.yml b/docker-compose.yml
index cd8c21b97c..ad2190c23a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,6 +1,6 @@
 services:
   cli-proxy-api:
-    image: ${CLI_PROXY_IMAGE:-eceasy/cli-proxy-api-plus:latest}
+    image: ${CLI_PROXY_IMAGE:-eceasy/cli-proxy-api:latest}
     pull_policy: always
     build:
       context: .
@@ -9,7 +9,7 @@ services:
         VERSION: ${VERSION:-dev}
         COMMIT: ${COMMIT:-none}
         BUILD_DATE: ${BUILD_DATE:-unknown}
-    container_name: cli-proxy-api-plus
+    container_name: cli-proxy-api
     # env_file:
     #   - .env
     environment:
diff --git a/docs/sdk-access.md b/docs/sdk-access.md
index 343c851b4f..e4e6962994 100644
--- a/docs/sdk-access.md
+++ b/docs/sdk-access.md
@@ -7,71 +7,80 @@ The `github.com/router-for-me/CLIProxyAPI/v6/sdk/access` package centralizes inb
 ```go
 import (
     sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access"
+    "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 )
 ```
 
 Add the module with `go get github.com/router-for-me/CLIProxyAPI/v6/sdk/access`.
 
-## Provider Registry
-
-Providers are registered globally and then attached to a `Manager` as a snapshot:
-
-- `RegisterProvider(type, provider)` installs a pre-initialized provider instance.
-- Registration order is preserved the first time each `type` is seen.
-- `RegisteredProviders()` returns the providers in that order.
-
 ## Manager Lifecycle
 
 ```go
 manager := sdkaccess.NewManager()
-manager.SetProviders(sdkaccess.RegisteredProviders())
+providers, err := sdkaccess.BuildProviders(cfg)
+if err != nil {
+    return err
+}
+manager.SetProviders(providers)
 ```
 
 * `NewManager` constructs an empty manager.
 * `SetProviders` replaces the provider slice using a defensive copy.
 * `Providers` retrieves a snapshot that can be iterated safely from other goroutines.
-
-If the manager itself is `nil` or no providers are configured, the call returns `nil, nil`, allowing callers to treat access control as disabled.
+* `BuildProviders` translates `config.Config` access declarations into runnable providers. When the config omits explicit providers but defines inline API keys, the helper auto-installs the built-in `config-api-key` provider.
 
 ## Authenticating Requests
 
 ```go
-result, authErr := manager.Authenticate(ctx, req)
+result, err := manager.Authenticate(ctx, req)
 switch {
-case authErr == nil:
+case err == nil:
     // Authentication succeeded; result describes the provider and principal.
-case sdkaccess.IsAuthErrorCode(authErr, sdkaccess.AuthErrorCodeNoCredentials):
+case errors.Is(err, sdkaccess.ErrNoCredentials):
     // No recognizable credentials were supplied.
-case sdkaccess.IsAuthErrorCode(authErr, sdkaccess.AuthErrorCodeInvalidCredential):
+case errors.Is(err, sdkaccess.ErrInvalidCredential):
     // Supplied credentials were present but rejected.
 default:
-    // Internal/transport failure was returned by a provider.
+    // Transport-level failure was returned by a provider.
 }
 ```
 
-`Manager.Authenticate` walks the configured providers in order. It returns on the first success, skips providers that return `AuthErrorCodeNotHandled`, and aggregates `AuthErrorCodeNoCredentials` / `AuthErrorCodeInvalidCredential` for a final result.
+`Manager.Authenticate` walks the configured providers in order. It returns on the first success, skips providers that surface `ErrNotHandled`, and tracks whether any provider reported `ErrNoCredentials` or `ErrInvalidCredential` for downstream error reporting.
 
-Each `Result` includes the provider identifier, the resolved principal, and optional metadata (for example, which header carried the credential).
-
-## Built-in `config-api-key` Provider
+If the manager itself is `nil` or no providers are registered, the call returns `nil, nil`, allowing callers to treat access control as disabled without branching on errors.
 
-The proxy includes one built-in access provider:
+Each `Result` includes the provider identifier, the resolved principal, and optional metadata (for example, which header carried the credential).
 
-- `config-api-key`: Validates API keys declared under top-level `api-keys`.
-  - Credential sources: `Authorization: Bearer`, `X-Goog-Api-Key`, `X-Api-Key`, `?key=`, `?auth_token=`
-  - Metadata: `Result.Metadata["source"]` is set to the matched source label.
+## Configuration Layout
 
-In the CLI server and `sdk/cliproxy`, this provider is registered automatically based on the loaded configuration.
+The manager expects access providers under the `auth.providers` key inside `config.yaml`:
 
 ```yaml
-api-keys:
-  - sk-test-123
-  - sk-prod-456
+auth:
+  providers:
+    - name: inline-api
+      type: config-api-key
+      api-keys:
+        - sk-test-123
+        - sk-prod-456
 ```
 
-## Loading Providers from External Go Modules
+Fields map directly to `config.AccessProvider`: `name` labels the provider, `type` selects the registered factory, `sdk` can name an external module, `api-keys` seeds inline credentials, and `config` passes provider-specific options.
+
+### Loading providers from external SDK modules
 
-To consume a provider shipped in another Go module, import it for its registration side effect:
+To consume a provider shipped in another Go module, point the `sdk` field at the module path and import it for its registration side effect:
+
+```yaml
+auth:
+  providers:
+    - name: partner-auth
+      type: partner-token
+      sdk: github.com/acme/xplatform/sdk/access/providers/partner
+      config:
+        region: us-west-2
+        audience: cli-proxy
+```
 
 ```go
 import (
@@ -80,11 +89,19 @@ import (
 )
 ```
 
-The blank identifier import ensures `init` runs so `sdkaccess.RegisterProvider` executes before you call `RegisteredProviders()` (or before `cliproxy.NewBuilder().Build()`).
+The blank identifier import ensures `init` runs so `sdkaccess.RegisterProvider` executes before `BuildProviders` is called.
+
+## Built-in Providers
+
+The SDK ships with one provider out of the box:
+
+- `config-api-key`: Validates API keys declared inline or under top-level `api-keys`. It accepts the key from `Authorization: Bearer`, `X-Goog-Api-Key`, `X-Api-Key`, or the `?key=` query string and reports `ErrInvalidCredential` when no match is found.
+
+Additional providers can be delivered by third-party packages. When a provider package is imported, it registers itself with `sdkaccess.RegisterProvider`.
 
 ### Metadata and auditing
 
-`Result.Metadata` carries provider-specific context. The built-in `config-api-key` provider, for example, stores the credential source (`authorization`, `x-goog-api-key`, `x-api-key`, `query-key`, `query-auth-token`). Populate this map in custom providers to enrich logs and downstream auditing.
+`Result.Metadata` carries provider-specific context. The built-in `config-api-key` provider, for example, stores the credential source (`authorization`, `x-goog-api-key`, `x-api-key`, or `query-key`). Populate this map in custom providers to enrich logs and downstream auditing.
 
 ## Writing Custom Providers
 
@@ -93,13 +110,13 @@ type customProvider struct{}
 
 func (p *customProvider) Identifier() string { return "my-provider" }
 
-func (p *customProvider) Authenticate(ctx context.Context, r *http.Request) (*sdkaccess.Result, *sdkaccess.AuthError) {
+func (p *customProvider) Authenticate(ctx context.Context, r *http.Request) (*sdkaccess.Result, error) {
     token := r.Header.Get("X-Custom")
     if token == "" {
-        return nil, sdkaccess.NewNotHandledError()
+        return nil, sdkaccess.ErrNoCredentials
     }
     if token != "expected" {
-        return nil, sdkaccess.NewInvalidCredentialError()
+        return nil, sdkaccess.ErrInvalidCredential
     }
     return &sdkaccess.Result{
         Provider:  p.Identifier(),
@@ -109,46 +126,51 @@ func (p *customProvider) Authenticate(ctx context.Context, r *http.Request) (*sd
 }
 
 func init() {
-    sdkaccess.RegisterProvider("custom", &customProvider{})
+    sdkaccess.RegisterProvider("custom", func(cfg *config.AccessProvider, root *config.Config) (sdkaccess.Provider, error) {
+        return &customProvider{}, nil
+    })
 }
 ```
 
-A provider must implement `Identifier()` and `Authenticate()`. To make it available to the access manager, call `RegisterProvider` inside `init` with an initialized provider instance.
+A provider must implement `Identifier()` and `Authenticate()`. To expose it to configuration, call `RegisterProvider` inside `init`. Provider factories receive the specific `AccessProvider` block plus the full root configuration for contextual needs.
 
 ## Error Semantics
 
-- `NewNoCredentialsError()` (`AuthErrorCodeNoCredentials`): no credentials were present or recognized. (HTTP 401)
-- `NewInvalidCredentialError()` (`AuthErrorCodeInvalidCredential`): credentials were present but rejected. (HTTP 401)
-- `NewNotHandledError()` (`AuthErrorCodeNotHandled`): fall through to the next provider.
-- `NewInternalAuthError(message, cause)` (`AuthErrorCodeInternal`): transport/system failure. (HTTP 500)
+- `ErrNoCredentials`: no credentials were present or recognized by any provider.
+- `ErrInvalidCredential`: at least one provider processed the credentials but rejected them.
+- `ErrNotHandled`: instructs the manager to fall through to the next provider without affecting aggregate error reporting.
 
-Errors propagate immediately to the caller unless they are classified as `not_handled` / `no_credentials` / `invalid_credential` and can be aggregated by the manager.
+Return custom errors to surface transport failures; they propagate immediately to the caller instead of being masked.
 
 ## Integration with cliproxy Service
 
-`sdk/cliproxy` wires `@sdk/access` automatically when you build a CLI service via `cliproxy.NewBuilder`. Supplying a manager lets you reuse the same instance in your host process:
+`sdk/cliproxy` wires `@sdk/access` automatically when you build a CLI service via `cliproxy.NewBuilder`. Supplying a preconfigured manager allows you to extend or override the default providers:
 
 ```go
 coreCfg, _ := config.LoadConfig("config.yaml")
-accessManager := sdkaccess.NewManager()
+providers, _ := sdkaccess.BuildProviders(coreCfg)
+manager := sdkaccess.NewManager()
+manager.SetProviders(providers)
 
 svc, _ := cliproxy.NewBuilder().
   WithConfig(coreCfg).
-  WithConfigPath("config.yaml").
-  WithRequestAccessManager(accessManager).
+  WithAccessManager(manager).
   Build()
 ```
 
-Register any custom providers (typically via blank imports) before calling `Build()` so they are present in the global registry snapshot.
+The service reuses the manager for every inbound request, ensuring consistent authentication across embedded deployments and the canonical CLI binary.
 
-### Hot reloading
+### Hot reloading providers
 
-When configuration changes, refresh any config-backed providers and then reset the manager's provider chain:
+When configuration changes, rebuild providers and swap them into the manager:
 
 ```go
-// configaccess is github.com/router-for-me/CLIProxyAPI/v6/internal/access/config_access
-configaccess.Register(&newCfg.SDKConfig)
-accessManager.SetProviders(sdkaccess.RegisteredProviders())
+providers, err := sdkaccess.BuildProviders(newCfg)
+if err != nil {
+    log.Errorf("reload auth providers failed: %v", err)
+    return
+}
+accessManager.SetProviders(providers)
 ```
 
-This mirrors the behaviour in `internal/access.ApplyAccessProviders`, enabling runtime updates without restarting the process.
+This mirrors the behaviour in `cliproxy.Service.refreshAccessProviders` and `api.Server.applyAccessConfig`, enabling runtime updates without restarting the process.
diff --git a/docs/sdk-access_CN.md b/docs/sdk-access_CN.md
index 38aafe119f..b3f2649708 100644
--- a/docs/sdk-access_CN.md
+++ b/docs/sdk-access_CN.md
@@ -7,71 +7,80 @@
 ```go
 import (
     sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access"
+    "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 )
 ```
 
 通过 `go get github.com/router-for-me/CLIProxyAPI/v6/sdk/access` 添加依赖。
 
-## Provider Registry
-
-访问提供者是全局注册，然后以快照形式挂到 `Manager` 上：
-
-- `RegisterProvider(type, provider)` 注册一个已经初始化好的 provider 实例。
-- 每个 `type` 第一次出现时会记录其注册顺序。
-- `RegisteredProviders()` 会按该顺序返回 provider 列表。
-
 ## 管理器生命周期
 
 ```go
 manager := sdkaccess.NewManager()
-manager.SetProviders(sdkaccess.RegisteredProviders())
+providers, err := sdkaccess.BuildProviders(cfg)
+if err != nil {
+    return err
+}
+manager.SetProviders(providers)
 ```
 
 - `NewManager` 创建空管理器。
 - `SetProviders` 替换提供者切片并做防御性拷贝。
 - `Providers` 返回适合并发读取的快照。
-
-如果管理器本身为 `nil` 或未配置任何 provider，调用会返回 `nil, nil`，可视为关闭访问控制。
+- `BuildProviders` 将 `config.Config` 中的访问配置转换成可运行的提供者。当配置没有显式声明但包含顶层 `api-keys` 时，会自动挂载内建的 `config-api-key` 提供者。
 
 ## 认证请求
 
 ```go
-result, authErr := manager.Authenticate(ctx, req)
+result, err := manager.Authenticate(ctx, req)
 switch {
-case authErr == nil:
+case err == nil:
     // Authentication succeeded; result carries provider and principal.
-case sdkaccess.IsAuthErrorCode(authErr, sdkaccess.AuthErrorCodeNoCredentials):
+case errors.Is(err, sdkaccess.ErrNoCredentials):
     // No recognizable credentials were supplied.
-case sdkaccess.IsAuthErrorCode(authErr, sdkaccess.AuthErrorCodeInvalidCredential):
+case errors.Is(err, sdkaccess.ErrInvalidCredential):
     // Credentials were present but rejected.
 default:
     // Provider surfaced a transport-level failure.
 }
 ```
 
-`Manager.Authenticate` 会按顺序遍历 provider：遇到成功立即返回，`AuthErrorCodeNotHandled` 会继续尝试下一个；`AuthErrorCodeNoCredentials` / `AuthErrorCodeInvalidCredential` 会在遍历结束后汇总给调用方。
+`Manager.Authenticate` 按配置顺序遍历提供者。遇到成功立即返回，`ErrNotHandled` 会继续尝试下一个；若发现 `ErrNoCredentials` 或 `ErrInvalidCredential`，会在遍历结束后汇总给调用方。
 
-`Result` 提供认证提供者标识、解析出的主体以及可选元数据（例如凭证来源）。
-
-## 内建 `config-api-key` Provider
+若管理器本身为 `nil` 或尚未注册提供者，调用会返回 `nil, nil`，让调用方无需针对错误做额外分支即可关闭访问控制。
 
-代理内置一个访问提供者：
+`Result` 提供认证提供者标识、解析出的主体以及可选元数据（例如凭证来源）。
 
-- `config-api-key`：校验 `config.yaml` 顶层的 `api-keys`。
-  - 凭证来源：`Authorization: Bearer`、`X-Goog-Api-Key`、`X-Api-Key`、`?key=`、`?auth_token=`
-  - 元数据：`Result.Metadata["source"]` 会写入匹配到的来源标识
+## 配置结构
 
-在 CLI 服务端与 `sdk/cliproxy` 中，该 provider 会根据加载到的配置自动注册。
+在 `config.yaml` 的 `auth.providers` 下定义访问提供者：
 
 ```yaml
-api-keys:
-  - sk-test-123
-  - sk-prod-456
+auth:
+  providers:
+    - name: inline-api
+      type: config-api-key
+      api-keys:
+        - sk-test-123
+        - sk-prod-456
 ```
 
-## 引入外部 Go 模块提供者
+条目映射到 `config.AccessProvider`：`name` 指定实例名，`type` 选择注册的工厂，`sdk` 可引用第三方模块，`api-keys` 提供内联凭证，`config` 用于传递特定选项。
+
+### 引入外部 SDK 提供者
 
-若要消费其它 Go 模块输出的访问提供者，直接用空白标识符导入以触发其 `init` 注册即可：
+若要消费其它 Go 模块输出的访问提供者，可在配置里填写 `sdk` 字段并在代码中引入该包，利用其 `init` 注册过程：
+
+```yaml
+auth:
+  providers:
+    - name: partner-auth
+      type: partner-token
+      sdk: github.com/acme/xplatform/sdk/access/providers/partner
+      config:
+        region: us-west-2
+        audience: cli-proxy
+```
 
 ```go
 import (
@@ -80,11 +89,19 @@ import (
 )
 ```
 
-空白导入可确保 `init` 先执行，从而在你调用 `RegisteredProviders()`（或 `cliproxy.NewBuilder().Build()`）之前完成 `sdkaccess.RegisterProvider`。
+通过空白标识符导入即可确保 `init` 调用，先于 `BuildProviders` 完成 `sdkaccess.RegisterProvider`。
+
+## 内建提供者
+
+当前 SDK 默认内置：
+
+- `config-api-key`：校验配置中的 API Key。它从 `Authorization: Bearer`、`X-Goog-Api-Key`、`X-Api-Key` 以及查询参数 `?key=` 提取凭证，不匹配时抛出 `ErrInvalidCredential`。
+
+导入第三方包即可通过 `sdkaccess.RegisterProvider` 注册更多类型。
 
 ### 元数据与审计
 
-`Result.Metadata` 用于携带提供者特定的上下文信息。内建的 `config-api-key` 会记录凭证来源（`authorization`、`x-goog-api-key`、`x-api-key`、`query-key`、`query-auth-token`）。自定义提供者同样可以填充该 Map，以便丰富日志与审计场景。
+`Result.Metadata` 用于携带提供者特定的上下文信息。内建的 `config-api-key` 会记录凭证来源（`authorization`、`x-goog-api-key`、`x-api-key` 或 `query-key`）。自定义提供者同样可以填充该 Map，以便丰富日志与审计场景。
 
 ## 编写自定义提供者
 
@@ -93,13 +110,13 @@ type customProvider struct{}
 
 func (p *customProvider) Identifier() string { return "my-provider" }
 
-func (p *customProvider) Authenticate(ctx context.Context, r *http.Request) (*sdkaccess.Result, *sdkaccess.AuthError) {
+func (p *customProvider) Authenticate(ctx context.Context, r *http.Request) (*sdkaccess.Result, error) {
     token := r.Header.Get("X-Custom")
     if token == "" {
-        return nil, sdkaccess.NewNotHandledError()
+        return nil, sdkaccess.ErrNoCredentials
     }
     if token != "expected" {
-        return nil, sdkaccess.NewInvalidCredentialError()
+        return nil, sdkaccess.ErrInvalidCredential
     }
     return &sdkaccess.Result{
         Provider:  p.Identifier(),
@@ -109,46 +126,51 @@ func (p *customProvider) Authenticate(ctx context.Context, r *http.Request) (*sd
 }
 
 func init() {
-    sdkaccess.RegisterProvider("custom", &customProvider{})
+    sdkaccess.RegisterProvider("custom", func(cfg *config.AccessProvider, root *config.Config) (sdkaccess.Provider, error) {
+        return &customProvider{}, nil
+    })
 }
 ```
 
-自定义提供者需要实现 `Identifier()` 与 `Authenticate()`。在 `init` 中用已初始化实例调用 `RegisterProvider` 注册到全局 registry。
+自定义提供者需要实现 `Identifier()` 与 `Authenticate()`。在 `init` 中调用 `RegisterProvider` 暴露给配置层，工厂函数既能读取当前条目，也能访问完整根配置。
 
 ## 错误语义
 
-- `NewNoCredentialsError()`（`AuthErrorCodeNoCredentials`）：未提供或未识别到凭证。（HTTP 401）
-- `NewInvalidCredentialError()`（`AuthErrorCodeInvalidCredential`）：凭证存在但校验失败。（HTTP 401）
-- `NewNotHandledError()`（`AuthErrorCodeNotHandled`）：告诉管理器跳到下一个 provider。
-- `NewInternalAuthError(message, cause)`（`AuthErrorCodeInternal`）：网络/系统错误。（HTTP 500）
+- `ErrNoCredentials`：任何提供者都未识别到凭证。
+- `ErrInvalidCredential`：至少一个提供者处理了凭证但判定无效。
+- `ErrNotHandled`：告诉管理器跳到下一个提供者，不影响最终错误统计。
 
-除可汇总的 `not_handled` / `no_credentials` / `invalid_credential` 外，其它错误会立即冒泡返回。
+自定义错误（例如网络异常）会马上冒泡返回。
 
 ## 与 cliproxy 集成
 
-使用 `sdk/cliproxy` 构建服务时会自动接入 `@sdk/access`。如果希望在宿主进程里复用同一个 `Manager` 实例，可传入自定义管理器：
+使用 `sdk/cliproxy` 构建服务时会自动接入 `@sdk/access`。如果需要扩展内置行为，可传入自定义管理器：
 
 ```go
 coreCfg, _ := config.LoadConfig("config.yaml")
-accessManager := sdkaccess.NewManager()
+providers, _ := sdkaccess.BuildProviders(coreCfg)
+manager := sdkaccess.NewManager()
+manager.SetProviders(providers)
 
 svc, _ := cliproxy.NewBuilder().
   WithConfig(coreCfg).
-  WithConfigPath("config.yaml").
-  WithRequestAccessManager(accessManager).
+  WithAccessManager(manager).
   Build()
 ```
 
-请在调用 `Build()` 之前完成自定义 provider 的注册（通常通过空白导入触发 `init`），以确保它们被包含在全局 registry 的快照中。
+服务会复用该管理器处理每一个入站请求，实现与 CLI 二进制一致的访问控制体验。
 
 ### 动态热更新提供者
 
-当配置发生变化时，刷新依赖配置的 provider，然后重置 manager 的 provider 链：
+当配置发生变化时，可以重新构建提供者并替换当前列表：
 
 ```go
-// configaccess is github.com/router-for-me/CLIProxyAPI/v6/internal/access/config_access
-configaccess.Register(&newCfg.SDKConfig)
-accessManager.SetProviders(sdkaccess.RegisteredProviders())
+providers, err := sdkaccess.BuildProviders(newCfg)
+if err != nil {
+    log.Errorf("reload auth providers failed: %v", err)
+    return
+}
+accessManager.SetProviders(providers)
 ```
 
-这一流程与 `internal/access.ApplyAccessProviders` 保持一致，避免为更新访问策略而重启进程。
+这一流程与 `cliproxy.Service.refreshAccessProviders` 和 `api.Server.applyAccessConfig` 保持一致，避免为更新访问策略而重启进程。
diff --git a/examples/custom-provider/main.go b/examples/custom-provider/main.go
index 7c611f9eb3..2f530d7c82 100644
--- a/examples/custom-provider/main.go
+++ b/examples/custom-provider/main.go
@@ -159,13 +159,13 @@ func (MyExecutor) CountTokens(context.Context, *coreauth.Auth, clipexec.Request,
 	return clipexec.Response{}, errors.New("count tokens not implemented")
 }
 
-func (MyExecutor) ExecuteStream(ctx context.Context, a *coreauth.Auth, req clipexec.Request, opts clipexec.Options) (*clipexec.StreamResult, error) {
+func (MyExecutor) ExecuteStream(ctx context.Context, a *coreauth.Auth, req clipexec.Request, opts clipexec.Options) (<-chan clipexec.StreamChunk, error) {
 	ch := make(chan clipexec.StreamChunk, 1)
 	go func() {
 		defer close(ch)
 		ch <- clipexec.StreamChunk{Payload: []byte("data: {\"ok\":true}\n\n")}
 	}()
-	return &clipexec.StreamResult{Chunks: ch}, nil
+	return ch, nil
 }
 
 func (MyExecutor) Refresh(ctx context.Context, a *coreauth.Auth) (*coreauth.Auth, error) {
diff --git a/examples/http-request/main.go b/examples/http-request/main.go
index a667a9ca0c..4daee547ff 100644
--- a/examples/http-request/main.go
+++ b/examples/http-request/main.go
@@ -58,7 +58,7 @@ func (EchoExecutor) Execute(context.Context, *coreauth.Auth, clipexec.Request, c
 	return clipexec.Response{}, errors.New("echo executor: Execute not implemented")
 }
 
-func (EchoExecutor) ExecuteStream(context.Context, *coreauth.Auth, clipexec.Request, clipexec.Options) (*clipexec.StreamResult, error) {
+func (EchoExecutor) ExecuteStream(context.Context, *coreauth.Auth, clipexec.Request, clipexec.Options) (<-chan clipexec.StreamChunk, error) {
 	return nil, errors.New("echo executor: ExecuteStream not implemented")
 }
 
diff --git a/go.mod b/go.mod
index 972646c818..b2d9f93548 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module github.com/router-for-me/CLIProxyAPI/v6
 
-go 1.26.0
+go 1.24.2
 
 require (
 	github.com/andybalholm/brotli v1.0.6
@@ -22,6 +22,7 @@ require (
 	github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c
 	github.com/refraction-networking/utls v1.8.2
 	github.com/sirupsen/logrus v1.9.3
+	github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
 	github.com/stretchr/testify v1.11.1
 	github.com/tidwall/gjson v1.18.0
 	github.com/tidwall/sjson v1.2.5
diff --git a/go.sum b/go.sum
index 8fe0c12d13..e81182cae2 100644
--- a/go.sum
+++ b/go.sum
@@ -182,6 +182,8 @@ github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw=
 github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
 github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
+github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 h1:JIAuq3EEf9cgbU6AtGPK4CTG3Zf6CKMNqf0MHTggAUA=
+github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
diff --git a/internal/access/config_access/provider.go b/internal/access/config_access/provider.go
index 84e8abcb0e..70824524b2 100644
--- a/internal/access/config_access/provider.go
+++ b/internal/access/config_access/provider.go
@@ -4,28 +4,19 @@ import (
 	"context"
 	"net/http"
 	"strings"
+	"sync"
 
 	sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access"
 	sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
 )
 
-// Register ensures the config-access provider is available to the access manager.
-func Register(cfg *sdkconfig.SDKConfig) {
-	if cfg == nil {
-		sdkaccess.UnregisterProvider(sdkaccess.AccessProviderTypeConfigAPIKey)
-		return
-	}
+var registerOnce sync.Once
 
-	keys := normalizeKeys(cfg.APIKeys)
-	if len(keys) == 0 {
-		sdkaccess.UnregisterProvider(sdkaccess.AccessProviderTypeConfigAPIKey)
-		return
-	}
-
-	sdkaccess.RegisterProvider(
-		sdkaccess.AccessProviderTypeConfigAPIKey,
-		newProvider(sdkaccess.DefaultAccessProviderName, keys),
-	)
+// Register ensures the config-access provider is available to the access manager.
+func Register() {
+	registerOnce.Do(func() {
+		sdkaccess.RegisterProvider(sdkconfig.AccessProviderTypeConfigAPIKey, newProvider)
+	})
 }
 
 type provider struct {
@@ -33,31 +24,34 @@ type provider struct {
 	keys map[string]struct{}
 }
 
-func newProvider(name string, keys []string) *provider {
-	providerName := strings.TrimSpace(name)
-	if providerName == "" {
-		providerName = sdkaccess.DefaultAccessProviderName
-	}
-	keySet := make(map[string]struct{}, len(keys))
-	for _, key := range keys {
-		keySet[key] = struct{}{}
+func newProvider(cfg *sdkconfig.AccessProvider, _ *sdkconfig.SDKConfig) (sdkaccess.Provider, error) {
+	name := cfg.Name
+	if name == "" {
+		name = sdkconfig.DefaultAccessProviderName
+	}
+	keys := make(map[string]struct{}, len(cfg.APIKeys))
+	for _, key := range cfg.APIKeys {
+		if key == "" {
+			continue
+		}
+		keys[key] = struct{}{}
 	}
-	return &provider{name: providerName, keys: keySet}
+	return &provider{name: name, keys: keys}, nil
 }
 
 func (p *provider) Identifier() string {
 	if p == nil || p.name == "" {
-		return sdkaccess.DefaultAccessProviderName
+		return sdkconfig.DefaultAccessProviderName
 	}
 	return p.name
 }
 
-func (p *provider) Authenticate(_ context.Context, r *http.Request) (*sdkaccess.Result, *sdkaccess.AuthError) {
+func (p *provider) Authenticate(_ context.Context, r *http.Request) (*sdkaccess.Result, error) {
 	if p == nil {
-		return nil, sdkaccess.NewNotHandledError()
+		return nil, sdkaccess.ErrNotHandled
 	}
 	if len(p.keys) == 0 {
-		return nil, sdkaccess.NewNotHandledError()
+		return nil, sdkaccess.ErrNotHandled
 	}
 	authHeader := r.Header.Get("Authorization")
 	authHeaderGoogle := r.Header.Get("X-Goog-Api-Key")
@@ -69,7 +63,7 @@ func (p *provider) Authenticate(_ context.Context, r *http.Request) (*sdkaccess.
 		queryAuthToken = r.URL.Query().Get("auth_token")
 	}
 	if authHeader == "" && authHeaderGoogle == "" && authHeaderAnthropic == "" && queryKey == "" && queryAuthToken == "" {
-		return nil, sdkaccess.NewNoCredentialsError()
+		return nil, sdkaccess.ErrNoCredentials
 	}
 
 	apiKey := extractBearerToken(authHeader)
@@ -100,7 +94,7 @@ func (p *provider) Authenticate(_ context.Context, r *http.Request) (*sdkaccess.
 		}
 	}
 
-	return nil, sdkaccess.NewInvalidCredentialError()
+	return nil, sdkaccess.ErrInvalidCredential
 }
 
 func extractBearerToken(header string) string {
@@ -116,26 +110,3 @@ func extractBearerToken(header string) string {
 	}
 	return strings.TrimSpace(parts[1])
 }
-
-func normalizeKeys(keys []string) []string {
-	if len(keys) == 0 {
-		return nil
-	}
-	normalized := make([]string, 0, len(keys))
-	seen := make(map[string]struct{}, len(keys))
-	for _, key := range keys {
-		trimmedKey := strings.TrimSpace(key)
-		if trimmedKey == "" {
-			continue
-		}
-		if _, exists := seen[trimmedKey]; exists {
-			continue
-		}
-		seen[trimmedKey] = struct{}{}
-		normalized = append(normalized, trimmedKey)
-	}
-	if len(normalized) == 0 {
-		return nil
-	}
-	return normalized
-}
diff --git a/internal/access/reconcile.go b/internal/access/reconcile.go
index 36601f9998..267d2fe0f5 100644
--- a/internal/access/reconcile.go
+++ b/internal/access/reconcile.go
@@ -6,9 +6,9 @@ import (
 	"sort"
 	"strings"
 
-	configaccess "github.com/router-for-me/CLIProxyAPI/v6/internal/access/config_access"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access"
+	sdkConfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
 	log "github.com/sirupsen/logrus"
 )
 
@@ -17,26 +17,26 @@ import (
 // ordered provider slice along with the identifiers of providers that were added, updated, or
 // removed compared to the previous configuration.
 func ReconcileProviders(oldCfg, newCfg *config.Config, existing []sdkaccess.Provider) (result []sdkaccess.Provider, added, updated, removed []string, err error) {
-	_ = oldCfg
 	if newCfg == nil {
 		return nil, nil, nil, nil, nil
 	}
 
-	result = sdkaccess.RegisteredProviders()
-
 	existingMap := make(map[string]sdkaccess.Provider, len(existing))
 	for _, provider := range existing {
-		providerID := identifierFromProvider(provider)
-		if providerID == "" {
+		if provider == nil {
 			continue
 		}
-		existingMap[providerID] = provider
+		existingMap[provider.Identifier()] = provider
 	}
 
-	finalIDs := make(map[string]struct{}, len(result))
+	oldCfgMap := accessProviderMap(oldCfg)
+	newEntries := collectProviderEntries(newCfg)
+
+	result = make([]sdkaccess.Provider, 0, len(newEntries))
+	finalIDs := make(map[string]struct{}, len(newEntries))
 
 	isInlineProvider := func(id string) bool {
-		return strings.EqualFold(id, sdkaccess.DefaultAccessProviderName)
+		return strings.EqualFold(id, sdkConfig.DefaultAccessProviderName)
 	}
 	appendChange := func(list *[]string, id string) {
 		if isInlineProvider(id) {
@@ -45,28 +45,85 @@ func ReconcileProviders(oldCfg, newCfg *config.Config, existing []sdkaccess.Prov
 		*list = append(*list, id)
 	}
 
-	for _, provider := range result {
-		providerID := identifierFromProvider(provider)
-		if providerID == "" {
+	for _, providerCfg := range newEntries {
+		key := providerIdentifier(providerCfg)
+		if key == "" {
 			continue
 		}
-		finalIDs[providerID] = struct{}{}
 
-		existingProvider, exists := existingMap[providerID]
-		if !exists {
-			appendChange(&added, providerID)
-			continue
+		forceRebuild := strings.EqualFold(strings.TrimSpace(providerCfg.Type), sdkConfig.AccessProviderTypeConfigAPIKey)
+		if oldCfgProvider, ok := oldCfgMap[key]; ok {
+			isAliased := oldCfgProvider == providerCfg
+			if !forceRebuild && !isAliased && providerConfigEqual(oldCfgProvider, providerCfg) {
+				if existingProvider, okExisting := existingMap[key]; okExisting {
+					result = append(result, existingProvider)
+					finalIDs[key] = struct{}{}
+					continue
+				}
+			}
 		}
-		if !providerInstanceEqual(existingProvider, provider) {
-			appendChange(&updated, providerID)
+
+		provider, buildErr := sdkaccess.BuildProvider(providerCfg, &newCfg.SDKConfig)
+		if buildErr != nil {
+			return nil, nil, nil, nil, buildErr
 		}
+		if _, ok := oldCfgMap[key]; ok {
+			if _, existed := existingMap[key]; existed {
+				appendChange(&updated, key)
+			} else {
+				appendChange(&added, key)
+			}
+		} else {
+			appendChange(&added, key)
+		}
+		result = append(result, provider)
+		finalIDs[key] = struct{}{}
 	}
 
-	for providerID := range existingMap {
-		if _, exists := finalIDs[providerID]; exists {
-			continue
+	if len(result) == 0 {
+		if inline := sdkConfig.MakeInlineAPIKeyProvider(newCfg.APIKeys); inline != nil {
+			key := providerIdentifier(inline)
+			if key != "" {
+				if oldCfgProvider, ok := oldCfgMap[key]; ok {
+					if providerConfigEqual(oldCfgProvider, inline) {
+						if existingProvider, okExisting := existingMap[key]; okExisting {
+							result = append(result, existingProvider)
+							finalIDs[key] = struct{}{}
+							goto inlineDone
+						}
+					}
+				}
+				provider, buildErr := sdkaccess.BuildProvider(inline, &newCfg.SDKConfig)
+				if buildErr != nil {
+					return nil, nil, nil, nil, buildErr
+				}
+				if _, existed := existingMap[key]; existed {
+					appendChange(&updated, key)
+				} else if _, hadOld := oldCfgMap[key]; hadOld {
+					appendChange(&updated, key)
+				} else {
+					appendChange(&added, key)
+				}
+				result = append(result, provider)
+				finalIDs[key] = struct{}{}
+			}
 		}
-		appendChange(&removed, providerID)
+	inlineDone:
+	}
+
+	removedSet := make(map[string]struct{})
+	for id := range existingMap {
+		if _, ok := finalIDs[id]; !ok {
+			if isInlineProvider(id) {
+				continue
+			}
+			removedSet[id] = struct{}{}
+		}
+	}
+
+	removed = make([]string, 0, len(removedSet))
+	for id := range removedSet {
+		removed = append(removed, id)
 	}
 
 	sort.Strings(added)
@@ -85,7 +142,6 @@ func ApplyAccessProviders(manager *sdkaccess.Manager, oldCfg, newCfg *config.Con
 	}
 
 	existing := manager.Providers()
-	configaccess.Register(&newCfg.SDKConfig)
 	providers, added, updated, removed, err := ReconcileProviders(oldCfg, newCfg, existing)
 	if err != nil {
 		log.Errorf("failed to reconcile request auth providers: %v", err)
@@ -104,24 +160,111 @@ func ApplyAccessProviders(manager *sdkaccess.Manager, oldCfg, newCfg *config.Con
 	return false, nil
 }
 
-func identifierFromProvider(provider sdkaccess.Provider) string {
+func accessProviderMap(cfg *config.Config) map[string]*sdkConfig.AccessProvider {
+	result := make(map[string]*sdkConfig.AccessProvider)
+	if cfg == nil {
+		return result
+	}
+	for i := range cfg.Access.Providers {
+		providerCfg := &cfg.Access.Providers[i]
+		if providerCfg.Type == "" {
+			continue
+		}
+		key := providerIdentifier(providerCfg)
+		if key == "" {
+			continue
+		}
+		result[key] = providerCfg
+	}
+	if len(result) == 0 && len(cfg.APIKeys) > 0 {
+		if provider := sdkConfig.MakeInlineAPIKeyProvider(cfg.APIKeys); provider != nil {
+			if key := providerIdentifier(provider); key != "" {
+				result[key] = provider
+			}
+		}
+	}
+	return result
+}
+
+func collectProviderEntries(cfg *config.Config) []*sdkConfig.AccessProvider {
+	entries := make([]*sdkConfig.AccessProvider, 0, len(cfg.Access.Providers))
+	for i := range cfg.Access.Providers {
+		providerCfg := &cfg.Access.Providers[i]
+		if providerCfg.Type == "" {
+			continue
+		}
+		if key := providerIdentifier(providerCfg); key != "" {
+			entries = append(entries, providerCfg)
+		}
+	}
+	if len(entries) == 0 && len(cfg.APIKeys) > 0 {
+		if inline := sdkConfig.MakeInlineAPIKeyProvider(cfg.APIKeys); inline != nil {
+			entries = append(entries, inline)
+		}
+	}
+	return entries
+}
+
+func providerIdentifier(provider *sdkConfig.AccessProvider) string {
 	if provider == nil {
 		return ""
 	}
-	return strings.TrimSpace(provider.Identifier())
+	if name := strings.TrimSpace(provider.Name); name != "" {
+		return name
+	}
+	typ := strings.TrimSpace(provider.Type)
+	if typ == "" {
+		return ""
+	}
+	if strings.EqualFold(typ, sdkConfig.AccessProviderTypeConfigAPIKey) {
+		return sdkConfig.DefaultAccessProviderName
+	}
+	return typ
 }
 
-func providerInstanceEqual(a, b sdkaccess.Provider) bool {
+func providerConfigEqual(a, b *sdkConfig.AccessProvider) bool {
 	if a == nil || b == nil {
 		return a == nil && b == nil
 	}
-	if reflect.TypeOf(a) != reflect.TypeOf(b) {
+	if !strings.EqualFold(strings.TrimSpace(a.Type), strings.TrimSpace(b.Type)) {
+		return false
+	}
+	if strings.TrimSpace(a.SDK) != strings.TrimSpace(b.SDK) {
+		return false
+	}
+	if !stringSetEqual(a.APIKeys, b.APIKeys) {
 		return false
 	}
-	valueA := reflect.ValueOf(a)
-	valueB := reflect.ValueOf(b)
-	if valueA.Kind() == reflect.Pointer && valueB.Kind() == reflect.Pointer {
-		return valueA.Pointer() == valueB.Pointer()
+	if len(a.Config) != len(b.Config) {
+		return false
+	}
+	if len(a.Config) > 0 && !reflect.DeepEqual(a.Config, b.Config) {
+		return false
+	}
+	return true
+}
+
+func stringSetEqual(a, b []string) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	if len(a) == 0 {
+		return true
+	}
+	seen := make(map[string]int, len(a))
+	for _, val := range a {
+		seen[val]++
+	}
+	for _, val := range b {
+		count := seen[val]
+		if count == 0 {
+			return false
+		}
+		if count == 1 {
+			delete(seen, val)
+		} else {
+			seen[val] = count - 1
+		}
 	}
-	return reflect.DeepEqual(a, b)
+	return len(seen) == 0
 }
diff --git a/internal/api/handlers/management/api_tools.go b/internal/api/handlers/management/api_tools.go
index 48774343e9..c7846a7599 100644
--- a/internal/api/handlers/management/api_tools.go
+++ b/internal/api/handlers/management/api_tools.go
@@ -1,7 +1,6 @@
 package management
 
 import (
-	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -9,37 +8,24 @@ import (
 	"net"
 	"net/http"
 	"net/url"
-	"os"
 	"strings"
 	"time"
 
-	"github.com/fxamacker/cbor/v2"
 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	log "github.com/sirupsen/logrus"
 	"golang.org/x/net/proxy"
 	"golang.org/x/oauth2"
 	"golang.org/x/oauth2/google"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
-	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 )
 
 const defaultAPICallTimeout = 60 * time.Second
 
-// OAuth credentials should be loaded from environment variables or config, not hardcoded
-// Placeholder values - replace with env var lookups in production
-var geminiOAuthClientID = os.Getenv("GEMINI_OAUTH_CLIENT_ID")
-var geminiOAuthClientSecret = os.Getenv("GEMINI_OAUTH_CLIENT_SECRET")
-
-func init() {
-	// Allow env override for OAuth credentials
-	if geminiOAuthClientID == "" {
-		geminiOAuthClientID = "PLACEHOLDER_SET_FROM_CONFIG"
-	}
-	if geminiOAuthClientSecret == "" {
-		geminiOAuthClientSecret = "PLACEHOLDER_SET_FROM_CONFIG"
-	}
-}
+const (
+	geminiOAuthClientID     = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
+	geminiOAuthClientSecret = "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"
+)
 
 var geminiOAuthScopes = []string{
 	"https://www.googleapis.com/auth/cloud-platform",
@@ -47,9 +33,10 @@ var geminiOAuthScopes = []string{
 	"https://www.googleapis.com/auth/userinfo.profile",
 }
 
-// OAuth credentials loaded from environment variables - never hardcode
-var antigravityOAuthClientID = os.Getenv("ANTIGRAVITY_OAUTH_CLIENT_ID")
-var antigravityOAuthClientSecret = os.Getenv("ANTIGRAVITY_OAUTH_CLIENT_SECRET")
+const (
+	antigravityOAuthClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
+	antigravityOAuthClientSecret = "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"
+)
 
 var antigravityOAuthTokenURL = "https://oauth2.googleapis.com/token"
 
@@ -67,7 +54,6 @@ type apiCallResponse struct {
 	StatusCode int                 `json:"status_code"`
 	Header     map[string][]string `json:"header"`
 	Body       string              `json:"body"`
-	Quota      *QuotaSnapshots     `json:"quota,omitempty"`
 }
 
 // APICall makes a generic HTTP request on behalf of the management API caller.
@@ -84,7 +70,7 @@ type apiCallResponse struct {
 //	- Authorization: Bearer <key>
 //	- X-Management-Key: <key>
 //
-// Request JSON (supports both application/json and application/cbor):
+// Request JSON:
 //   - auth_index / authIndex / AuthIndex (optional):
 //     The credential "auth_index" from GET /v0/management/auth-files (or other endpoints returning it).
 //     If omitted or not found, credential-specific proxy/token substitution is skipped.
@@ -104,14 +90,10 @@ type apiCallResponse struct {
 //  2. Global config proxy-url
 //  3. Direct connect (environment proxies are not used)
 //
-// Response (returned with HTTP 200 when the APICall itself succeeds):
-//
-//	Format matches request Content-Type (application/json or application/cbor)
-//	- status_code: Upstream HTTP status code.
-//	- header: Upstream response headers.
-//	- body: Upstream response body as string.
-//	- quota (optional): For GitHub Copilot enterprise accounts, contains quota_snapshots
-//	  with details for chat, completions, and premium_interactions.
+// Response JSON (returned with HTTP 200 when the APICall itself succeeds):
+//   - status_code: Upstream HTTP status code.
+//   - header: Upstream response headers.
+//   - body: Upstream response body as string.
 //
 // Example:
 //
@@ -125,28 +107,10 @@ type apiCallResponse struct {
 //	  -H "Content-Type: application/json" \
 //	  -d '{"auth_index":"<AUTH_INDEX>","method":"POST","url":"https://api.example.com/v1/fetchAvailableModels","header":{"Authorization":"Bearer $TOKEN$","Content-Type":"application/json","User-Agent":"cliproxyapi"},"data":"{}"}'
 func (h *Handler) APICall(c *gin.Context) {
-	// Detect content type
-	contentType := strings.ToLower(strings.TrimSpace(c.GetHeader("Content-Type")))
-	isCBOR := strings.Contains(contentType, "application/cbor")
-
 	var body apiCallRequest
-
-	// Parse request body based on content type
-	if isCBOR {
-		rawBody, errRead := io.ReadAll(c.Request.Body)
-		if errRead != nil {
-			c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read request body"})
-			return
-		}
-		if errUnmarshal := cbor.Unmarshal(rawBody, &body); errUnmarshal != nil {
-			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid cbor body"})
-			return
-		}
-	} else {
-		if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil {
-			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
-			return
-		}
+	if errBindJSON := c.ShouldBindJSON(&body); errBindJSON != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
+		return
 	}
 
 	method := strings.ToUpper(strings.TrimSpace(body.Method))
@@ -200,21 +164,9 @@ func (h *Handler) APICall(c *gin.Context) {
 		reqHeaders[key] = strings.ReplaceAll(value, "$TOKEN$", token)
 	}
 
-	// When caller indicates CBOR in request headers, convert JSON string payload to CBOR bytes.
-	useCBORPayload := headerContainsValue(reqHeaders, "Content-Type", "application/cbor")
-
 	var requestBody io.Reader
 	if body.Data != "" {
-		if useCBORPayload {
-			cborPayload, errEncode := encodeJSONStringToCBOR(body.Data)
-			if errEncode != nil {
-				c.JSON(http.StatusBadRequest, gin.H{"error": "invalid json data for cbor content-type"})
-				return
-			}
-			requestBody = bytes.NewReader(cborPayload)
-		} else {
-			requestBody = strings.NewReader(body.Data)
-		}
+		requestBody = strings.NewReader(body.Data)
 	}
 
 	req, errNewRequest := http.NewRequestWithContext(c.Request.Context(), method, urlStr, requestBody)
@@ -257,38 +209,11 @@ func (h *Handler) APICall(c *gin.Context) {
 		return
 	}
 
-	// For CBOR upstream responses, decode into plain text or JSON string before returning.
-	responseBodyText := string(respBody)
-	if headerContainsValue(reqHeaders, "Accept", "application/cbor") || strings.Contains(strings.ToLower(resp.Header.Get("Content-Type")), "application/cbor") {
-		if decodedBody, errDecode := decodeCBORBodyToTextOrJSON(respBody); errDecode == nil {
-			responseBodyText = decodedBody
-		}
-	}
-
-	response := apiCallResponse{
+	c.JSON(http.StatusOK, apiCallResponse{
 		StatusCode: resp.StatusCode,
 		Header:     resp.Header,
-		Body:       responseBodyText,
-	}
-
-	// If this is a GitHub Copilot token endpoint response, try to enrich with quota information
-	if resp.StatusCode == http.StatusOK &&
-		strings.Contains(urlStr, "copilot_internal") &&
-		strings.Contains(urlStr, "/token") {
-		response = h.enrichCopilotTokenResponse(c.Request.Context(), response, auth, urlStr)
-	}
-
-	// Return response in the same format as the request
-	if isCBOR {
-		cborData, errMarshal := cbor.Marshal(response)
-		if errMarshal != nil {
-			c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encode cbor response"})
-			return
-		}
-		c.Data(http.StatusOK, "application/cbor", cborData)
-	} else {
-		c.JSON(http.StatusOK, response)
-	}
+		Body:       string(respBody),
+	})
 }
 
 func firstNonEmptyString(values ...*string) string {
@@ -777,421 +702,3 @@ func buildProxyTransport(proxyStr string) *http.Transport {
 	log.Debugf("unsupported proxy scheme: %s", proxyURL.Scheme)
 	return nil
 }
-
-// headerContainsValue checks whether a header map contains a target value (case-insensitive key and value).
-func headerContainsValue(headers map[string]string, targetKey, targetValue string) bool {
-	if len(headers) == 0 {
-		return false
-	}
-	for key, value := range headers {
-		if !strings.EqualFold(strings.TrimSpace(key), strings.TrimSpace(targetKey)) {
-			continue
-		}
-		if strings.Contains(strings.ToLower(value), strings.ToLower(strings.TrimSpace(targetValue))) {
-			return true
-		}
-	}
-	return false
-}
-
-// encodeJSONStringToCBOR converts a JSON string payload into CBOR bytes.
-func encodeJSONStringToCBOR(jsonString string) ([]byte, error) {
-	var payload any
-	if errUnmarshal := json.Unmarshal([]byte(jsonString), &payload); errUnmarshal != nil {
-		return nil, errUnmarshal
-	}
-	return cbor.Marshal(payload)
-}
-
-// decodeCBORBodyToTextOrJSON decodes CBOR bytes to plain text (for string payloads) or JSON string.
-func decodeCBORBodyToTextOrJSON(raw []byte) (string, error) {
-	if len(raw) == 0 {
-		return "", nil
-	}
-
-	var payload any
-	if errUnmarshal := cbor.Unmarshal(raw, &payload); errUnmarshal != nil {
-		return "", errUnmarshal
-	}
-
-	jsonCompatible := cborValueToJSONCompatible(payload)
-	switch typed := jsonCompatible.(type) {
-	case string:
-		return typed, nil
-	case []byte:
-		return string(typed), nil
-	default:
-		jsonBytes, errMarshal := json.Marshal(jsonCompatible)
-		if errMarshal != nil {
-			return "", errMarshal
-		}
-		return string(jsonBytes), nil
-	}
-}
-
-// cborValueToJSONCompatible recursively converts CBOR-decoded values into JSON-marshalable values.
-func cborValueToJSONCompatible(value any) any {
-	switch typed := value.(type) {
-	case map[any]any:
-		out := make(map[string]any, len(typed))
-		for key, item := range typed {
-			out[fmt.Sprint(key)] = cborValueToJSONCompatible(item)
-		}
-		return out
-	case map[string]any:
-		out := make(map[string]any, len(typed))
-		for key, item := range typed {
-			out[key] = cborValueToJSONCompatible(item)
-		}
-		return out
-	case []any:
-		out := make([]any, len(typed))
-		for i, item := range typed {
-			out[i] = cborValueToJSONCompatible(item)
-		}
-		return out
-	default:
-		return typed
-	}
-}
-
-// QuotaDetail represents quota information for a specific resource type
-type QuotaDetail struct {
-	Entitlement      float64 `json:"entitlement"`
-	OverageCount     float64 `json:"overage_count"`
-	OveragePermitted bool    `json:"overage_permitted"`
-	PercentRemaining float64 `json:"percent_remaining"`
-	QuotaID          string  `json:"quota_id"`
-	QuotaRemaining   float64 `json:"quota_remaining"`
-	Remaining        float64 `json:"remaining"`
-	Unlimited        bool    `json:"unlimited"`
-}
-
-// QuotaSnapshots contains quota details for different resource types
-type QuotaSnapshots struct {
-	Chat                QuotaDetail `json:"chat"`
-	Completions         QuotaDetail `json:"completions"`
-	PremiumInteractions QuotaDetail `json:"premium_interactions"`
-}
-
-// CopilotUsageResponse represents the GitHub Copilot usage information
-type CopilotUsageResponse struct {
-	AccessTypeSKU         string         `json:"access_type_sku"`
-	AnalyticsTrackingID   string         `json:"analytics_tracking_id"`
-	AssignedDate          string         `json:"assigned_date"`
-	CanSignupForLimited   bool           `json:"can_signup_for_limited"`
-	ChatEnabled           bool           `json:"chat_enabled"`
-	CopilotPlan           string         `json:"copilot_plan"`
-	OrganizationLoginList []interface{}  `json:"organization_login_list"`
-	OrganizationList      []interface{}  `json:"organization_list"`
-	QuotaResetDate        string         `json:"quota_reset_date"`
-	QuotaSnapshots        QuotaSnapshots `json:"quota_snapshots"`
-}
-
-type copilotQuotaRequest struct {
-	AuthIndexSnake  *string `json:"auth_index"`
-	AuthIndexCamel  *string `json:"authIndex"`
-	AuthIndexPascal *string `json:"AuthIndex"`
-}
-
-// GetCopilotQuota fetches GitHub Copilot quota information from the /copilot_internal/user endpoint.
-//
-// Endpoint:
-//
-//	GET /v0/management/copilot-quota
-//
-// Query Parameters (optional):
-//   - auth_index: The credential "auth_index" from GET /v0/management/auth-files.
-//     If omitted, uses the first available GitHub Copilot credential.
-//
-// Response:
-//
-//	Returns the CopilotUsageResponse with quota_snapshots containing detailed quota information
-//	for chat, completions, and premium_interactions.
-//
-// Example:
-//
-//	curl -sS -X GET "http://127.0.0.1:8317/v0/management/copilot-quota?auth_index=<AUTH_INDEX>" \
-//	  -H "Authorization: Bearer <MANAGEMENT_KEY>"
-func (h *Handler) GetCopilotQuota(c *gin.Context) {
-	authIndex := strings.TrimSpace(c.Query("auth_index"))
-	if authIndex == "" {
-		authIndex = strings.TrimSpace(c.Query("authIndex"))
-	}
-	if authIndex == "" {
-		authIndex = strings.TrimSpace(c.Query("AuthIndex"))
-	}
-
-	auth := h.findCopilotAuth(authIndex)
-	if auth == nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "no github copilot credential found"})
-		return
-	}
-
-	token, tokenErr := h.resolveTokenForAuth(c.Request.Context(), auth)
-	if tokenErr != nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "failed to refresh copilot token"})
-		return
-	}
-	if token == "" {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "copilot token not found"})
-		return
-	}
-
-	apiURL := "https://api.github.com/copilot_internal/user"
-	req, errNewRequest := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, apiURL, nil)
-	if errNewRequest != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to build request"})
-		return
-	}
-
-	req.Header.Set("Authorization", "Bearer "+token)
-	req.Header.Set("User-Agent", "CLIProxyAPIPlus")
-	req.Header.Set("Accept", "application/json")
-
-	httpClient := &http.Client{
-		Timeout:   defaultAPICallTimeout,
-		Transport: h.apiCallTransport(auth),
-	}
-
-	resp, errDo := httpClient.Do(req)
-	if errDo != nil {
-		log.WithError(errDo).Debug("copilot quota request failed")
-		c.JSON(http.StatusBadGateway, gin.H{"error": "request failed"})
-		return
-	}
-	defer func() {
-		if errClose := resp.Body.Close(); errClose != nil {
-			log.Errorf("response body close error: %v", errClose)
-		}
-	}()
-
-	respBody, errReadAll := io.ReadAll(resp.Body)
-	if errReadAll != nil {
-		c.JSON(http.StatusBadGateway, gin.H{"error": "failed to read response"})
-		return
-	}
-
-	if resp.StatusCode != http.StatusOK {
-		c.JSON(http.StatusBadGateway, gin.H{
-			"error":       "github api request failed",
-			"status_code": resp.StatusCode,
-			"body":        string(respBody),
-		})
-		return
-	}
-
-	var usage CopilotUsageResponse
-	if errUnmarshal := json.Unmarshal(respBody, &usage); errUnmarshal != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to parse response"})
-		return
-	}
-
-	c.JSON(http.StatusOK, usage)
-}
-
-// findCopilotAuth locates a GitHub Copilot credential by auth_index or returns the first available one
-func (h *Handler) findCopilotAuth(authIndex string) *coreauth.Auth {
-	if h == nil || h.authManager == nil {
-		return nil
-	}
-
-	auths := h.authManager.List()
-	var firstCopilot *coreauth.Auth
-
-	for _, auth := range auths {
-		if auth == nil {
-			continue
-		}
-
-		provider := strings.ToLower(strings.TrimSpace(auth.Provider))
-		if provider != "copilot" && provider != "github" && provider != "github-copilot" {
-			continue
-		}
-
-		if firstCopilot == nil {
-			firstCopilot = auth
-		}
-
-		if authIndex != "" {
-			auth.EnsureIndex()
-			if auth.Index == authIndex {
-				return auth
-			}
-		}
-	}
-
-	return firstCopilot
-}
-
-// enrichCopilotTokenResponse fetches quota information and adds it to the Copilot token response body
-func (h *Handler) enrichCopilotTokenResponse(ctx context.Context, response apiCallResponse, auth *coreauth.Auth, originalURL string) apiCallResponse {
-	if auth == nil || response.Body == "" {
-		return response
-	}
-
-	// Parse the token response to check if it's enterprise (null limited_user_quotas)
-	var tokenResp map[string]interface{}
-	if err := json.Unmarshal([]byte(response.Body), &tokenResp); err != nil {
-		log.WithError(err).Debug("enrichCopilotTokenResponse: failed to parse copilot token response")
-		return response
-	}
-
-	// Get the GitHub token to call the copilot_internal/user endpoint
-	token, tokenErr := h.resolveTokenForAuth(ctx, auth)
-	if tokenErr != nil {
-		log.WithError(tokenErr).Debug("enrichCopilotTokenResponse: failed to resolve token")
-		return response
-	}
-	if token == "" {
-		return response
-	}
-
-	// Fetch quota information from /copilot_internal/user
-	// Derive the base URL from the original token request to support proxies and test servers
-	parsedURL, errParse := url.Parse(originalURL)
-	if errParse != nil {
-		log.WithError(errParse).Debug("enrichCopilotTokenResponse: failed to parse URL")
-		return response
-	}
-	quotaURL := fmt.Sprintf("%s://%s/copilot_internal/user", parsedURL.Scheme, parsedURL.Host)
-
-	req, errNewRequest := http.NewRequestWithContext(ctx, http.MethodGet, quotaURL, nil)
-	if errNewRequest != nil {
-		log.WithError(errNewRequest).Debug("enrichCopilotTokenResponse: failed to build request")
-		return response
-	}
-
-	req.Header.Set("Authorization", "Bearer "+token)
-	req.Header.Set("User-Agent", "CLIProxyAPIPlus")
-	req.Header.Set("Accept", "application/json")
-
-	httpClient := &http.Client{
-		Timeout:   defaultAPICallTimeout,
-		Transport: h.apiCallTransport(auth),
-	}
-
-	quotaResp, errDo := httpClient.Do(req)
-	if errDo != nil {
-		log.WithError(errDo).Debug("enrichCopilotTokenResponse: quota fetch HTTP request failed")
-		return response
-	}
-
-	defer func() {
-		if errClose := quotaResp.Body.Close(); errClose != nil {
-			log.Errorf("quota response body close error: %v", errClose)
-		}
-	}()
-
-	if quotaResp.StatusCode != http.StatusOK {
-		return response
-	}
-
-	quotaBody, errReadAll := io.ReadAll(quotaResp.Body)
-	if errReadAll != nil {
-		log.WithError(errReadAll).Debug("enrichCopilotTokenResponse: failed to read response")
-		return response
-	}
-
-	// Parse the quota response
-	var quotaData CopilotUsageResponse
-	if err := json.Unmarshal(quotaBody, &quotaData); err != nil {
-		log.WithError(err).Debug("enrichCopilotTokenResponse: failed to parse response")
-		return response
-	}
-
-	// Check if this is an enterprise account by looking for quota_snapshots in the response
-	// Enterprise accounts have quota_snapshots, non-enterprise have limited_user_quotas
-	var quotaRaw map[string]interface{}
-	if err := json.Unmarshal(quotaBody, &quotaRaw); err == nil {
-		if _, hasQuotaSnapshots := quotaRaw["quota_snapshots"]; hasQuotaSnapshots {
-			// Enterprise account - has quota_snapshots
-			tokenResp["quota_snapshots"] = quotaData.QuotaSnapshots
-			tokenResp["access_type_sku"] = quotaData.AccessTypeSKU
-			tokenResp["copilot_plan"] = quotaData.CopilotPlan
-
-			// Add quota reset date for enterprise (quota_reset_date_utc)
-			if quotaResetDateUTC, ok := quotaRaw["quota_reset_date_utc"]; ok {
-				tokenResp["quota_reset_date"] = quotaResetDateUTC
-			} else if quotaData.QuotaResetDate != "" {
-				tokenResp["quota_reset_date"] = quotaData.QuotaResetDate
-			}
-		} else {
-			// Non-enterprise account - build quota from limited_user_quotas and monthly_quotas
-			var quotaSnapshots QuotaSnapshots
-
-			// Get monthly quotas (total entitlement) and limited_user_quotas (remaining)
-			monthlyQuotas, hasMonthly := quotaRaw["monthly_quotas"].(map[string]interface{})
-			limitedQuotas, hasLimited := quotaRaw["limited_user_quotas"].(map[string]interface{})
-
-			// Process chat quota
-			if hasMonthly && hasLimited {
-				if chatTotal, ok := monthlyQuotas["chat"].(float64); ok {
-					chatRemaining := chatTotal // default to full if no limited quota
-					if chatLimited, ok := limitedQuotas["chat"].(float64); ok {
-						chatRemaining = chatLimited
-					}
-					percentRemaining := 0.0
-					if chatTotal > 0 {
-						percentRemaining = (chatRemaining / chatTotal) * 100.0
-					}
-					quotaSnapshots.Chat = QuotaDetail{
-						Entitlement:      chatTotal,
-						Remaining:        chatRemaining,
-						QuotaRemaining:   chatRemaining,
-						PercentRemaining: percentRemaining,
-						QuotaID:          "chat",
-						Unlimited:        false,
-					}
-				}
-
-				// Process completions quota
-				if completionsTotal, ok := monthlyQuotas["completions"].(float64); ok {
-					completionsRemaining := completionsTotal // default to full if no limited quota
-					if completionsLimited, ok := limitedQuotas["completions"].(float64); ok {
-						completionsRemaining = completionsLimited
-					}
-					percentRemaining := 0.0
-					if completionsTotal > 0 {
-						percentRemaining = (completionsRemaining / completionsTotal) * 100.0
-					}
-					quotaSnapshots.Completions = QuotaDetail{
-						Entitlement:      completionsTotal,
-						Remaining:        completionsRemaining,
-						QuotaRemaining:   completionsRemaining,
-						PercentRemaining: percentRemaining,
-						QuotaID:          "completions",
-						Unlimited:        false,
-					}
-				}
-			}
-
-			// Premium interactions don't exist for non-enterprise, leave as zero values
-			quotaSnapshots.PremiumInteractions = QuotaDetail{
-				QuotaID:   "premium_interactions",
-				Unlimited: false,
-			}
-
-			// Add quota_snapshots to the token response
-			tokenResp["quota_snapshots"] = quotaSnapshots
-			tokenResp["access_type_sku"] = quotaData.AccessTypeSKU
-			tokenResp["copilot_plan"] = quotaData.CopilotPlan
-
-			// Add quota reset date for non-enterprise (limited_user_reset_date)
-			if limitedResetDate, ok := quotaRaw["limited_user_reset_date"]; ok {
-				tokenResp["quota_reset_date"] = limitedResetDate
-			}
-		}
-	}
-
-	// Re-serialize the enriched response
-	enrichedBody, errMarshal := json.Marshal(tokenResp)
-	if errMarshal != nil {
-		log.WithError(errMarshal).Debug("failed to marshal enriched response")
-		return response
-	}
-
-	response.Body = string(enrichedBody)
-
-	return response
-}
diff --git a/internal/api/handlers/management/auth_files.go b/internal/api/handlers/management/auth_files.go
index bd1338a279..996ea1a778 100644
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -3,9 +3,7 @@ package management
 import (
 	"bytes"
 	"context"
-	"crypto/rand"
 	"crypto/sha256"
-	"encoding/base64"
 	"encoding/hex"
 	"encoding/json"
 	"errors"
@@ -13,7 +11,6 @@ import (
 	"io"
 	"net"
 	"net/http"
-	"net/url"
 	"os"
 	"path/filepath"
 	"sort"
@@ -26,12 +23,8 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/antigravity"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
 	geminiAuth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/gemini"
 	iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kilo"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kimi"
-	kiroauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
@@ -814,87 +807,6 @@ func (h *Handler) PatchAuthFileStatus(c *gin.Context) {
 	c.JSON(http.StatusOK, gin.H{"status": "ok", "disabled": *req.Disabled})
 }
 
-// PatchAuthFileFields updates editable fields (prefix, proxy_url, priority) of an auth file.
-func (h *Handler) PatchAuthFileFields(c *gin.Context) {
-	if h.authManager == nil {
-		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "core auth manager unavailable"})
-		return
-	}
-
-	var req struct {
-		Name     string  `json:"name"`
-		Prefix   *string `json:"prefix"`
-		ProxyURL *string `json:"proxy_url"`
-		Priority *int    `json:"priority"`
-	}
-	if err := c.ShouldBindJSON(&req); err != nil {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
-		return
-	}
-
-	name := strings.TrimSpace(req.Name)
-	if name == "" {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "name is required"})
-		return
-	}
-
-	ctx := c.Request.Context()
-
-	// Find auth by name or ID
-	var targetAuth *coreauth.Auth
-	if auth, ok := h.authManager.GetByID(name); ok {
-		targetAuth = auth
-	} else {
-		auths := h.authManager.List()
-		for _, auth := range auths {
-			if auth.FileName == name {
-				targetAuth = auth
-				break
-			}
-		}
-	}
-
-	if targetAuth == nil {
-		c.JSON(http.StatusNotFound, gin.H{"error": "auth file not found"})
-		return
-	}
-
-	changed := false
-	if req.Prefix != nil {
-		targetAuth.Prefix = *req.Prefix
-		changed = true
-	}
-	if req.ProxyURL != nil {
-		targetAuth.ProxyURL = *req.ProxyURL
-		changed = true
-	}
-	if req.Priority != nil {
-		if targetAuth.Metadata == nil {
-			targetAuth.Metadata = make(map[string]any)
-		}
-		if *req.Priority == 0 {
-			delete(targetAuth.Metadata, "priority")
-		} else {
-			targetAuth.Metadata["priority"] = *req.Priority
-		}
-		changed = true
-	}
-
-	if !changed {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "no fields to update"})
-		return
-	}
-
-	targetAuth.UpdatedAt = time.Now()
-
-	if _, err := h.authManager.Update(ctx, targetAuth); err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to update auth: %v", err)})
-		return
-	}
-
-	c.JSON(http.StatusOK, gin.H{"status": "ok"})
-}
-
 func (h *Handler) disableAuth(ctx context.Context, id string) {
 	if h == nil || h.authManager == nil {
 		return
@@ -1275,30 +1187,6 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 			}
 			ts.ProjectID = strings.Join(projects, ",")
 			ts.Checked = true
-		} else if strings.EqualFold(requestedProjectID, "GOOGLE_ONE") {
-			ts.Auto = false
-			if errSetup := performGeminiCLISetup(ctx, gemClient, &ts, ""); errSetup != nil {
-				log.Errorf("Google One auto-discovery failed: %v", errSetup)
-				SetOAuthSessionError(state, "Google One auto-discovery failed")
-				return
-			}
-			if strings.TrimSpace(ts.ProjectID) == "" {
-				log.Error("Google One auto-discovery returned empty project ID")
-				SetOAuthSessionError(state, "Google One auto-discovery returned empty project ID")
-				return
-			}
-			isChecked, errCheck := checkCloudAPIIsEnabled(ctx, gemClient, ts.ProjectID)
-			if errCheck != nil {
-				log.Errorf("Failed to verify Cloud AI API status: %v", errCheck)
-				SetOAuthSessionError(state, "Failed to verify Cloud AI API status")
-				return
-			}
-			ts.Checked = isChecked
-			if !isChecked {
-				log.Error("Cloud AI API is not enabled for the auto-discovered project")
-				SetOAuthSessionError(state, "Cloud AI API not enabled")
-				return
-			}
 		} else {
 			if errEnsure := ensureGeminiProjectAndOnboard(ctx, gemClient, &ts, requestedProjectID); errEnsure != nil {
 				log.Errorf("Failed to complete Gemini CLI onboarding: %v", errEnsure)
@@ -1720,82 +1608,6 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 	c.JSON(200, gin.H{"status": "ok", "url": authURL, "state": state})
 }
 
-func (h *Handler) RequestKimiToken(c *gin.Context) {
-	ctx := context.Background()
-
-	fmt.Println("Initializing Kimi authentication...")
-
-	state := fmt.Sprintf("kmi-%d", time.Now().UnixNano())
-	// Initialize Kimi auth service
-	kimiAuth := kimi.NewKimiAuth(h.cfg)
-
-	// Generate authorization URL
-	deviceFlow, errStartDeviceFlow := kimiAuth.StartDeviceFlow(ctx)
-	if errStartDeviceFlow != nil {
-		log.Errorf("Failed to generate authorization URL: %v", errStartDeviceFlow)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate authorization url"})
-		return
-	}
-	authURL := deviceFlow.VerificationURIComplete
-	if authURL == "" {
-		authURL = deviceFlow.VerificationURI
-	}
-
-	RegisterOAuthSession(state, "kimi")
-
-	go func() {
-		fmt.Println("Waiting for authentication...")
-		authBundle, errWaitForAuthorization := kimiAuth.WaitForAuthorization(ctx, deviceFlow)
-		if errWaitForAuthorization != nil {
-			SetOAuthSessionError(state, "Authentication failed")
-			fmt.Printf("Authentication failed: %v\n", errWaitForAuthorization)
-			return
-		}
-
-		// Create token storage
-		tokenStorage := kimiAuth.CreateTokenStorage(authBundle)
-
-		metadata := map[string]any{
-			"type":          "kimi",
-			"access_token":  authBundle.TokenData.AccessToken,
-			"refresh_token": authBundle.TokenData.RefreshToken,
-			"token_type":    authBundle.TokenData.TokenType,
-			"scope":         authBundle.TokenData.Scope,
-			"timestamp":     time.Now().UnixMilli(),
-		}
-		if authBundle.TokenData.ExpiresAt > 0 {
-			expired := time.Unix(authBundle.TokenData.ExpiresAt, 0).UTC().Format(time.RFC3339)
-			metadata["expired"] = expired
-		}
-		if strings.TrimSpace(authBundle.DeviceID) != "" {
-			metadata["device_id"] = strings.TrimSpace(authBundle.DeviceID)
-		}
-
-		fileName := fmt.Sprintf("kimi-%d.json", time.Now().UnixMilli())
-		record := &coreauth.Auth{
-			ID:       fileName,
-			Provider: "kimi",
-			FileName: fileName,
-			Label:    "Kimi User",
-			Storage:  tokenStorage,
-			Metadata: metadata,
-		}
-		savedPath, errSave := h.saveTokenRecord(ctx, record)
-		if errSave != nil {
-			log.Errorf("Failed to save authentication tokens: %v", errSave)
-			SetOAuthSessionError(state, "Failed to save authentication tokens")
-			return
-		}
-
-		fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
-		fmt.Println("You can now use Kimi services through this CLI")
-		CompleteOAuthSession(state)
-		CompleteOAuthSessionsByProvider("kimi")
-	}()
-
-	c.JSON(200, gin.H{"status": "ok", "url": authURL, "state": state})
-}
-
 func (h *Handler) RequestIFlowToken(c *gin.Context) {
 	ctx := context.Background()
 
@@ -1909,89 +1721,6 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 	c.JSON(http.StatusOK, gin.H{"status": "ok", "url": authURL, "state": state})
 }
 
-func (h *Handler) RequestGitHubToken(c *gin.Context) {
-	ctx := context.Background()
-
-	fmt.Println("Initializing GitHub Copilot authentication...")
-
-	state := fmt.Sprintf("gh-%d", time.Now().UnixNano())
-
-	// Initialize Copilot auth service
-	// We need to import "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot" first if not present
-	// Assuming copilot package is imported as "copilot"
-	deviceClient := copilot.NewDeviceFlowClient(h.cfg)
-
-	// Initiate device flow
-	deviceCode, err := deviceClient.RequestDeviceCode(ctx)
-	if err != nil {
-		log.Errorf("Failed to initiate device flow: %v", err)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to initiate device flow"})
-		return
-	}
-
-	authURL := deviceCode.VerificationURI
-	userCode := deviceCode.UserCode
-
-	RegisterOAuthSession(state, "github")
-
-	go func() {
-		fmt.Printf("Please visit %s and enter code: %s\n", authURL, userCode)
-
-		tokenData, errPoll := deviceClient.PollForToken(ctx, deviceCode)
-		if errPoll != nil {
-			SetOAuthSessionError(state, "Authentication failed")
-			fmt.Printf("Authentication failed: %v\n", errPoll)
-			return
-		}
-
-		username, errUser := deviceClient.FetchUserInfo(ctx, tokenData.AccessToken)
-		if errUser != nil {
-			log.Warnf("Failed to fetch user info: %v", errUser)
-			username = "github-user"
-		}
-
-		tokenStorage := &copilot.CopilotTokenStorage{
-			AccessToken: tokenData.AccessToken,
-			TokenType:   tokenData.TokenType,
-			Scope:       tokenData.Scope,
-			Username:    username,
-			Type:        "github-copilot",
-		}
-
-		fileName := fmt.Sprintf("github-%s.json", username)
-		record := &coreauth.Auth{
-			ID:       fileName,
-			Provider: "github",
-			FileName: fileName,
-			Storage:  tokenStorage,
-			Metadata: map[string]any{
-				"email":    username,
-				"username": username,
-			},
-		}
-
-		savedPath, errSave := h.saveTokenRecord(ctx, record)
-		if errSave != nil {
-			log.Errorf("Failed to save authentication tokens: %v", errSave)
-			SetOAuthSessionError(state, "Failed to save authentication tokens")
-			return
-		}
-
-		fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
-		fmt.Println("You can now use GitHub Copilot services through this CLI")
-		CompleteOAuthSession(state)
-		CompleteOAuthSessionsByProvider("github")
-	}()
-
-	c.JSON(200, gin.H{
-		"status":           "ok",
-		"url":              authURL,
-		"state":            state,
-		"user_code":        userCode,
-		"verification_uri": authURL,
-	})
-}
-
 func (h *Handler) RequestIFlowCookieToken(c *gin.Context) {
 	ctx := context.Background()
 
@@ -2230,48 +1959,7 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 		}
 	}
 	if projectID == "" {
-		// Auto-discovery: try onboardUser without specifying a project
-		// to let Google auto-provision one (matches Gemini CLI headless behavior
-		// and Antigravity's FetchProjectID pattern).
-		autoOnboardReq := map[string]any{
-			"tierId":   tierID,
-			"metadata": metadata,
-		}
-
-		autoCtx, autoCancel := context.WithTimeout(ctx, 30*time.Second)
-		defer autoCancel()
-		for attempt := 1; ; attempt++ {
-			var onboardResp map[string]any
-			if errOnboard := callGeminiCLI(autoCtx, httpClient, "onboardUser", autoOnboardReq, &onboardResp); errOnboard != nil {
-				return fmt.Errorf("auto-discovery onboardUser: %w", errOnboard)
-			}
-
-			if done, okDone := onboardResp["done"].(bool); okDone && done {
-				if resp, okResp := onboardResp["response"].(map[string]any); okResp {
-					switch v := resp["cloudaicompanionProject"].(type) {
-					case string:
-						projectID = strings.TrimSpace(v)
-					case map[string]any:
-						if id, okID := v["id"].(string); okID {
-							projectID = strings.TrimSpace(id)
-						}
-					}
-				}
-				break
-			}
-
-			log.Debugf("Auto-discovery: onboarding in progress, attempt %d...", attempt)
-			select {
-			case <-autoCtx.Done():
-				return &projectSelectionRequiredError{}
-			case <-time.After(2 * time.Second):
-			}
-		}
-
-		if projectID == "" {
-			return &projectSelectionRequiredError{}
-		}
-		log.Infof("Auto-discovered project ID via onboarding: %s", projectID)
+		return &projectSelectionRequiredError{}
 	}
 
 	onboardReqBody := map[string]any{
@@ -2496,407 +2184,8 @@ func (h *Handler) GetAuthStatus(c *gin.Context) {
 		return
 	}
 	if status != "" {
-		if strings.HasPrefix(status, "device_code|") {
-			parts := strings.SplitN(status, "|", 3)
-			if len(parts) == 3 {
-				c.JSON(http.StatusOK, gin.H{
-					"status":           "device_code",
-					"verification_url": parts[1],
-					"user_code":        parts[2],
-				})
-				return
-			}
-		}
-		if strings.HasPrefix(status, "auth_url|") {
-			authURL := strings.TrimPrefix(status, "auth_url|")
-			c.JSON(http.StatusOK, gin.H{
-				"status": "auth_url",
-				"url":    authURL,
-			})
-			return
-		}
 		c.JSON(http.StatusOK, gin.H{"status": "error", "error": status})
 		return
 	}
 	c.JSON(http.StatusOK, gin.H{"status": "wait"})
 }
-
-const kiroCallbackPort = 9876
-
-func (h *Handler) RequestKiroToken(c *gin.Context) {
-	ctx := context.Background()
-
-	// Get the login method from query parameter (default: aws for device code flow)
-	method := strings.ToLower(strings.TrimSpace(c.Query("method")))
-	if method == "" {
-		method = "aws"
-	}
-
-	fmt.Println("Initializing Kiro authentication...")
-
-	state := fmt.Sprintf("kiro-%d", time.Now().UnixNano())
-
-	switch method {
-	case "aws", "builder-id":
-		RegisterOAuthSession(state, "kiro")
-
-		// AWS Builder ID uses device code flow (no callback needed)
-		go func() {
-			ssoClient := kiroauth.NewSSOOIDCClient(h.cfg)
-
-			// Step 1: Register client
-			fmt.Println("Registering client...")
-			regResp, errRegister := ssoClient.RegisterClient(ctx)
-			if errRegister != nil {
-				log.Errorf("Failed to register client: %v", errRegister)
-				SetOAuthSessionError(state, "Failed to register client")
-				return
-			}
-
-			// Step 2: Start device authorization
-			fmt.Println("Starting device authorization...")
-			authResp, errAuth := ssoClient.StartDeviceAuthorization(ctx, regResp.ClientID, regResp.ClientSecret)
-			if errAuth != nil {
-				log.Errorf("Failed to start device auth: %v", errAuth)
-				SetOAuthSessionError(state, "Failed to start device authorization")
-				return
-			}
-
-			// Store the verification URL for the frontend to display.
-			// Using "|" as separator because URLs contain ":".
-			SetOAuthSessionError(state, "device_code|"+authResp.VerificationURIComplete+"|"+authResp.UserCode)
-
-			// Step 3: Poll for token
-			fmt.Println("Waiting for authorization...")
-			interval := 5 * time.Second
-			if authResp.Interval > 0 {
-				interval = time.Duration(authResp.Interval) * time.Second
-			}
-			deadline := time.Now().Add(time.Duration(authResp.ExpiresIn) * time.Second)
-
-			for time.Now().Before(deadline) {
-				select {
-				case <-ctx.Done():
-					SetOAuthSessionError(state, "Authorization cancelled")
-					return
-				case <-time.After(interval):
-					tokenResp, errToken := ssoClient.CreateToken(ctx, regResp.ClientID, regResp.ClientSecret, authResp.DeviceCode)
-					if errToken != nil {
-						errStr := errToken.Error()
-						if strings.Contains(errStr, "authorization_pending") {
-							continue
-						}
-						if strings.Contains(errStr, "slow_down") {
-							interval += 5 * time.Second
-							continue
-						}
-						log.Errorf("Token creation failed: %v", errToken)
-						SetOAuthSessionError(state, "Token creation failed")
-						return
-					}
-
-					// Success! Save the token
-					expiresAt := time.Now().Add(time.Duration(tokenResp.ExpiresIn) * time.Second)
-					email := kiroauth.ExtractEmailFromJWT(tokenResp.AccessToken)
-
-					idPart := kiroauth.SanitizeEmailForFilename(email)
-					if idPart == "" {
-						idPart = fmt.Sprintf("%d", time.Now().UnixNano()%100000)
-					}
-
-					now := time.Now()
-					fileName := fmt.Sprintf("kiro-aws-%s.json", idPart)
-
-					record := &coreauth.Auth{
-						ID:       fileName,
-						Provider: "kiro",
-						FileName: fileName,
-						Metadata: map[string]any{
-							"type":          "kiro",
-							"access_token":  tokenResp.AccessToken,
-							"refresh_token": tokenResp.RefreshToken,
-							"expires_at":    expiresAt.Format(time.RFC3339),
-							"auth_method":   "builder-id",
-							"provider":      "AWS",
-							"client_id":     regResp.ClientID,
-							"client_secret": regResp.ClientSecret,
-							"email":         email,
-							"last_refresh":  now.Format(time.RFC3339),
-						},
-					}
-
-					savedPath, errSave := h.saveTokenRecord(ctx, record)
-					if errSave != nil {
-						log.Errorf("Failed to save authentication tokens: %v", errSave)
-						SetOAuthSessionError(state, "Failed to save authentication tokens")
-						return
-					}
-
-					fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
-					if email != "" {
-						fmt.Printf("Authenticated as: %s\n", email)
-					}
-					CompleteOAuthSession(state)
-					return
-				}
-			}
-
-			SetOAuthSessionError(state, "Authorization timed out")
-		}()
-
-		// Return immediately with the state for polling
-		c.JSON(http.StatusOK, gin.H{"status": "ok", "state": state, "method": "device_code"})
-
-	case "google", "github":
-		RegisterOAuthSession(state, "kiro")
-
-		// Social auth uses protocol handler - for WEB UI we use a callback forwarder
-		provider := "Google"
-		if method == "github" {
-			provider = "Github"
-		}
-
-		isWebUI := isWebUIRequest(c)
-		if isWebUI {
-			targetURL, errTarget := h.managementCallbackURL("/kiro/callback")
-			if errTarget != nil {
-				log.WithError(errTarget).Error("failed to compute kiro callback target")
-				c.JSON(http.StatusInternalServerError, gin.H{"error": "callback server unavailable"})
-				return
-			}
-			if _, errStart := startCallbackForwarder(kiroCallbackPort, "kiro", targetURL); errStart != nil {
-				log.WithError(errStart).Error("failed to start kiro callback forwarder")
-				c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to start callback server"})
-				return
-			}
-		}
-
-		go func() {
-			if isWebUI {
-				defer stopCallbackForwarder(kiroCallbackPort)
-			}
-
-			socialClient := kiroauth.NewSocialAuthClient(h.cfg)
-
-			// Generate PKCE codes
-			codeVerifier, codeChallenge, errPKCE := generateKiroPKCE()
-			if errPKCE != nil {
-				log.Errorf("Failed to generate PKCE: %v", errPKCE)
-				SetOAuthSessionError(state, "Failed to generate PKCE")
-				return
-			}
-
-			// Build login URL
-			authURL := fmt.Sprintf("%s/login?idp=%s&redirect_uri=%s&code_challenge=%s&code_challenge_method=S256&state=%s&prompt=select_account",
-				"https://prod.us-east-1.auth.desktop.kiro.dev",
-				provider,
-				url.QueryEscape(kiroauth.KiroRedirectURI),
-				codeChallenge,
-				state,
-			)
-
-			// Store auth URL for frontend.
-			// Using "|" as separator because URLs contain ":".
-			SetOAuthSessionError(state, "auth_url|"+authURL)
-
-			// Wait for callback file
-			waitFile := filepath.Join(h.cfg.AuthDir, fmt.Sprintf(".oauth-kiro-%s.oauth", state))
-			deadline := time.Now().Add(5 * time.Minute)
-
-			for {
-				if time.Now().After(deadline) {
-					log.Error("oauth flow timed out")
-					SetOAuthSessionError(state, "OAuth flow timed out")
-					return
-				}
-				if data, errRead := os.ReadFile(waitFile); errRead == nil {
-					var m map[string]string
-					_ = json.Unmarshal(data, &m)
-					_ = os.Remove(waitFile)
-					if errStr := m["error"]; errStr != "" {
-						log.Errorf("Authentication failed: %s", errStr)
-						SetOAuthSessionError(state, "Authentication failed")
-						return
-					}
-					if m["state"] != state {
-						log.Errorf("State mismatch")
-						SetOAuthSessionError(state, "State mismatch")
-						return
-					}
-					code := m["code"]
-					if code == "" {
-						log.Error("No authorization code received")
-						SetOAuthSessionError(state, "No authorization code received")
-						return
-					}
-
-					// Exchange code for tokens
-					tokenReq := &kiroauth.CreateTokenRequest{
-						Code:         code,
-						CodeVerifier: codeVerifier,
-						RedirectURI:  kiroauth.KiroRedirectURI,
-					}
-
-					tokenResp, errToken := socialClient.CreateToken(ctx, tokenReq)
-					if errToken != nil {
-						log.Errorf("Failed to exchange code for tokens: %v", errToken)
-						SetOAuthSessionError(state, "Failed to exchange code for tokens")
-						return
-					}
-
-					// Save the token
-					expiresIn := tokenResp.ExpiresIn
-					if expiresIn <= 0 {
-						expiresIn = 3600
-					}
-					expiresAt := time.Now().Add(time.Duration(expiresIn) * time.Second)
-					email := kiroauth.ExtractEmailFromJWT(tokenResp.AccessToken)
-
-					idPart := kiroauth.SanitizeEmailForFilename(email)
-					if idPart == "" {
-						idPart = fmt.Sprintf("%d", time.Now().UnixNano()%100000)
-					}
-
-					now := time.Now()
-					fileName := fmt.Sprintf("kiro-%s-%s.json", strings.ToLower(provider), idPart)
-
-					record := &coreauth.Auth{
-						ID:       fileName,
-						Provider: "kiro",
-						FileName: fileName,
-						Metadata: map[string]any{
-							"type":          "kiro",
-							"access_token":  tokenResp.AccessToken,
-							"refresh_token": tokenResp.RefreshToken,
-							"profile_arn":   tokenResp.ProfileArn,
-							"expires_at":    expiresAt.Format(time.RFC3339),
-							"auth_method":   "social",
-							"provider":      provider,
-							"email":         email,
-							"last_refresh":  now.Format(time.RFC3339),
-						},
-					}
-
-					savedPath, errSave := h.saveTokenRecord(ctx, record)
-					if errSave != nil {
-						log.Errorf("Failed to save authentication tokens: %v", errSave)
-						SetOAuthSessionError(state, "Failed to save authentication tokens")
-						return
-					}
-
-					fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
-					if email != "" {
-						fmt.Printf("Authenticated as: %s\n", email)
-					}
-					CompleteOAuthSession(state)
-					return
-				}
-				time.Sleep(500 * time.Millisecond)
-			}
-		}()
-
-		c.JSON(http.StatusOK, gin.H{"status": "ok", "state": state, "method": "social"})
-
-	default:
-		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid method, use 'aws', 'google', or 'github'"})
-	}
-}
-
-// generateKiroPKCE generates PKCE code verifier and challenge for Kiro OAuth.
-func generateKiroPKCE() (verifier, challenge string, err error) {
-	b := make([]byte, 32)
-	if _, errRead := io.ReadFull(rand.Reader, b); errRead != nil {
-		return "", "", fmt.Errorf("failed to generate random bytes: %w", errRead)
-	}
-	verifier = base64.RawURLEncoding.EncodeToString(b)
-
-	h := sha256.Sum256([]byte(verifier))
-	challenge = base64.RawURLEncoding.EncodeToString(h[:])
-
-	return verifier, challenge, nil
-}
-
-func (h *Handler) RequestKiloToken(c *gin.Context) {
-	ctx := context.Background()
-
-	fmt.Println("Initializing Kilo authentication...")
-
-	state := fmt.Sprintf("kil-%d", time.Now().UnixNano())
-	kilocodeAuth := kilo.NewKiloAuth()
-
-	resp, err := kilocodeAuth.InitiateDeviceFlow(ctx)
-	if err != nil {
-		log.Errorf("Failed to initiate device flow: %v", err)
-		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to initiate device flow"})
-		return
-	}
-
-	RegisterOAuthSession(state, "kilo")
-
-	go func() {
-		fmt.Printf("Please visit %s and enter code: %s\n", resp.VerificationURL, resp.Code)
-
-		status, err := kilocodeAuth.PollForToken(ctx, resp.Code)
-		if err != nil {
-			SetOAuthSessionError(state, "Authentication failed")
-			fmt.Printf("Authentication failed: %v\n", err)
-			return
-		}
-
-		profile, err := kilocodeAuth.GetProfile(ctx, status.Token)
-		if err != nil {
-			log.Warnf("Failed to fetch profile: %v", err)
-			profile = &kilo.Profile{Email: status.UserEmail}
-		}
-
-		var orgID string
-		if len(profile.Orgs) > 0 {
-			orgID = profile.Orgs[0].ID
-		}
-
-		defaults, err := kilocodeAuth.GetDefaults(ctx, status.Token, orgID)
-		if err != nil {
-			defaults = &kilo.Defaults{}
-		}
-
-		ts := &kilo.KiloTokenStorage{
-			Token:          status.Token,
-			OrganizationID: orgID,
-			Model:          defaults.Model,
-			Email:          status.UserEmail,
-			Type:           "kilo",
-		}
-
-		fileName := kilo.CredentialFileName(status.UserEmail)
-		record := &coreauth.Auth{
-			ID:       fileName,
-			Provider: "kilo",
-			FileName: fileName,
-			Storage:  ts,
-			Metadata: map[string]any{
-				"email":           status.UserEmail,
-				"organization_id": orgID,
-				"model":          defaults.Model,
-			},
-		}
-
-		savedPath, errSave := h.saveTokenRecord(ctx, record)
-		if errSave != nil {
-			log.Errorf("Failed to save authentication tokens: %v", errSave)
-			SetOAuthSessionError(state, "Failed to save authentication tokens")
-			return
-		}
-
-		fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
-		CompleteOAuthSession(state)
-		CompleteOAuthSessionsByProvider("kilo")
-	}()
-
-	c.JSON(200, gin.H{
-		"status":           "ok",
-		"url":              resp.VerificationURL,
-		"state":            state,
-		"user_code":        resp.Code,
-		"verification_uri": resp.VerificationURL,
-	})
-}
diff --git a/internal/api/handlers/management/config_basic.go b/internal/api/handlers/management/config_basic.go
index 72f73d32ca..ee2d5c353f 100644
--- a/internal/api/handlers/management/config_basic.go
+++ b/internal/api/handlers/management/config_basic.go
@@ -19,8 +19,8 @@ import (
 )
 
 const (
-	latestReleaseURL       = "https://api.github.com/repos/router-for-me/CLIProxyAPIPlus/releases/latest"
-	latestReleaseUserAgent = "CLIProxyAPIPlus"
+	latestReleaseURL       = "https://api.github.com/repos/router-for-me/CLIProxyAPI/releases/latest"
+	latestReleaseUserAgent = "CLIProxyAPI"
 )
 
 func (h *Handler) GetConfig(c *gin.Context) {
@@ -28,7 +28,8 @@ func (h *Handler) GetConfig(c *gin.Context) {
 		c.JSON(200, gin.H{})
 		return
 	}
-	c.JSON(200, new(*h.cfg))
+	cfgCopy := *h.cfg
+	c.JSON(200, &cfgCopy)
 }
 
 type releaseInfo struct {
diff --git a/internal/api/handlers/management/config_lists.go b/internal/api/handlers/management/config_lists.go
index 0153a38129..4e0e02843b 100644
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -109,13 +109,14 @@ func (h *Handler) GetAPIKeys(c *gin.Context) { c.JSON(200, gin.H{"api-keys": h.c
 func (h *Handler) PutAPIKeys(c *gin.Context) {
 	h.putStringList(c, func(v []string) {
 		h.cfg.APIKeys = append([]string(nil), v...)
+		h.cfg.Access.Providers = nil
 	}, nil)
 }
 func (h *Handler) PatchAPIKeys(c *gin.Context) {
-	h.patchStringList(c, &h.cfg.APIKeys, func() {})
+	h.patchStringList(c, &h.cfg.APIKeys, func() { h.cfg.Access.Providers = nil })
 }
 func (h *Handler) DeleteAPIKeys(c *gin.Context) {
-	h.deleteFromStringList(c, &h.cfg.APIKeys, func() {})
+	h.deleteFromStringList(c, &h.cfg.APIKeys, func() { h.cfg.Access.Providers = nil })
 }
 
 // gemini-api-key: []GeminiKey
@@ -753,22 +754,18 @@ func (h *Handler) PatchOAuthModelAlias(c *gin.Context) {
 	normalizedMap := sanitizedOAuthModelAlias(map[string][]config.OAuthModelAlias{channel: body.Aliases})
 	normalized := normalizedMap[channel]
 	if len(normalized) == 0 {
-		// Only delete if channel exists, otherwise just create empty entry
-		if h.cfg.OAuthModelAlias != nil {
-			if _, ok := h.cfg.OAuthModelAlias[channel]; ok {
-				delete(h.cfg.OAuthModelAlias, channel)
-				if len(h.cfg.OAuthModelAlias) == 0 {
-					h.cfg.OAuthModelAlias = nil
-				}
-				h.persist(c)
-				return
-			}
-		}
-		// Create new channel with empty aliases
 		if h.cfg.OAuthModelAlias == nil {
-			h.cfg.OAuthModelAlias = make(map[string][]config.OAuthModelAlias)
+			c.JSON(404, gin.H{"error": "channel not found"})
+			return
+		}
+		if _, ok := h.cfg.OAuthModelAlias[channel]; !ok {
+			c.JSON(404, gin.H{"error": "channel not found"})
+			return
+		}
+		delete(h.cfg.OAuthModelAlias, channel)
+		if len(h.cfg.OAuthModelAlias) == 0 {
+			h.cfg.OAuthModelAlias = nil
 		}
-		h.cfg.OAuthModelAlias[channel] = []config.OAuthModelAlias{}
 		h.persist(c)
 		return
 	}
@@ -796,10 +793,10 @@ func (h *Handler) DeleteOAuthModelAlias(c *gin.Context) {
 		c.JSON(404, gin.H{"error": "channel not found"})
 		return
 	}
-	// Set to nil instead of deleting the key so that the "explicitly disabled"
-	// marker survives config reload and prevents SanitizeOAuthModelAlias from
-	// re-injecting default aliases (fixes #222).
-	h.cfg.OAuthModelAlias[channel] = nil
+	delete(h.cfg.OAuthModelAlias, channel)
+	if len(h.cfg.OAuthModelAlias) == 0 {
+		h.cfg.OAuthModelAlias = nil
+	}
 	h.persist(c)
 }
 
diff --git a/internal/api/handlers/management/oauth_sessions.go b/internal/api/handlers/management/oauth_sessions.go
index bc882e990e..05ff8d1f52 100644
--- a/internal/api/handlers/management/oauth_sessions.go
+++ b/internal/api/handlers/management/oauth_sessions.go
@@ -158,12 +158,7 @@ func (s *oauthSessionStore) IsPending(state, provider string) bool {
 		return false
 	}
 	if session.Status != "" {
-		if !strings.EqualFold(session.Provider, "kiro") {
-			return false
-		}
-		if !strings.HasPrefix(session.Status, "device_code|") && !strings.HasPrefix(session.Status, "auth_url|") {
-			return false
-		}
+		return false
 	}
 	if provider == "" {
 		return true
@@ -236,10 +231,6 @@ func NormalizeOAuthProvider(provider string) (string, error) {
 		return "antigravity", nil
 	case "qwen":
 		return "qwen", nil
-	case "kiro":
-		return "kiro", nil
-	case "github":
-		return "github", nil
 	default:
 		return "", errUnsupportedOAuthFlow
 	}
diff --git a/internal/api/middleware/request_logging.go b/internal/api/middleware/request_logging.go
index b57dd8aa42..2c9fdbdd04 100644
--- a/internal/api/middleware/request_logging.go
+++ b/internal/api/middleware/request_logging.go
@@ -15,12 +15,10 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 )
 
-const maxErrorOnlyCapturedRequestBodyBytes int64 = 1 << 20 // 1 MiB
-
 // RequestLoggingMiddleware creates a Gin middleware that logs HTTP requests and responses.
 // It captures detailed information about the request and response, including headers and body,
-// and uses the provided RequestLogger to record this data. When full request logging is disabled,
-// body capture is limited to small known-size payloads to avoid large per-request memory spikes.
+// and uses the provided RequestLogger to record this data. When logging is disabled in the
+// logger, it still captures data so that upstream errors can be persisted.
 func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {
 	return func(c *gin.Context) {
 		if logger == nil {
@@ -28,7 +26,7 @@ func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {
 			return
 		}
 
-		if shouldSkipMethodForRequestLogging(c.Request) {
+		if c.Request.Method == http.MethodGet {
 			c.Next()
 			return
 		}
@@ -39,10 +37,8 @@ func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {
 			return
 		}
 
-		loggerEnabled := logger.IsEnabled()
-
 		// Capture request information
-		requestInfo, err := captureRequestInfo(c, shouldCaptureRequestBody(loggerEnabled, c.Request))
+		requestInfo, err := captureRequestInfo(c)
 		if err != nil {
 			// Log error but continue processing
 			// In a real implementation, you might want to use a proper logger here
@@ -52,7 +48,7 @@ func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {
 
 		// Create response writer wrapper
 		wrapper := NewResponseWriterWrapper(c.Writer, logger, requestInfo)
-		if !loggerEnabled {
+		if !logger.IsEnabled() {
 			wrapper.logOnErrorOnly = true
 		}
 		c.Writer = wrapper
@@ -68,47 +64,10 @@ func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {
 	}
 }
 
-func shouldSkipMethodForRequestLogging(req *http.Request) bool {
-	if req == nil {
-		return true
-	}
-	if req.Method != http.MethodGet {
-		return false
-	}
-	return !isResponsesWebsocketUpgrade(req)
-}
-
-func isResponsesWebsocketUpgrade(req *http.Request) bool {
-	if req == nil || req.URL == nil {
-		return false
-	}
-	if req.URL.Path != "/v1/responses" {
-		return false
-	}
-	return strings.EqualFold(strings.TrimSpace(req.Header.Get("Upgrade")), "websocket")
-}
-
-func shouldCaptureRequestBody(loggerEnabled bool, req *http.Request) bool {
-	if loggerEnabled {
-		return true
-	}
-	if req == nil || req.Body == nil {
-		return false
-	}
-	contentType := strings.ToLower(strings.TrimSpace(req.Header.Get("Content-Type")))
-	if strings.HasPrefix(contentType, "multipart/form-data") {
-		return false
-	}
-	if req.ContentLength <= 0 {
-		return false
-	}
-	return req.ContentLength <= maxErrorOnlyCapturedRequestBodyBytes
-}
-
 // captureRequestInfo extracts relevant information from the incoming HTTP request.
 // It captures the URL, method, headers, and body. The request body is read and then
 // restored so that it can be processed by subsequent handlers.
-func captureRequestInfo(c *gin.Context, captureBody bool) (*RequestInfo, error) {
+func captureRequestInfo(c *gin.Context) (*RequestInfo, error) {
 	// Capture URL with sensitive query parameters masked
 	maskedQuery := util.MaskSensitiveQuery(c.Request.URL.RawQuery)
 	url := c.Request.URL.Path
@@ -127,7 +86,7 @@ func captureRequestInfo(c *gin.Context, captureBody bool) (*RequestInfo, error)
 
 	// Capture request body
 	var body []byte
-	if captureBody && c.Request.Body != nil {
+	if c.Request.Body != nil {
 		// Read the body
 		bodyBytes, err := io.ReadAll(c.Request.Body)
 		if err != nil {
diff --git a/internal/api/middleware/response_writer.go b/internal/api/middleware/response_writer.go
index 363278ab35..50fa1c6979 100644
--- a/internal/api/middleware/response_writer.go
+++ b/internal/api/middleware/response_writer.go
@@ -14,8 +14,6 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 )
 
-const requestBodyOverrideContextKey = "REQUEST_BODY_OVERRIDE"
-
 // RequestInfo holds essential details of an incoming HTTP request for logging purposes.
 type RequestInfo struct {
 	URL       string              // URL is the request URL.
@@ -225,8 +223,8 @@ func (w *ResponseWriterWrapper) detectStreaming(contentType string) bool {
 
 	// Only fall back to request payload hints when Content-Type is not set yet.
 	if w.requestInfo != nil && len(w.requestInfo.Body) > 0 {
-		return bytes.Contains(w.requestInfo.Body, []byte(`"stream": true`)) ||
-			bytes.Contains(w.requestInfo.Body, []byte(`"stream":true`))
+		bodyStr := string(w.requestInfo.Body)
+		return strings.Contains(bodyStr, `"stream": true`) || strings.Contains(bodyStr, `"stream":true`)
 	}
 
 	return false
@@ -312,7 +310,7 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 		return nil
 	}
 
-	return w.logRequest(w.extractRequestBody(c), finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
+	return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
 }
 
 func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
@@ -363,32 +361,16 @@ func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time
 	return time.Time{}
 }
 
-func (w *ResponseWriterWrapper) extractRequestBody(c *gin.Context) []byte {
-	if c != nil {
-		if bodyOverride, isExist := c.Get(requestBodyOverrideContextKey); isExist {
-			switch value := bodyOverride.(type) {
-			case []byte:
-				if len(value) > 0 {
-					return bytes.Clone(value)
-				}
-			case string:
-				if strings.TrimSpace(value) != "" {
-					return []byte(value)
-				}
-			}
-		}
-	}
-	if w.requestInfo != nil && len(w.requestInfo.Body) > 0 {
-		return w.requestInfo.Body
-	}
-	return nil
-}
-
-func (w *ResponseWriterWrapper) logRequest(requestBody []byte, statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
+func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
 	if w.requestInfo == nil {
 		return nil
 	}
 
+	var requestBody []byte
+	if len(w.requestInfo.Body) > 0 {
+		requestBody = w.requestInfo.Body
+	}
+
 	if loggerWithOptions, ok := w.logger.(interface {
 		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
 	}); ok {
diff --git a/internal/api/modules/amp/amp.go b/internal/api/modules/amp/amp.go
index a12733e2a1..7c243ebb91 100644
--- a/internal/api/modules/amp/amp.go
+++ b/internal/api/modules/amp/amp.go
@@ -125,9 +125,12 @@ func (m *AmpModule) Register(ctx modules.Context) error {
 	m.registerOnce.Do(func() {
 		// Initialize model mapper from config (for routing unavailable models to alternatives)
 		m.modelMapper = NewModelMapper(settings.ModelMappings)
+		// Load oauth-model-alias for provider lookup via aliases
+		m.modelMapper.UpdateOAuthModelAlias(ctx.Config.OAuthModelAlias)
 
 		// Store initial config for partial reload comparison
-		m.lastConfig = new(settings)
+		settingsCopy := settings
+		m.lastConfig = &settingsCopy
 
 		// Initialize localhost restriction setting (hot-reloadable)
 		m.setRestrictToLocalhost(settings.RestrictManagementToLocalhost)
@@ -211,6 +214,11 @@ func (m *AmpModule) OnConfigUpdated(cfg *config.Config) error {
 		}
 	}
 
+	// Always update oauth-model-alias for model mapper (used for provider lookup)
+	if m.modelMapper != nil {
+		m.modelMapper.UpdateOAuthModelAlias(cfg.OAuthModelAlias)
+	}
+
 	if m.enabled {
 		// Check upstream URL change - now supports hot-reload
 		if newUpstreamURL == "" && oldUpstreamURL != "" {
diff --git a/internal/api/modules/amp/fallback_handlers.go b/internal/api/modules/amp/fallback_handlers.go
index 7d7f7f5f28..f46af1c0f4 100644
--- a/internal/api/modules/amp/fallback_handlers.go
+++ b/internal/api/modules/amp/fallback_handlers.go
@@ -2,12 +2,15 @@ package amp
 
 import (
 	"bytes"
+	"errors"
 	"io"
+	"net/http"
 	"net/http/httputil"
 	"strings"
 	"time"
 
 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/routing/ctxkeys"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
@@ -30,7 +33,13 @@ const (
 )
 
 // MappedModelContextKey is the Gin context key for passing mapped model names.
-const MappedModelContextKey = "mapped_model"
+// Deprecated: Use ctxkeys.MappedModel instead.
+const MappedModelContextKey = string(ctxkeys.MappedModel)
+
+// FallbackModelsContextKey is the Gin context key for passing fallback model names.
+// When the primary mapped model fails (e.g., quota exceeded), these models can be tried.
+// Deprecated: Use ctxkeys.FallbackModels instead.
+const FallbackModelsContextKey = string(ctxkeys.FallbackModels)
 
 // logAmpRouting logs the routing decision for an Amp request with structured fields
 func logAmpRouting(routeType AmpRouteType, requestedModel, resolvedModel, provider, path string) {
@@ -77,6 +86,10 @@ func logAmpRouting(routeType AmpRouteType, requestedModel, resolvedModel, provid
 
 // FallbackHandler wraps a standard handler with fallback logic to ampcode.com
 // when the model's provider is not available in CLIProxyAPI
+//
+// Deprecated: FallbackHandler is deprecated in favor of routing.ModelRoutingWrapper.
+// Use routing.NewModelRoutingWrapper() instead for unified routing logic.
+// This type is kept for backward compatibility and test purposes.
 type FallbackHandler struct {
 	getProxy           func() *httputil.ReverseProxy
 	modelMapper        ModelMapper
@@ -85,6 +98,8 @@ type FallbackHandler struct {
 
 // NewFallbackHandler creates a new fallback handler wrapper
 // The getProxy function allows lazy evaluation of the proxy (useful when proxy is created after routes)
+//
+// Deprecated: Use routing.NewModelRoutingWrapper() instead.
 func NewFallbackHandler(getProxy func() *httputil.ReverseProxy) *FallbackHandler {
 	return &FallbackHandler{
 		getProxy:           getProxy,
@@ -93,6 +108,8 @@ func NewFallbackHandler(getProxy func() *httputil.ReverseProxy) *FallbackHandler
 }
 
 // NewFallbackHandlerWithMapper creates a new fallback handler with model mapping support
+//
+// Deprecated: Use routing.NewModelRoutingWrapper() instead.
 func NewFallbackHandlerWithMapper(getProxy func() *httputil.ReverseProxy, mapper ModelMapper, forceModelMappings func() bool) *FallbackHandler {
 	if forceModelMappings == nil {
 		forceModelMappings = func() bool { return false }
@@ -113,6 +130,20 @@ func (fh *FallbackHandler) SetModelMapper(mapper ModelMapper) {
 // If the model's provider is not configured in CLIProxyAPI, it forwards to ampcode.com
 func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc {
 	return func(c *gin.Context) {
+		// Swallow ErrAbortHandler panics from ReverseProxy to avoid noisy stack traces.
+		// ReverseProxy raises this panic when the client connection is closed prematurely
+		// (e.g., user cancels request, network disconnect) or when ServeHTTP is called
+		// with a ResponseWriter that doesn't implement http.CloseNotifier.
+		// This is an expected error condition, not a bug, so we handle it gracefully.
+		defer func() {
+			if rec := recover(); rec != nil {
+				if err, ok := rec.(error); ok && errors.Is(err, http.ErrAbortHandler) {
+					return
+				}
+				panic(rec)
+			}
+		}()
+
 		requestPath := c.Request.URL.Path
 
 		// Read the request body to extract the model name
@@ -142,36 +173,57 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 			thinkingSuffix = "(" + suffixResult.RawSuffix + ")"
 		}
 
-		resolveMappedModel := func() (string, []string) {
+		// resolveMappedModels returns all mapped models (primary + fallbacks) and providers for the first one.
+		resolveMappedModels := func() ([]string, []string) {
 			if fh.modelMapper == nil {
-				return "", nil
+				return nil, nil
 			}
 
-			mappedModel := fh.modelMapper.MapModel(modelName)
-			if mappedModel == "" {
-				mappedModel = fh.modelMapper.MapModel(normalizedModel)
+			mapper, ok := fh.modelMapper.(*DefaultModelMapper)
+			if !ok {
+				// Fallback to single model for non-DefaultModelMapper
+				mappedModel := fh.modelMapper.MapModel(modelName)
+				if mappedModel == "" {
+					mappedModel = fh.modelMapper.MapModel(normalizedModel)
+				}
+				if mappedModel == "" {
+					return nil, nil
+				}
+				mappedBaseModel := thinking.ParseSuffix(mappedModel).ModelName
+				mappedProviders := util.GetProviderName(mappedBaseModel)
+				if len(mappedProviders) == 0 {
+					return nil, nil
+				}
+				return []string{mappedModel}, mappedProviders
 			}
-			mappedModel = strings.TrimSpace(mappedModel)
-			if mappedModel == "" {
-				return "", nil
+
+			// Use MapModelWithFallbacks for DefaultModelMapper
+			mappedModels := mapper.MapModelWithFallbacks(modelName)
+			if len(mappedModels) == 0 {
+				mappedModels = mapper.MapModelWithFallbacks(normalizedModel)
+			}
+			if len(mappedModels) == 0 {
+				return nil, nil
 			}
 
-			// Preserve dynamic thinking suffix (e.g. "(xhigh)") when mapping applies, unless the target
-			// already specifies its own thinking suffix.
-			if thinkingSuffix != "" {
-				mappedSuffixResult := thinking.ParseSuffix(mappedModel)
-				if !mappedSuffixResult.HasSuffix {
-					mappedModel += thinkingSuffix
+			// Apply thinking suffix if needed
+			for i, model := range mappedModels {
+				if thinkingSuffix != "" {
+					suffixResult := thinking.ParseSuffix(model)
+					if !suffixResult.HasSuffix {
+						mappedModels[i] = model + thinkingSuffix
+					}
 				}
 			}
 
-			mappedBaseModel := thinking.ParseSuffix(mappedModel).ModelName
-			mappedProviders := util.GetProviderName(mappedBaseModel)
-			if len(mappedProviders) == 0 {
-				return "", nil
+			// Get providers for the first model
+			firstBaseModel := thinking.ParseSuffix(mappedModels[0]).ModelName
+			providers := util.GetProviderName(firstBaseModel)
+			if len(providers) == 0 {
+				return nil, nil
 			}
 
-			return mappedModel, mappedProviders
+			return mappedModels, providers
 		}
 
 		// Track resolved model for logging (may change if mapping is applied)
@@ -179,21 +231,27 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 		usedMapping := false
 		var providers []string
 
+		// Helper to apply model mapping and update state
+		applyMapping := func(mappedModels []string, mappedProviders []string) {
+			bodyBytes = rewriteModelInRequest(bodyBytes, mappedModels[0])
+			c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+			c.Set(string(ctxkeys.MappedModel), mappedModels[0])
+			if len(mappedModels) > 1 {
+				c.Set(string(ctxkeys.FallbackModels), mappedModels[1:])
+			}
+			resolvedModel = mappedModels[0]
+			usedMapping = true
+			providers = mappedProviders
+		}
+
 		// Check if model mappings should be forced ahead of local API keys
 		forceMappings := fh.forceModelMappings != nil && fh.forceModelMappings()
 
 		if forceMappings {
 			// FORCE MODE: Check model mappings FIRST (takes precedence over local API keys)
 			// This allows users to route Amp requests to their preferred OAuth providers
-			if mappedModel, mappedProviders := resolveMappedModel(); mappedModel != "" {
-				// Mapping found and provider available - rewrite the model in request body
-				bodyBytes = rewriteModelInRequest(bodyBytes, mappedModel)
-				c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
-				// Store mapped model in context for handlers that check it (like gemini bridge)
-				c.Set(MappedModelContextKey, mappedModel)
-				resolvedModel = mappedModel
-				usedMapping = true
-				providers = mappedProviders
+			if mappedModels, mappedProviders := resolveMappedModels(); len(mappedModels) > 0 {
+				applyMapping(mappedModels, mappedProviders)
 			}
 
 			// If no mapping applied, check for local providers
@@ -206,15 +264,8 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 
 			if len(providers) == 0 {
 				// No providers configured - check if we have a model mapping
-				if mappedModel, mappedProviders := resolveMappedModel(); mappedModel != "" {
-					// Mapping found and provider available - rewrite the model in request body
-					bodyBytes = rewriteModelInRequest(bodyBytes, mappedModel)
-					c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
-					// Store mapped model in context for handlers that check it (like gemini bridge)
-					c.Set(MappedModelContextKey, mappedModel)
-					resolvedModel = mappedModel
-					usedMapping = true
-					providers = mappedProviders
+				if mappedModels, mappedProviders := resolveMappedModels(); len(mappedModels) > 0 {
+					applyMapping(mappedModels, mappedProviders)
 				}
 			}
 		}
diff --git a/internal/api/modules/amp/fallback_handlers_characterization_test.go b/internal/api/modules/amp/fallback_handlers_characterization_test.go
new file mode 100644
index 0000000000..e52bc5cef2
--- /dev/null
+++ b/internal/api/modules/amp/fallback_handlers_characterization_test.go
@@ -0,0 +1,326 @@
+package amp
+
+import (
+	"bytes"
+	"net/http"
+	"net/http/httptest"
+	"net/http/httputil"
+	"net/url"
+	"strings"
+	"testing"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/routing/testutil"
+	"github.com/stretchr/testify/assert"
+)
+
+// Characterization tests for fallback_handlers.go using testutil recorders
+// These tests capture existing behavior before refactoring to routing layer
+
+func TestCharacterization_LocalProvider(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Register a mock provider for the test model
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient("char-test-local", "anthropic", []*registry.ModelInfo{
+		{ID: "test-model-local"},
+	})
+	defer reg.UnregisterClient("char-test-local")
+
+	// Setup recorders
+	proxyRecorder := testutil.NewFakeProxyRecorder()
+	handlerRecorder := testutil.NewFakeHandlerRecorder()
+
+	// Create gin context
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+
+	body := `{"model": "test-model-local", "messages": [{"role": "user", "content": "hello"}]}`
+	req := httptest.NewRequest(http.MethodPost, "/api/provider/anthropic/v1/messages", bytes.NewReader([]byte(body)))
+	req.Header.Set("Content-Type", "application/json")
+	c.Request = req
+
+	// Create fallback handler with proxy recorder
+	// Create a test server to act as the proxy target
+	proxyServer := httptest.NewServer(proxyRecorder.ToHandler())
+	defer proxyServer.Close()
+
+	fh := NewFallbackHandler(func() *httputil.ReverseProxy {
+		// Create a reverse proxy that forwards to our test server
+		targetURL, _ := url.Parse(proxyServer.URL)
+		return httputil.NewSingleHostReverseProxy(targetURL)
+	})
+
+	// Execute
+	wrapped := fh.WrapHandler(handlerRecorder.GinHandler())
+	wrapped(c)
+
+	// Assert: proxy NOT called
+	assert.False(t, proxyRecorder.Called, "proxy should NOT be called for local provider")
+
+	// Assert: local handler called once
+	assert.True(t, handlerRecorder.WasCalled(), "local handler should be called")
+	assert.Equal(t, 1, handlerRecorder.GetCallCount(), "local handler should be called exactly once")
+
+	// Assert: request body model unchanged
+	assert.Contains(t, string(handlerRecorder.RequestBody), "test-model-local", "request body model should be unchanged")
+}
+
+func TestCharacterization_ModelMapping(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Register a mock provider for the TARGET model (the mapped-to model)
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient("char-test-mapped", "openai", []*registry.ModelInfo{
+		{ID: "gpt-4-local"},
+	})
+	defer reg.UnregisterClient("char-test-mapped")
+
+	// Setup recorders
+	proxyRecorder := testutil.NewFakeProxyRecorder()
+	handlerRecorder := testutil.NewFakeHandlerRecorder()
+
+	// Create model mapper with a mapping
+	mapper := NewModelMapper([]config.AmpModelMapping{
+		{From: "gpt-4-turbo", To: "gpt-4-local"},
+	})
+
+	// Create gin context
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+
+	// Request with original model that gets mapped
+	body := `{"model": "gpt-4-turbo", "messages": [{"role": "user", "content": "hello"}]}`
+	req := httptest.NewRequest(http.MethodPost, "/api/provider/openai/v1/chat/completions", bytes.NewReader([]byte(body)))
+	req.Header.Set("Content-Type", "application/json")
+	c.Request = req
+
+	// Create fallback handler with mapper
+	proxyServer := httptest.NewServer(proxyRecorder.ToHandler())
+	defer proxyServer.Close()
+
+	fh := NewFallbackHandlerWithMapper(func() *httputil.ReverseProxy {
+		targetURL, _ := url.Parse(proxyServer.URL)
+		return httputil.NewSingleHostReverseProxy(targetURL)
+	}, mapper, func() bool { return false })
+
+	// Execute - use handler that returns model in response for rewriter to work
+	wrapped := fh.WrapHandler(handlerRecorder.GinHandlerWithModel())
+	wrapped(c)
+
+	// Assert: proxy NOT called
+	assert.False(t, proxyRecorder.Called, "proxy should NOT be called for model mapping")
+
+	// Assert: local handler called once
+	assert.True(t, handlerRecorder.WasCalled(), "local handler should be called")
+	assert.Equal(t, 1, handlerRecorder.GetCallCount(), "local handler should be called exactly once")
+
+	// Assert: request body model was rewritten to mapped model
+	assert.Contains(t, string(handlerRecorder.RequestBody), "gpt-4-local", "request body model should be rewritten to mapped model")
+	assert.NotContains(t, string(handlerRecorder.RequestBody), "gpt-4-turbo", "request body should NOT contain original model")
+
+	// Assert: context has mapped_model key set
+	mappedModel, exists := handlerRecorder.GetContextKey("mapped_model")
+	assert.True(t, exists, "context should have mapped_model key")
+	assert.Equal(t, "gpt-4-local", mappedModel, "mapped_model should be the target model")
+
+	// Assert: response body model rewritten back to original
+	// The response writer should rewrite model names in the response
+	responseBody := w.Body.String()
+	assert.Contains(t, responseBody, "gpt-4-turbo", "response should have original model name")
+}
+
+func TestCharacterization_AmpCreditsProxy(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Setup recorders - NO local provider registered, NO mapping configured
+	proxyRecorder := testutil.NewFakeProxyRecorder()
+	handlerRecorder := testutil.NewFakeHandlerRecorder()
+
+	// Create gin context with CloseNotifier support (required for ReverseProxy)
+	w := testutil.NewCloseNotifierRecorder()
+	c, _ := gin.CreateTestContext(w)
+
+	// Request with a model that has no local provider and no mapping
+	body := `{"model": "unknown-model-no-provider", "messages": [{"role": "user", "content": "hello"}]}`
+	req := httptest.NewRequest(http.MethodPost, "/api/provider/openai/v1/chat/completions", bytes.NewReader([]byte(body)))
+	req.Header.Set("Content-Type", "application/json")
+	c.Request = req
+
+	// Create fallback handler
+	proxyServer := httptest.NewServer(proxyRecorder.ToHandler())
+	defer proxyServer.Close()
+
+	fh := NewFallbackHandler(func() *httputil.ReverseProxy {
+		targetURL, _ := url.Parse(proxyServer.URL)
+		return httputil.NewSingleHostReverseProxy(targetURL)
+	})
+
+	// Execute
+	wrapped := fh.WrapHandler(handlerRecorder.GinHandler())
+	wrapped(c)
+
+	// Assert: proxy called once
+	assert.True(t, proxyRecorder.Called, "proxy should be called when no local provider and no mapping")
+	assert.Equal(t, 1, proxyRecorder.GetCallCount(), "proxy should be called exactly once")
+
+	// Assert: local handler NOT called
+	assert.False(t, handlerRecorder.WasCalled(), "local handler should NOT be called when falling back to proxy")
+
+	// Assert: body forwarded to proxy is original (no rewrite)
+	assert.Contains(t, string(proxyRecorder.RequestBody), "unknown-model-no-provider", "request body model should be unchanged when proxying")
+}
+
+func TestCharacterization_BodyRestore(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Register a mock provider for the test model
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient("char-test-body", "anthropic", []*registry.ModelInfo{
+		{ID: "test-model-body"},
+	})
+	defer reg.UnregisterClient("char-test-body")
+
+	// Setup recorders
+	proxyRecorder := testutil.NewFakeProxyRecorder()
+	handlerRecorder := testutil.NewFakeHandlerRecorder()
+
+	// Create gin context
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+
+	// Create a complex request body that will be read by the wrapper for model extraction
+	originalBody := `{"model": "test-model-body", "messages": [{"role": "user", "content": "hello"}], "temperature": 0.7, "stream": true}`
+	req := httptest.NewRequest(http.MethodPost, "/api/provider/anthropic/v1/messages", bytes.NewReader([]byte(originalBody)))
+	req.Header.Set("Content-Type", "application/json")
+	c.Request = req
+
+	// Create fallback handler with proxy recorder
+	proxyServer := httptest.NewServer(proxyRecorder.ToHandler())
+	defer proxyServer.Close()
+
+	fh := NewFallbackHandler(func() *httputil.ReverseProxy {
+		targetURL, _ := url.Parse(proxyServer.URL)
+		return httputil.NewSingleHostReverseProxy(targetURL)
+	})
+
+	// Execute
+	wrapped := fh.WrapHandler(handlerRecorder.GinHandler())
+	wrapped(c)
+
+	// Assert: local handler called (not proxy, since we have a local provider)
+	assert.True(t, handlerRecorder.WasCalled(), "local handler should be called")
+	assert.False(t, proxyRecorder.Called, "proxy should NOT be called for local provider")
+
+	// Assert: handler receives complete original body
+	// This verifies that the body was properly restored after the wrapper read it for model extraction
+	assert.Equal(t, originalBody, string(handlerRecorder.RequestBody), "handler should receive complete original body after wrapper reads it for model extraction")
+}
+
+// TestCharacterization_GeminiV1Beta1_PostModels tests that POST requests with /models/ path use Gemini bridge handler
+// This is a characterization test for the route gating logic in routes.go
+func TestCharacterization_GeminiV1Beta1_PostModels(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Register a mock provider for the test model (Gemini format uses path-based model extraction)
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient("char-test-gemini", "google", []*registry.ModelInfo{
+		{ID: "gemini-pro"},
+	})
+	defer reg.UnregisterClient("char-test-gemini")
+
+	// Setup recorders
+	proxyRecorder := testutil.NewFakeProxyRecorder()
+	handlerRecorder := testutil.NewFakeHandlerRecorder()
+
+	// Create a test server for the proxy
+	proxyServer := httptest.NewServer(proxyRecorder.ToHandler())
+	defer proxyServer.Close()
+
+	// Create fallback handler
+	fh := NewFallbackHandler(func() *httputil.ReverseProxy {
+		targetURL, _ := url.Parse(proxyServer.URL)
+		return httputil.NewSingleHostReverseProxy(targetURL)
+	})
+
+	// Create the Gemini bridge handler (simulating what routes.go does)
+	geminiBridge := createGeminiBridgeHandler(handlerRecorder.GinHandler())
+	geminiV1Beta1Handler := fh.WrapHandler(geminiBridge)
+
+	// Create router with the same gating logic as routes.go
+	r := gin.New()
+	r.Any("/api/provider/google/v1beta1/*path", func(c *gin.Context) {
+		if c.Request.Method == "POST" {
+			if path := c.Param("path"); strings.Contains(path, "/models/") {
+				// POST with /models/ path -> use Gemini bridge with fallback handler
+				geminiV1Beta1Handler(c)
+				return
+			}
+		}
+		// Non-POST or no /models/ in path -> proxy upstream
+		proxyRecorder.ServeHTTP(c.Writer, c.Request)
+	})
+
+	// Execute: POST request with /models/ in path
+	body := `{"contents": [{"role": "user", "parts": [{"text": "hello"}]}]}`
+	req := httptest.NewRequest(http.MethodPost, "/api/provider/google/v1beta1/publishers/google/models/gemini-pro:generateContent", bytes.NewReader([]byte(body)))
+	req.Header.Set("Content-Type", "application/json")
+	w := httptest.NewRecorder()
+	r.ServeHTTP(w, req)
+
+	// Assert: local Gemini handler called
+	assert.True(t, handlerRecorder.WasCalled(), "local Gemini handler should be called for POST /models/")
+
+	// Assert: proxy NOT called
+	assert.False(t, proxyRecorder.Called, "proxy should NOT be called for POST /models/ path")
+}
+
+// TestCharacterization_GeminiV1Beta1_GetProxies tests that GET requests to Gemini v1beta1 always use proxy
+// This is a characterization test for the route gating logic in routes.go
+func TestCharacterization_GeminiV1Beta1_GetProxies(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Setup recorders
+	proxyRecorder := testutil.NewFakeProxyRecorder()
+	handlerRecorder := testutil.NewFakeHandlerRecorder()
+
+	// Create a test server for the proxy
+	proxyServer := httptest.NewServer(proxyRecorder.ToHandler())
+	defer proxyServer.Close()
+
+	// Create fallback handler
+	fh := NewFallbackHandler(func() *httputil.ReverseProxy {
+		targetURL, _ := url.Parse(proxyServer.URL)
+		return httputil.NewSingleHostReverseProxy(targetURL)
+	})
+
+	// Create the Gemini bridge handler
+	geminiBridge := createGeminiBridgeHandler(handlerRecorder.GinHandler())
+	geminiV1Beta1Handler := fh.WrapHandler(geminiBridge)
+
+	// Create router with the same gating logic as routes.go
+	r := gin.New()
+	r.Any("/api/provider/google/v1beta1/*path", func(c *gin.Context) {
+		if c.Request.Method == "POST" {
+			if path := c.Param("path"); strings.Contains(path, "/models/") {
+				geminiV1Beta1Handler(c)
+				return
+			}
+		}
+		proxyRecorder.ServeHTTP(c.Writer, c.Request)
+	})
+
+	// Execute: GET request (even with /models/ in path)
+	req := httptest.NewRequest(http.MethodGet, "/api/provider/google/v1beta1/publishers/google/models/gemini-pro", nil)
+	w := httptest.NewRecorder()
+	r.ServeHTTP(w, req)
+
+	// Assert: proxy called
+	assert.True(t, proxyRecorder.Called, "proxy should be called for GET requests")
+	assert.Equal(t, 1, proxyRecorder.GetCallCount(), "proxy should be called exactly once")
+
+	// Assert: local handler NOT called
+	assert.False(t, handlerRecorder.WasCalled(), "local handler should NOT be called for GET requests")
+}
diff --git a/internal/api/modules/amp/fallback_handlers_test.go b/internal/api/modules/amp/fallback_handlers_test.go
index a687fd116b..eef73cbd9b 100644
--- a/internal/api/modules/amp/fallback_handlers_test.go
+++ b/internal/api/modules/amp/fallback_handlers_test.go
@@ -2,7 +2,7 @@ package amp
 
 import (
 	"bytes"
-	"encoding/json"
+	"io"
 	"net/http"
 	"net/http/httptest"
 	"net/http/httputil"
@@ -11,63 +11,138 @@ import (
 	"github.com/gin-gonic/gin"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/stretchr/testify/assert"
 )
 
-func TestFallbackHandler_ModelMapping_PreservesThinkingSuffixAndRewritesResponse(t *testing.T) {
+// Characterization tests for fallback_handlers.go
+// These tests capture existing behavior before refactoring to routing layer
+
+func TestFallbackHandler_WrapHandler_LocalProvider_NoMapping(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 
-	reg := registry.GetGlobalRegistry()
-	reg.RegisterClient("test-client-amp-fallback", "codex", []*registry.ModelInfo{
-		{ID: "test/gpt-5.2", OwnedBy: "openai", Type: "codex"},
+	// Setup: model that has local providers (gemini-2.5-pro is registered)
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+
+	body := `{"model": "gemini-2.5-pro", "messages": [{"role": "user", "content": "hello"}]}`
+	req := httptest.NewRequest(http.MethodPost, "/api/provider/anthropic/v1/messages", bytes.NewReader([]byte(body)))
+	req.Header.Set("Content-Type", "application/json")
+	c.Request = req
+
+	// Handler that should be called (not proxy)
+	handlerCalled := false
+	handler := func(c *gin.Context) {
+		handlerCalled = true
+		c.JSON(200, gin.H{"status": "ok"})
+	}
+
+	// Create fallback handler
+	fh := NewFallbackHandler(func() *httputil.ReverseProxy {
+		return nil // no proxy
 	})
-	defer reg.UnregisterClient("test-client-amp-fallback")
 
-	mapper := NewModelMapper([]config.AmpModelMapping{
-		{From: "gpt-5.2", To: "test/gpt-5.2"},
+	// Execute
+	wrapped := fh.WrapHandler(handler)
+	wrapped(c)
+
+	// Assert: handler should be called directly (no mapping needed)
+	assert.True(t, handlerCalled, "handler should be called for local provider")
+	assert.Equal(t, 200, w.Code)
+}
+
+func TestFallbackHandler_WrapHandler_MappingApplied(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Register a mock provider for the target model
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient("test-client", "anthropic", []*registry.ModelInfo{
+		{ID: "claude-opus-4-5-thinking"},
 	})
 
-	fallback := NewFallbackHandlerWithMapper(func() *httputil.ReverseProxy { return nil }, mapper, nil)
+	// Setup: model that needs mapping
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
 
+	body := `{"model": "claude-opus-4-5-20251101", "messages": [{"role": "user", "content": "hello"}]}`
+	req := httptest.NewRequest(http.MethodPost, "/api/provider/anthropic/v1/messages", bytes.NewReader([]byte(body)))
+	req.Header.Set("Content-Type", "application/json")
+	c.Request = req
+
+	// Handler to capture rewritten body
+	var capturedBody []byte
 	handler := func(c *gin.Context) {
-		var req struct {
-			Model string `json:"model"`
-		}
-		if err := c.ShouldBindJSON(&req); err != nil {
-			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
-			return
-		}
-
-		c.JSON(http.StatusOK, gin.H{
-			"model":      req.Model,
-			"seen_model": req.Model,
-		})
+		capturedBody, _ = io.ReadAll(c.Request.Body)
+		c.JSON(200, gin.H{"status": "ok"})
 	}
 
-	r := gin.New()
-	r.POST("/chat/completions", fallback.WrapHandler(handler))
+	// Create fallback handler with mapper
+	mapper := NewModelMapper([]config.AmpModelMapping{
+		{From: "claude-opus-4-5-20251101", To: "claude-opus-4-5-thinking"},
+	})
+
+	fh := NewFallbackHandlerWithMapper(
+		func() *httputil.ReverseProxy { return nil },
+		mapper,
+		func() bool { return false },
+	)
+
+	// Execute
+	wrapped := fh.WrapHandler(handler)
+	wrapped(c)
+
+	// Assert: body should be rewritten
+	assert.Contains(t, string(capturedBody), "claude-opus-4-5-thinking")
+
+	// Assert: context should have mapped model
+	mappedModel, exists := c.Get(MappedModelContextKey)
+	assert.True(t, exists, "MappedModelContextKey should be set")
+	assert.NotEmpty(t, mappedModel)
+}
+
+func TestFallbackHandler_WrapHandler_ThinkingSuffixPreserved(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// Register a mock provider for the target model
+	reg := registry.GetGlobalRegistry()
+	reg.RegisterClient("test-client-2", "anthropic", []*registry.ModelInfo{
+		{ID: "claude-opus-4-5-thinking"},
+	})
 
-	reqBody := []byte(`{"model":"gpt-5.2(xhigh)"}`)
-	req := httptest.NewRequest(http.MethodPost, "/chat/completions", bytes.NewReader(reqBody))
-	req.Header.Set("Content-Type", "application/json")
 	w := httptest.NewRecorder()
-	r.ServeHTTP(w, req)
+	c, _ := gin.CreateTestContext(w)
 
-	if w.Code != http.StatusOK {
-		t.Fatalf("Expected status 200, got %d", w.Code)
-	}
+	// Model with thinking suffix
+	body := `{"model": "claude-opus-4-5-20251101(xhigh)", "messages": []}`
+	req := httptest.NewRequest(http.MethodPost, "/api/provider/anthropic/v1/messages", bytes.NewReader([]byte(body)))
+	req.Header.Set("Content-Type", "application/json")
+	c.Request = req
 
-	var resp struct {
-		Model     string `json:"model"`
-		SeenModel string `json:"seen_model"`
-	}
-	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
-		t.Fatalf("Failed to parse response JSON: %v", err)
+	var capturedBody []byte
+	handler := func(c *gin.Context) {
+		capturedBody, _ = io.ReadAll(c.Request.Body)
+		c.JSON(200, gin.H{"status": "ok"})
 	}
 
-	if resp.Model != "gpt-5.2(xhigh)" {
-		t.Errorf("Expected response model gpt-5.2(xhigh), got %s", resp.Model)
-	}
-	if resp.SeenModel != "test/gpt-5.2(xhigh)" {
-		t.Errorf("Expected handler to see test/gpt-5.2(xhigh), got %s", resp.SeenModel)
-	}
+	mapper := NewModelMapper([]config.AmpModelMapping{
+		{From: "claude-opus-4-5-20251101", To: "claude-opus-4-5-thinking"},
+	})
+
+	fh := NewFallbackHandlerWithMapper(
+		func() *httputil.ReverseProxy { return nil },
+		mapper,
+		func() bool { return false },
+	)
+
+	wrapped := fh.WrapHandler(handler)
+	wrapped(c)
+
+	// Assert: thinking suffix should be preserved
+	assert.Contains(t, string(capturedBody), "(xhigh)")
+}
+
+func TestFallbackHandler_WrapHandler_NoProvider_NoMapping_ProxyEnabled(t *testing.T) {
+	// Skip: httptest.ResponseRecorder doesn't implement http.CloseNotifier
+	// which is required by httputil.ReverseProxy. This test requires a real
+	// HTTP server and client to properly test proxy behavior.
+	t.Skip("requires real HTTP server for proxy testing")
 }
diff --git a/internal/api/modules/amp/model_mapping.go b/internal/api/modules/amp/model_mapping.go
index 4159a2b576..b8d4743296 100644
--- a/internal/api/modules/amp/model_mapping.go
+++ b/internal/api/modules/amp/model_mapping.go
@@ -30,18 +30,98 @@ type DefaultModelMapper struct {
 	mu       sync.RWMutex
 	mappings map[string]string // exact: from -> to (normalized lowercase keys)
 	regexps  []regexMapping    // regex rules evaluated in order
+
+	// oauthAliasForward maps channel -> name (lower) -> []alias for oauth-model-alias lookup.
+	// This allows model-mappings targets to find providers via their aliases.
+	oauthAliasForward map[string]map[string][]string
 }
 
 // NewModelMapper creates a new model mapper with the given initial mappings.
 func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper {
 	m := &DefaultModelMapper{
-		mappings: make(map[string]string),
-		regexps:  nil,
+		mappings:          make(map[string]string),
+		regexps:           nil,
+		oauthAliasForward: nil,
 	}
 	m.UpdateMappings(mappings)
 	return m
 }
 
+// UpdateOAuthModelAlias updates the oauth-model-alias lookup table.
+// This is called during initialization and on config hot-reload.
+func (m *DefaultModelMapper) UpdateOAuthModelAlias(aliases map[string][]config.OAuthModelAlias) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	if len(aliases) == 0 {
+		m.oauthAliasForward = nil
+		return
+	}
+
+	forward := make(map[string]map[string][]string, len(aliases))
+	for rawChannel, entries := range aliases {
+		channel := strings.ToLower(strings.TrimSpace(rawChannel))
+		if channel == "" || len(entries) == 0 {
+			continue
+		}
+		channelMap := make(map[string][]string)
+		for _, entry := range entries {
+			name := strings.TrimSpace(entry.Name)
+			alias := strings.TrimSpace(entry.Alias)
+			if name == "" || alias == "" {
+				continue
+			}
+			if strings.EqualFold(name, alias) {
+				continue
+			}
+			nameKey := strings.ToLower(name)
+			channelMap[nameKey] = append(channelMap[nameKey], alias)
+		}
+		if len(channelMap) > 0 {
+			forward[channel] = channelMap
+		}
+	}
+	if len(forward) == 0 {
+		m.oauthAliasForward = nil
+		return
+	}
+	m.oauthAliasForward = forward
+	log.Debugf("amp model mapping: loaded oauth-model-alias for %d channel(s)", len(forward))
+}
+
+// findAllAliasesWithProviders returns all oauth-model-alias aliases for targetModel
+// that have available providers. Useful for fallback when one alias is quota-exceeded.
+func (m *DefaultModelMapper) findAllAliasesWithProviders(targetModel string) []string {
+	if m.oauthAliasForward == nil {
+		return nil
+	}
+
+	targetKey := strings.ToLower(strings.TrimSpace(targetModel))
+	if targetKey == "" {
+		return nil
+	}
+
+	var result []string
+	seen := make(map[string]struct{})
+
+	// Check all channels for this model name
+	for _, channelMap := range m.oauthAliasForward {
+		aliases := channelMap[targetKey]
+		for _, alias := range aliases {
+			aliasLower := strings.ToLower(alias)
+			if _, exists := seen[aliasLower]; exists {
+				continue
+			}
+			providers := util.GetProviderName(alias)
+			if len(providers) > 0 {
+				result = append(result, alias)
+				seen[aliasLower] = struct{}{}
+			}
+		}
+	}
+	return result
+}
+
 // MapModel checks if a mapping exists for the requested model and if the
 // target model has available local providers. Returns the mapped model name
 // or empty string if no valid mapping exists.
@@ -51,9 +131,20 @@ func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper {
 // However, if the mapping target already contains a suffix, the config suffix
 // takes priority over the user's suffix.
 func (m *DefaultModelMapper) MapModel(requestedModel string) string {
-	if requestedModel == "" {
+	models := m.MapModelWithFallbacks(requestedModel)
+	if len(models) == 0 {
 		return ""
 	}
+	return models[0]
+}
+
+// MapModelWithFallbacks returns all possible target models for the requested model,
+// including fallback aliases from oauth-model-alias. The first model is the primary target,
+// and subsequent models are fallbacks to try if the primary is unavailable (e.g., quota exceeded).
+func (m *DefaultModelMapper) MapModelWithFallbacks(requestedModel string) []string {
+	if requestedModel == "" {
+		return nil
+	}
 
 	m.mu.RLock()
 	defer m.mu.RUnlock()
@@ -78,34 +169,54 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string {
 			}
 		}
 		if !exists {
-			return ""
+			return nil
 		}
 	}
 
 	// Check if target model already has a thinking suffix (config priority)
 	targetResult := thinking.ParseSuffix(targetModel)
+	targetBase := targetResult.ModelName
+
+	// Helper to apply suffix to a model
+	applySuffix := func(model string) string {
+		modelResult := thinking.ParseSuffix(model)
+		if modelResult.HasSuffix {
+			return model
+		}
+		if requestResult.HasSuffix && requestResult.RawSuffix != "" {
+			return model + "(" + requestResult.RawSuffix + ")"
+		}
+		return model
+	}
 
 	// Verify target model has available providers (use base model for lookup)
-	providers := util.GetProviderName(targetResult.ModelName)
-	if len(providers) == 0 {
-		log.Debugf("amp model mapping: target model %s has no available providers, skipping mapping", targetModel)
-		return ""
+	providers := util.GetProviderName(targetBase)
+
+	// If direct provider available, return it as primary
+	if len(providers) > 0 {
+		return []string{applySuffix(targetModel)}
 	}
 
-	// Suffix handling: config suffix takes priority, otherwise preserve user suffix
-	if targetResult.HasSuffix {
-		// Config's "to" already contains a suffix - use it as-is (config priority)
-		return targetModel
+	// No direct providers - check oauth-model-alias for all aliases that have providers
+	allAliases := m.findAllAliasesWithProviders(targetBase)
+	if len(allAliases) == 0 {
+		log.Debugf("amp model mapping: target model %s has no available providers, skipping mapping", targetModel)
+		return nil
 	}
 
-	// Preserve user's thinking suffix on the mapped model
-	// (skip empty suffixes to avoid returning "model()")
-	if requestResult.HasSuffix && requestResult.RawSuffix != "" {
-		return targetModel + "(" + requestResult.RawSuffix + ")"
+	// Log resolution
+	if len(allAliases) == 1 {
+		log.Debugf("amp model mapping: resolved %s -> %s via oauth-model-alias", targetModel, allAliases[0])
+	} else {
+		log.Debugf("amp model mapping: resolved %s -> %v via oauth-model-alias (%d fallbacks)", targetModel, allAliases, len(allAliases)-1)
 	}
 
-	// Note: Detailed routing log is handled by logAmpRouting in fallback_handlers.go
-	return targetModel
+	// Apply suffix to all aliases
+	result := make([]string, len(allAliases))
+	for i, alias := range allAliases {
+		result[i] = applySuffix(alias)
+	}
+	return result
 }
 
 // UpdateMappings refreshes the mapping configuration from config.
@@ -165,6 +276,22 @@ func (m *DefaultModelMapper) GetMappings() map[string]string {
 	return result
 }
 
+// GetMappingsAsConfig returns the current model mappings as config.AmpModelMapping slice.
+// Safe for concurrent use.
+func (m *DefaultModelMapper) GetMappingsAsConfig() []config.AmpModelMapping {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	result := make([]config.AmpModelMapping, 0, len(m.mappings))
+	for from, to := range m.mappings {
+		result = append(result, config.AmpModelMapping{
+			From: from,
+			To:   to,
+		})
+	}
+	return result
+}
+
 type regexMapping struct {
 	re *regexp.Regexp
 	to string
diff --git a/internal/api/modules/amp/proxy.go b/internal/api/modules/amp/proxy.go
index c593c1b328..c460a0d60f 100644
--- a/internal/api/modules/amp/proxy.go
+++ b/internal/api/modules/amp/proxy.go
@@ -3,11 +3,8 @@ package amp
 import (
 	"bytes"
 	"compress/gzip"
-	"context"
-	"errors"
 	"fmt"
 	"io"
-	"net"
 	"net/http"
 	"net/http/httputil"
 	"net/url"
@@ -105,15 +102,7 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 	// Modify incoming responses to handle gzip without Content-Encoding
 	// This addresses the same issue as inline handler gzip handling, but at the proxy level
 	proxy.ModifyResponse = func(resp *http.Response) error {
-		// Log upstream error responses for diagnostics (502, 503, etc.)
-		// These are NOT proxy connection errors - the upstream responded with an error status
-		if resp.StatusCode >= 500 {
-			log.Errorf("amp upstream responded with error [%d] for %s %s", resp.StatusCode, resp.Request.Method, resp.Request.URL.Path)
-		} else if resp.StatusCode >= 400 {
-			log.Warnf("amp upstream responded with client error [%d] for %s %s", resp.StatusCode, resp.Request.Method, resp.Request.URL.Path)
-		}
-
-		// Only process successful responses for gzip decompression
+		// Only process successful responses
 		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
 			return nil
 		}
@@ -197,29 +186,9 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 		return nil
 	}
 
-	// Error handler for proxy failures with detailed error classification for diagnostics
+	// Error handler for proxy failures
 	proxy.ErrorHandler = func(rw http.ResponseWriter, req *http.Request, err error) {
-		// Classify the error type for better diagnostics
-		var errType string
-		if errors.Is(err, context.DeadlineExceeded) {
-			errType = "timeout"
-		} else if errors.Is(err, context.Canceled) {
-			errType = "canceled"
-		} else if netErr, ok := err.(net.Error); ok && netErr.Timeout() {
-			errType = "dial_timeout"
-		} else if _, ok := err.(net.Error); ok {
-			errType = "network_error"
-		} else {
-			errType = "connection_error"
-		}
-
-		// Don't log as error for context canceled - it's usually client closing connection
-		if errors.Is(err, context.Canceled) {
-			return
-		} else {
-			log.Errorf("amp upstream proxy error [%s] for %s %s: %v", errType, req.Method, req.URL.Path, err)
-		}
-
+		log.Errorf("amp upstream proxy error for %s %s: %v", req.Method, req.URL.Path, err)
 		rw.Header().Set("Content-Type", "application/json")
 		rw.WriteHeader(http.StatusBadGateway)
 		_, _ = rw.Write([]byte(`{"error":"amp_upstream_proxy_error","message":"Failed to reach Amp upstream"}`))
diff --git a/internal/api/modules/amp/proxy_test.go b/internal/api/modules/amp/proxy_test.go
index 32f5d8605b..ff23e3986b 100644
--- a/internal/api/modules/amp/proxy_test.go
+++ b/internal/api/modules/amp/proxy_test.go
@@ -493,30 +493,6 @@ func TestReverseProxy_ErrorHandler(t *testing.T) {
 	}
 }
 
-func TestReverseProxy_ErrorHandler_ContextCanceled(t *testing.T) {
-	// Test that context.Canceled errors return 499 without generic error response
-	proxy, err := createReverseProxy("http://example.com", NewStaticSecretSource(""))
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// Create a canceled context to trigger the cancellation path
-	ctx, cancel := context.WithCancel(context.Background())
-	cancel() // Cancel immediately
-
-	req := httptest.NewRequest(http.MethodGet, "/test", nil).WithContext(ctx)
-	rr := httptest.NewRecorder()
-
-	// Directly invoke the ErrorHandler with context.Canceled
-	proxy.ErrorHandler(rr, req, context.Canceled)
-
-	// Body should be empty for canceled requests (no JSON error response)
-	body := rr.Body.Bytes()
-	if len(body) > 0 {
-		t.Fatalf("expected empty body for canceled context, got: %s", body)
-	}
-}
-
 func TestReverseProxy_FullRoundTrip_Gzip(t *testing.T) {
 	// Upstream returns gzipped JSON without Content-Encoding header
 	upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
diff --git a/internal/api/modules/amp/response_rewriter.go b/internal/api/modules/amp/response_rewriter.go
index 8a9cad704d..57e4922a7c 100644
--- a/internal/api/modules/amp/response_rewriter.go
+++ b/internal/api/modules/amp/response_rewriter.go
@@ -29,71 +29,15 @@ func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRe
 	}
 }
 
-const maxBufferedResponseBytes = 2 * 1024 * 1024 // 2MB safety cap
-
-func looksLikeSSEChunk(data []byte) bool {
-	// Fallback detection: some upstreams may omit/lie about Content-Type, causing SSE to be buffered.
-	// Heuristics are intentionally simple and cheap.
-	return bytes.Contains(data, []byte("data:")) ||
-		bytes.Contains(data, []byte("event:")) ||
-		bytes.Contains(data, []byte("message_start")) ||
-		bytes.Contains(data, []byte("message_delta")) ||
-		bytes.Contains(data, []byte("content_block_start")) ||
-		bytes.Contains(data, []byte("content_block_delta")) ||
-		bytes.Contains(data, []byte("content_block_stop")) ||
-		bytes.Contains(data, []byte("\n\n"))
-}
-
-func (rw *ResponseRewriter) enableStreaming(reason string) error {
-	if rw.isStreaming {
-		return nil
-	}
-	rw.isStreaming = true
-
-	// Flush any previously buffered data to avoid reordering or data loss.
-	if rw.body != nil && rw.body.Len() > 0 {
-		buf := rw.body.Bytes()
-		// Copy before Reset() to keep bytes stable.
-		toFlush := make([]byte, len(buf))
-		copy(toFlush, buf)
-		rw.body.Reset()
-
-		if _, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(toFlush)); err != nil {
-			return err
-		}
-		if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
-			flusher.Flush()
-		}
-	}
-
-	log.Debugf("amp response rewriter: switched to streaming (%s)", reason)
-	return nil
-}
-
 // Write intercepts response writes and buffers them for model name replacement
 func (rw *ResponseRewriter) Write(data []byte) (int, error) {
-	// Detect streaming on first write (header-based)
-	if !rw.isStreaming && rw.body.Len() == 0 {
+	// Detect streaming on first write
+	if rw.body.Len() == 0 && !rw.isStreaming {
 		contentType := rw.Header().Get("Content-Type")
 		rw.isStreaming = strings.Contains(contentType, "text/event-stream") ||
 			strings.Contains(contentType, "stream")
 	}
 
-	if !rw.isStreaming {
-		// Content-based fallback: detect SSE-like chunks even if Content-Type is missing/wrong.
-		if looksLikeSSEChunk(data) {
-			if err := rw.enableStreaming("sse heuristic"); err != nil {
-				return 0, err
-			}
-		} else if rw.body.Len()+len(data) > maxBufferedResponseBytes {
-			// Safety cap: avoid unbounded buffering on large responses.
-			log.Warnf("amp response rewriter: buffer exceeded %d bytes, switching to streaming", maxBufferedResponseBytes)
-			if err := rw.enableStreaming("buffer limit"); err != nil {
-				return 0, err
-			}
-		}
-	}
-
 	if rw.isStreaming {
 		n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
 		if err == nil {
@@ -122,7 +66,7 @@ func (rw *ResponseRewriter) Flush() {
 }
 
 // modelFieldPaths lists all JSON paths where model name may appear
-var modelFieldPaths = []string{"message.model", "model", "modelVersion", "response.model", "response.modelVersion"}
+var modelFieldPaths = []string{"model", "modelVersion", "response.modelVersion", "message.model"}
 
 // rewriteModelInResponse replaces all occurrences of the mapped model with the original model in JSON
 // It also suppresses "thinking" blocks if "tool_use" is present to ensure Amp client compatibility
diff --git a/internal/api/modules/amp/routes.go b/internal/api/modules/amp/routes.go
index 456a50ac12..790a3cce3f 100644
--- a/internal/api/modules/amp/routes.go
+++ b/internal/api/modules/amp/routes.go
@@ -5,11 +5,12 @@ import (
 	"errors"
 	"net"
 	"net/http"
-	"net/http/httputil"
 	"strings"
 
 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/routing"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini"
@@ -234,19 +235,20 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	// If no local OAuth is available, falls back to ampcode.com proxy.
 	geminiHandlers := gemini.NewGeminiAPIHandler(baseHandler)
 	geminiBridge := createGeminiBridgeHandler(geminiHandlers.GeminiHandler)
-	geminiV1Beta1Fallback := NewFallbackHandlerWithMapper(func() *httputil.ReverseProxy {
-		return m.getProxy()
-	}, m.modelMapper, m.forceModelMappings)
-	geminiV1Beta1Handler := geminiV1Beta1Fallback.WrapHandler(geminiBridge)
 
-	// Route POST model calls through Gemini bridge with FallbackHandler.
-	// FallbackHandler checks provider -> mapping -> proxy fallback automatically.
+	// T-025: Migrated Gemini v1beta1 bridge to use ModelRoutingWrapper
+	// Create a dedicated routing wrapper for the Gemini bridge
+	geminiBridgeWrapper := m.createModelRoutingWrapper()
+	geminiV1Beta1Handler := geminiBridgeWrapper.Wrap(geminiBridge)
+
+	// Route POST model calls through Gemini bridge with ModelRoutingWrapper.
+	// ModelRoutingWrapper checks provider -> mapping -> proxy fallback automatically.
 	// All other methods (e.g., GET model listing) always proxy to upstream to preserve Amp CLI behavior.
 	ampAPI.Any("/provider/google/v1beta1/*path", func(c *gin.Context) {
 		if c.Request.Method == "POST" {
 			if path := c.Param("path"); strings.Contains(path, "/models/") {
-				// POST with /models/ path -> use Gemini bridge with fallback handler
-				// FallbackHandler will check provider/mapping and proxy if needed
+				// POST with /models/ path -> use Gemini bridge with unified routing wrapper
+				// ModelRoutingWrapper will check provider/mapping and proxy if needed
 				geminiV1Beta1Handler(c)
 				return
 			}
@@ -256,6 +258,41 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	})
 }
 
+// createModelRoutingWrapper creates a new ModelRoutingWrapper for unified routing.
+// This is used for testing the new routing implementation (T-021 onwards).
+func (m *AmpModule) createModelRoutingWrapper() *routing.ModelRoutingWrapper {
+	// Create a registry - in production this would be populated with actual providers
+	registry := routing.NewRegistry()
+
+	// Create a minimal config with just AmpCode settings
+	// The Router only needs AmpCode.ModelMappings and OAuthModelAlias
+	cfg := &config.Config{
+		AmpCode: func() config.AmpCode {
+			if m.modelMapper != nil {
+				return config.AmpCode{
+					ModelMappings: m.modelMapper.GetMappingsAsConfig(),
+				}
+			}
+			return config.AmpCode{}
+		}(),
+	}
+
+	// Create router with registry and config
+	router := routing.NewRouter(registry, cfg)
+
+	// Create wrapper with proxy function
+	proxyFunc := func(c *gin.Context) {
+		proxy := m.getProxy()
+		if proxy != nil {
+			proxy.ServeHTTP(c.Writer, c.Request)
+		} else {
+			c.JSON(503, gin.H{"error": "amp upstream proxy not available"})
+		}
+	}
+
+	return routing.NewModelRoutingWrapper(router, nil, nil, proxyFunc)
+}
+
 // registerProviderAliases registers /api/provider/{provider}/... routes
 // These allow Amp CLI to route requests like:
 //
@@ -269,12 +306,9 @@ func (m *AmpModule) registerProviderAliases(engine *gin.Engine, baseHandler *han
 	claudeCodeHandlers := claude.NewClaudeCodeAPIHandler(baseHandler)
 	openaiResponsesHandlers := openai.NewOpenAIResponsesAPIHandler(baseHandler)
 
-	// Create fallback handler wrapper that forwards to ampcode.com when provider not found
-	// Uses m.getProxy() for hot-reload support (proxy can be updated at runtime)
-	// Also includes model mapping support for routing unavailable models to alternatives
-	fallbackHandler := NewFallbackHandlerWithMapper(func() *httputil.ReverseProxy {
-		return m.getProxy()
-	}, m.modelMapper, m.forceModelMappings)
+	// Create unified routing wrapper (T-021 onwards)
+	// Replaces FallbackHandler with Router-based unified routing
+	routingWrapper := m.createModelRoutingWrapper()
 
 	// Provider-specific routes under /api/provider/:provider
 	ampProviders := engine.Group("/api/provider")
@@ -302,33 +336,36 @@ func (m *AmpModule) registerProviderAliases(engine *gin.Engine, baseHandler *han
 	}
 
 	// Root-level routes (for providers that omit /v1, like groq/cerebras)
-	// Wrap handlers with fallback logic to forward to ampcode.com when provider not found
+	// T-022: Migrated all OpenAI routes to use ModelRoutingWrapper for unified routing
 	provider.GET("/models", ampModelsHandler) // Models endpoint doesn't need fallback (no body to check)
-	provider.POST("/chat/completions", fallbackHandler.WrapHandler(openaiHandlers.ChatCompletions))
-	provider.POST("/completions", fallbackHandler.WrapHandler(openaiHandlers.Completions))
-	provider.POST("/responses", fallbackHandler.WrapHandler(openaiResponsesHandlers.Responses))
+	provider.POST("/chat/completions", routingWrapper.Wrap(openaiHandlers.ChatCompletions))
+	provider.POST("/completions", routingWrapper.Wrap(openaiHandlers.Completions))
+	provider.POST("/responses", routingWrapper.Wrap(openaiResponsesHandlers.Responses))
 
 	// /v1 routes (OpenAI/Claude-compatible endpoints)
 	v1Amp := provider.Group("/v1")
 	{
 		v1Amp.GET("/models", ampModelsHandler) // Models endpoint doesn't need fallback
 
-		// OpenAI-compatible endpoints with fallback
-		v1Amp.POST("/chat/completions", fallbackHandler.WrapHandler(openaiHandlers.ChatCompletions))
-		v1Amp.POST("/completions", fallbackHandler.WrapHandler(openaiHandlers.Completions))
-		v1Amp.POST("/responses", fallbackHandler.WrapHandler(openaiResponsesHandlers.Responses))
+		// OpenAI-compatible endpoints with ModelRoutingWrapper
+		// T-021, T-022: Migrated to unified routing wrapper
+		v1Amp.POST("/chat/completions", routingWrapper.Wrap(openaiHandlers.ChatCompletions))
+		v1Amp.POST("/completions", routingWrapper.Wrap(openaiHandlers.Completions))
+		v1Amp.POST("/responses", routingWrapper.Wrap(openaiResponsesHandlers.Responses))
 
-		// Claude/Anthropic-compatible endpoints with fallback
-		v1Amp.POST("/messages", fallbackHandler.WrapHandler(claudeCodeHandlers.ClaudeMessages))
-		v1Amp.POST("/messages/count_tokens", fallbackHandler.WrapHandler(claudeCodeHandlers.ClaudeCountTokens))
+		// Claude/Anthropic-compatible endpoints with ModelRoutingWrapper
+		// T-023: Migrated Claude routes to unified routing wrapper
+		v1Amp.POST("/messages", routingWrapper.Wrap(claudeCodeHandlers.ClaudeMessages))
+		v1Amp.POST("/messages/count_tokens", routingWrapper.Wrap(claudeCodeHandlers.ClaudeCountTokens))
 	}
 
 	// /v1beta routes (Gemini native API)
 	// Note: Gemini handler extracts model from URL path, so fallback logic needs special handling
+	// T-024: Migrated Gemini v1beta routes to unified routing wrapper
 	v1betaAmp := provider.Group("/v1beta")
 	{
 		v1betaAmp.GET("/models", geminiHandlers.GeminiModels)
-		v1betaAmp.POST("/models/*action", fallbackHandler.WrapHandler(geminiHandlers.GeminiHandler))
+		v1betaAmp.POST("/models/*action", routingWrapper.Wrap(geminiHandlers.GeminiHandler))
 		v1betaAmp.GET("/models/*action", geminiHandlers.GeminiGetHandler)
 	}
 }
diff --git a/internal/api/server.go b/internal/api/server.go
index 98041b8be4..bcb855d5a2 100644
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -24,7 +24,6 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/middleware"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/api/modules"
 	ampmodule "github.com/router-for-me/CLIProxyAPI/v6/internal/api/modules/amp"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/managementasset"
@@ -285,19 +284,13 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 		optionState.routerConfigurator(engine, s.handlers, cfg)
 	}
 
-	// Register management routes when configuration or environment secrets are available,
-	// or when a local management password is provided (e.g. TUI mode).
-	hasManagementSecret := cfg.RemoteManagement.SecretKey != "" || envManagementSecret || s.localPassword != ""
+	// Register management routes when configuration or environment secrets are available.
+	hasManagementSecret := cfg.RemoteManagement.SecretKey != "" || envManagementSecret
 	s.managementRoutesEnabled.Store(hasManagementSecret)
 	if hasManagementSecret {
 		s.registerManagementRoutes()
 	}
 
-	// === CLIProxyAPIPlus 扩展: 注册 Kiro OAuth Web 路由 ===
-	kiroOAuthHandler := kiro.NewOAuthWebHandler(cfg)
-	kiroOAuthHandler.RegisterRoutes(engine)
-	log.Info("Kiro OAuth Web routes registered at /v0/oauth/kiro/*")
-
 	if optionState.keepAliveEnabled {
 		s.enableKeepAlive(optionState.keepAliveTimeout, optionState.keepAliveOnTimeout)
 	}
@@ -330,7 +323,6 @@ func (s *Server) setupRoutes() {
 		v1.POST("/completions", openaiHandlers.Completions)
 		v1.POST("/messages", claudeCodeHandlers.ClaudeMessages)
 		v1.POST("/messages/count_tokens", claudeCodeHandlers.ClaudeCountTokens)
-		v1.GET("/responses", openaiResponsesHandlers.ResponsesWebsocket)
 		v1.POST("/responses", openaiResponsesHandlers.Responses)
 		v1.POST("/responses/compact", openaiResponsesHandlers.Compact)
 	}
@@ -355,12 +347,6 @@ func (s *Server) setupRoutes() {
 			},
 		})
 	})
-
-	// Event logging endpoint - handles Claude Code telemetry requests
-	// Returns 200 OK to prevent 404 errors in logs
-	s.engine.POST("/api/event_logging/batch", func(c *gin.Context) {
-		c.JSON(http.StatusOK, gin.H{"status": "ok"})
-	})
 	s.engine.POST("/v1internal:method", geminiCLIHandlers.CLIHandler)
 
 	// OAuth callback endpoints (reuse main server port)
@@ -436,20 +422,6 @@ func (s *Server) setupRoutes() {
 		c.String(http.StatusOK, oauthCallbackSuccessHTML)
 	})
 
-	s.engine.GET("/kiro/callback", func(c *gin.Context) {
-		code := c.Query("code")
-		state := c.Query("state")
-		errStr := c.Query("error")
-		if errStr == "" {
-			errStr = c.Query("error_description")
-		}
-		if state != "" {
-			_, _ = managementHandlers.WriteOAuthCallbackFileForPendingSession(s.cfg.AuthDir, "kiro", state, code, errStr)
-		}
-		c.Header("Content-Type", "text/html; charset=utf-8")
-		c.String(http.StatusOK, oauthCallbackSuccessHTML)
-	})
-
 	// Management routes are registered lazily by registerManagementRoutes when a secret is configured.
 }
 
@@ -644,7 +616,6 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
 		mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)
 		mgmt.PATCH("/auth-files/status", s.mgmt.PatchAuthFileStatus)
-		mgmt.PATCH("/auth-files/fields", s.mgmt.PatchAuthFileFields)
 		mgmt.POST("/vertex/import", s.mgmt.ImportVertexCredential)
 
 		mgmt.GET("/anthropic-auth-url", s.mgmt.RequestAnthropicToken)
@@ -652,12 +623,8 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/gemini-cli-auth-url", s.mgmt.RequestGeminiCLIToken)
 		mgmt.GET("/antigravity-auth-url", s.mgmt.RequestAntigravityToken)
 		mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
-		mgmt.GET("/kilo-auth-url", s.mgmt.RequestKiloToken)
-		mgmt.GET("/kimi-auth-url", s.mgmt.RequestKimiToken)
 		mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
 		mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
-		mgmt.GET("/kiro-auth-url", s.mgmt.RequestKiroToken)
-		mgmt.GET("/github-auth-url", s.mgmt.RequestGitHubToken)
 		mgmt.POST("/oauth-callback", s.mgmt.PostOAuthCallback)
 		mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
 	}
@@ -687,17 +654,14 @@ func (s *Server) serveManagementControlPanel(c *gin.Context) {
 
 	if _, err := os.Stat(filePath); err != nil {
 		if os.IsNotExist(err) {
-			// Synchronously ensure management.html is available with a detached context.
-			// Control panel bootstrap should not be canceled by client disconnects.
-			if !managementasset.EnsureLatestManagementHTML(context.Background(), managementasset.StaticDir(s.configFilePath), cfg.ProxyURL, cfg.RemoteManagement.PanelGitHubRepository) {
-				c.AbortWithStatus(http.StatusNotFound)
-				return
-			}
-		} else {
-			log.WithError(err).Error("failed to stat management control panel asset")
-			c.AbortWithStatus(http.StatusInternalServerError)
+			go managementasset.EnsureLatestManagementHTML(context.Background(), managementasset.StaticDir(s.configFilePath), cfg.ProxyURL, cfg.RemoteManagement.PanelGitHubRepository)
+			c.AbortWithStatus(http.StatusNotFound)
 			return
 		}
+
+		log.WithError(err).Error("failed to stat management control panel asset")
+		c.AbortWithStatus(http.StatusInternalServerError)
+		return
 	}
 
 	c.File(filePath)
@@ -987,13 +951,17 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 
 	s.handlers.UpdateClients(&cfg.SDKConfig)
 
+	if !cfg.RemoteManagement.DisableControlPanel {
+		staticDir := managementasset.StaticDir(s.configFilePath)
+		go managementasset.EnsureLatestManagementHTML(context.Background(), staticDir, cfg.ProxyURL, cfg.RemoteManagement.PanelGitHubRepository)
+	}
 	if s.mgmt != nil {
 		s.mgmt.SetConfig(cfg)
 		s.mgmt.SetAuthManager(s.handlers.AuthManager)
 	}
 
-	// Notify Amp module only when Amp config has changed.
-	ampConfigChanged := oldCfg == nil || !reflect.DeepEqual(oldCfg.AmpCode, cfg.AmpCode)
+	// Notify Amp module when Amp config or OAuth model aliases have changed.
+	ampConfigChanged := oldCfg == nil || !reflect.DeepEqual(oldCfg.AmpCode, cfg.AmpCode) || !reflect.DeepEqual(oldCfg.OAuthModelAlias, cfg.OAuthModelAlias)
 	if ampConfigChanged {
 		if s.ampModule != nil {
 			log.Debugf("triggering amp module config update")
@@ -1065,10 +1033,14 @@ func AuthMiddleware(manager *sdkaccess.Manager) gin.HandlerFunc {
 			return
 		}
 
-		statusCode := err.HTTPStatusCode()
-		if statusCode >= http.StatusInternalServerError {
+		switch {
+		case errors.Is(err, sdkaccess.ErrNoCredentials):
+			c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "Missing API key"})
+		case errors.Is(err, sdkaccess.ErrInvalidCredential):
+			c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "Invalid API key"})
+		default:
 			log.Errorf("authentication middleware error: %v", err)
+			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "Authentication service error"})
 		}
-		c.AbortWithStatusJSON(statusCode, gin.H{"error": err.Message})
 	}
 }
diff --git a/internal/auth/claude/anthropic_auth.go b/internal/auth/claude/anthropic_auth.go
index 2853e418e6..e0f6e3c8ad 100644
--- a/internal/auth/claude/anthropic_auth.go
+++ b/internal/auth/claude/anthropic_auth.go
@@ -20,7 +20,7 @@ import (
 // OAuth configuration constants for Claude/Anthropic
 const (
 	AuthURL     = "https://claude.ai/oauth/authorize"
-	TokenURL    = "https://api.anthropic.com/v1/oauth/token"
+	TokenURL    = "https://console.anthropic.com/v1/oauth/token"
 	ClientID    = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
 	RedirectURI = "http://localhost:54545/callback"
 )
diff --git a/internal/auth/claude/oauth_server.go b/internal/auth/claude/oauth_server.go
index 49b04794e5..a6ebe2f7b8 100644
--- a/internal/auth/claude/oauth_server.go
+++ b/internal/auth/claude/oauth_server.go
@@ -242,11 +242,6 @@ func (s *OAuthServer) handleSuccess(w http.ResponseWriter, r *http.Request) {
 		platformURL = "https://console.anthropic.com/"
 	}
 
-	// Validate platformURL to prevent XSS - only allow http/https URLs
-	if !isValidURL(platformURL) {
-		platformURL = "https://console.anthropic.com/"
-	}
-
 	// Generate success page HTML with dynamic content
 	successHTML := s.generateSuccessHTML(setupRequired, platformURL)
 
@@ -256,12 +251,6 @@ func (s *OAuthServer) handleSuccess(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
-// isValidURL checks if the URL is a valid http/https URL to prevent XSS
-func isValidURL(urlStr string) bool {
-	urlStr = strings.TrimSpace(urlStr)
-	return strings.HasPrefix(urlStr, "https://") || strings.HasPrefix(urlStr, "http://")
-}
-
 // generateSuccessHTML creates the HTML content for the success page.
 // It customizes the page based on whether additional setup is required
 // and includes a link to the platform.
diff --git a/internal/auth/codex/oauth_server.go b/internal/auth/codex/oauth_server.go
index 58b5394efb..9c6a6c5b78 100644
--- a/internal/auth/codex/oauth_server.go
+++ b/internal/auth/codex/oauth_server.go
@@ -239,11 +239,6 @@ func (s *OAuthServer) handleSuccess(w http.ResponseWriter, r *http.Request) {
 		platformURL = "https://platform.openai.com"
 	}
 
-	// Validate platformURL to prevent XSS - only allow http/https URLs
-	if !isValidURL(platformURL) {
-		platformURL = "https://platform.openai.com"
-	}
-
 	// Generate success page HTML with dynamic content
 	successHTML := s.generateSuccessHTML(setupRequired, platformURL)
 
@@ -253,12 +248,6 @@ func (s *OAuthServer) handleSuccess(w http.ResponseWriter, r *http.Request) {
 	}
 }
 
-// isValidURL checks if the URL is a valid http/https URL to prevent XSS
-func isValidURL(urlStr string) bool {
-	urlStr = strings.TrimSpace(urlStr)
-	return strings.HasPrefix(urlStr, "https://") || strings.HasPrefix(urlStr, "http://")
-}
-
 // generateSuccessHTML creates the HTML content for the success page.
 // It customizes the page based on whether additional setup is required
 // and includes a link to the platform.
diff --git a/internal/auth/iflow/iflow_auth.go b/internal/auth/iflow/iflow_auth.go
index 279d7339d3..fa9f38c3e6 100644
--- a/internal/auth/iflow/iflow_auth.go
+++ b/internal/auth/iflow/iflow_auth.go
@@ -9,7 +9,6 @@ import (
 	"io"
 	"net/http"
 	"net/url"
-	"os"
 	"strings"
 	"time"
 
@@ -29,21 +28,10 @@ const (
 	iFlowAPIKeyEndpoint = "https://platform.iflow.cn/api/openapi/apikey"
 
 	// Client credentials provided by iFlow for the Code Assist integration.
-	iFlowOAuthClientID = "10009311001"
-	// Default client secret (can be overridden via IFLOW_CLIENT_SECRET env var)
-	defaultIFlowClientSecret = "4Z3YjXycVsQvyGF1etiNlIBB4RsqSDtW"
+	iFlowOAuthClientID     = "10009311001"
+	iFlowOAuthClientSecret = "4Z3YjXycVsQvyGF1etiNlIBB4RsqSDtW"
 )
 
-// getIFlowClientSecret returns the iFlow OAuth client secret.
-// It first checks the IFLOW_CLIENT_SECRET environment variable,
-// falling back to the default value if not set.
-func getIFlowClientSecret() string {
-	if secret := os.Getenv("IFLOW_CLIENT_SECRET"); secret != "" {
-		return secret
-	}
-	return defaultIFlowClientSecret
-}
-
 // DefaultAPIBaseURL is the canonical chat completions endpoint.
 const DefaultAPIBaseURL = "https://apis.iflow.cn/v1"
 
@@ -84,7 +72,7 @@ func (ia *IFlowAuth) ExchangeCodeForTokens(ctx context.Context, code, redirectUR
 	form.Set("code", code)
 	form.Set("redirect_uri", redirectURI)
 	form.Set("client_id", iFlowOAuthClientID)
-	form.Set("client_secret", getIFlowClientSecret())
+	form.Set("client_secret", iFlowOAuthClientSecret)
 
 	req, err := ia.newTokenRequest(ctx, form)
 	if err != nil {
@@ -100,7 +88,7 @@ func (ia *IFlowAuth) RefreshTokens(ctx context.Context, refreshToken string) (*I
 	form.Set("grant_type", "refresh_token")
 	form.Set("refresh_token", refreshToken)
 	form.Set("client_id", iFlowOAuthClientID)
-	form.Set("client_secret", getIFlowClientSecret())
+	form.Set("client_secret", iFlowOAuthClientSecret)
 
 	req, err := ia.newTokenRequest(ctx, form)
 	if err != nil {
@@ -116,7 +104,7 @@ func (ia *IFlowAuth) newTokenRequest(ctx context.Context, form url.Values) (*htt
 		return nil, fmt.Errorf("iflow token: create request failed: %w", err)
 	}
 
-	basic := base64.StdEncoding.EncodeToString([]byte(iFlowOAuthClientID + ":" + getIFlowClientSecret()))
+	basic := base64.StdEncoding.EncodeToString([]byte(iFlowOAuthClientID + ":" + iFlowOAuthClientSecret))
 	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
 	req.Header.Set("Accept", "application/json")
 	req.Header.Set("Authorization", "Basic "+basic)
diff --git a/internal/browser/browser.go b/internal/browser/browser.go
index 3a5aeea7e2..b24dc5e112 100644
--- a/internal/browser/browser.go
+++ b/internal/browser/browser.go
@@ -6,49 +6,14 @@ import (
 	"fmt"
 	"os/exec"
 	"runtime"
-	"strings"
-	"sync"
 
-	pkgbrowser "github.com/pkg/browser"
 	log "github.com/sirupsen/logrus"
+	"github.com/skratchdot/open-golang/open"
 )
 
-// incognitoMode controls whether to open URLs in incognito/private mode.
-// This is useful for OAuth flows where you want to use a different account.
-var incognitoMode bool
-
-// lastBrowserProcess stores the last opened browser process for cleanup
-var lastBrowserProcess *exec.Cmd
-var browserMutex sync.Mutex
-
-// SetIncognitoMode enables or disables incognito/private browsing mode.
-func SetIncognitoMode(enabled bool) {
-	incognitoMode = enabled
-}
-
-// IsIncognitoMode returns whether incognito mode is enabled.
-func IsIncognitoMode() bool {
-	return incognitoMode
-}
-
-// CloseBrowser closes the last opened browser process.
-func CloseBrowser() error {
-	browserMutex.Lock()
-	defer browserMutex.Unlock()
-
-	if lastBrowserProcess == nil || lastBrowserProcess.Process == nil {
-		return nil
-	}
-	
-	err := lastBrowserProcess.Process.Kill()
-	lastBrowserProcess = nil
-	return err
-}
-
 // OpenURL opens the specified URL in the default web browser.
-// It uses the pkg/browser library which provides robust cross-platform support
-// for Windows, macOS, and Linux.
-// If incognito mode is enabled, it will open in a private/incognito window.
+// It first attempts to use a platform-agnostic library and falls back to
+// platform-specific commands if that fails.
 //
 // Parameters:
 //   - url: The URL to open.
@@ -56,22 +21,16 @@ func CloseBrowser() error {
 // Returns:
 //   - An error if the URL cannot be opened, otherwise nil.
 func OpenURL(url string) error {
-	log.Debugf("Opening URL in browser: %s (incognito=%v)", url, incognitoMode)
+	fmt.Printf("Attempting to open URL in browser: %s\n", url)
 
-	// If incognito mode is enabled, use platform-specific incognito commands
-	if incognitoMode {
-		log.Debug("Using incognito mode")
-		return openURLIncognito(url)
-	}
-
-	// Use pkg/browser for cross-platform support
-	err := pkgbrowser.OpenURL(url)
+	// Try using the open-golang library first
+	err := open.Run(url)
 	if err == nil {
-		log.Debug("Successfully opened URL using pkg/browser library")
+		log.Debug("Successfully opened URL using open-golang library")
 		return nil
 	}
 
-	log.Debugf("pkg/browser failed: %v, trying platform-specific commands", err)
+	log.Debugf("open-golang failed: %v, trying platform-specific commands", err)
 
 	// Fallback to platform-specific commands
 	return openURLPlatformSpecific(url)
@@ -119,379 +78,18 @@ func openURLPlatformSpecific(url string) error {
 	return nil
 }
 
-// openURLIncognito opens a URL in incognito/private browsing mode.
-// It first tries to detect the default browser and use its incognito flag.
-// Falls back to a chain of known browsers if detection fails.
-//
-// Parameters:
-//   - url: The URL to open.
-//
-// Returns:
-//   - An error if the URL cannot be opened, otherwise nil.
-func openURLIncognito(url string) error {
-	// First, try to detect and use the default browser
-	if cmd := tryDefaultBrowserIncognito(url); cmd != nil {
-		log.Debugf("Using detected default browser: %s %v", cmd.Path, cmd.Args[1:])
-		if err := cmd.Start(); err == nil {
-			storeBrowserProcess(cmd)
-			log.Debug("Successfully opened URL in default browser's incognito mode")
-			return nil
-		}
-		log.Debugf("Failed to start default browser, trying fallback chain")
-	}
-
-	// Fallback to known browser chain
-	cmd := tryFallbackBrowsersIncognito(url)
-	if cmd == nil {
-		log.Warn("No browser with incognito support found, falling back to normal mode")
-		return openURLPlatformSpecific(url)
-	}
-
-	log.Debugf("Running incognito command: %s %v", cmd.Path, cmd.Args[1:])
-	err := cmd.Start()
-	if err != nil {
-		log.Warnf("Failed to open incognito browser: %v, falling back to normal mode", err)
-		return openURLPlatformSpecific(url)
-	}
-
-	storeBrowserProcess(cmd)
-	log.Debug("Successfully opened URL in incognito/private mode")
-	return nil
-}
-
-// storeBrowserProcess safely stores the browser process for later cleanup.
-func storeBrowserProcess(cmd *exec.Cmd) {
-	browserMutex.Lock()
-	lastBrowserProcess = cmd
-	browserMutex.Unlock()
-}
-
-// tryDefaultBrowserIncognito attempts to detect the default browser and return
-// an exec.Cmd configured with the appropriate incognito flag.
-func tryDefaultBrowserIncognito(url string) *exec.Cmd {
-	switch runtime.GOOS {
-	case "darwin":
-		return tryDefaultBrowserMacOS(url)
-	case "windows":
-		return tryDefaultBrowserWindows(url)
-	case "linux":
-		return tryDefaultBrowserLinux(url)
-	}
-	return nil
-}
-
-// tryDefaultBrowserMacOS detects the default browser on macOS.
-func tryDefaultBrowserMacOS(url string) *exec.Cmd {
-	// Try to get default browser from Launch Services
-	out, err := exec.Command("defaults", "read", "com.apple.LaunchServices/com.apple.launchservices.secure", "LSHandlers").Output()
-	if err != nil {
-		return nil
-	}
-
-	output := string(out)
-	var browserName string
-
-	// Parse the output to find the http/https handler
-	if containsBrowserID(output, "com.google.chrome") {
-		browserName = "chrome"
-	} else if containsBrowserID(output, "org.mozilla.firefox") {
-		browserName = "firefox"
-	} else if containsBrowserID(output, "com.apple.safari") {
-		browserName = "safari"
-	} else if containsBrowserID(output, "com.brave.browser") {
-		browserName = "brave"
-	} else if containsBrowserID(output, "com.microsoft.edgemac") {
-		browserName = "edge"
-	}
-
-	return createMacOSIncognitoCmd(browserName, url)
-}
-
-// containsBrowserID checks if the LaunchServices output contains a browser ID.
-func containsBrowserID(output, bundleID string) bool {
-	return strings.Contains(output, bundleID)
-}
-
-// createMacOSIncognitoCmd creates the appropriate incognito command for macOS browsers.
-func createMacOSIncognitoCmd(browserName, url string) *exec.Cmd {
-	switch browserName {
-	case "chrome":
-		// Try direct path first
-		chromePath := "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
-		if _, err := exec.LookPath(chromePath); err == nil {
-			return exec.Command(chromePath, "--incognito", url)
-		}
-		return exec.Command("open", "-na", "Google Chrome", "--args", "--incognito", url)
-	case "firefox":
-		return exec.Command("open", "-na", "Firefox", "--args", "--private-window", url)
-	case "safari":
-		// Safari doesn't have CLI incognito, try AppleScript
-		return tryAppleScriptSafariPrivate(url)
-	case "brave":
-		return exec.Command("open", "-na", "Brave Browser", "--args", "--incognito", url)
-	case "edge":
-		return exec.Command("open", "-na", "Microsoft Edge", "--args", "--inprivate", url)
-	}
-	return nil
-}
-
-// tryAppleScriptSafariPrivate attempts to open Safari in private browsing mode using AppleScript.
-func tryAppleScriptSafariPrivate(url string) *exec.Cmd {
-	// AppleScript to open a new private window in Safari
-	script := fmt.Sprintf(`
-		tell application "Safari"
-			activate
-			tell application "System Events"
-				keystroke "n" using {command down, shift down}
-				delay 0.5
-			end tell
-			set URL of document 1 to "%s"
-		end tell
-	`, url)
-
-	cmd := exec.Command("osascript", "-e", script)
-	// Test if this approach works by checking if Safari is available
-	if _, err := exec.LookPath("/Applications/Safari.app/Contents/MacOS/Safari"); err != nil {
-		log.Debug("Safari not found, AppleScript private window not available")
-		return nil
-	}
-	log.Debug("Attempting Safari private window via AppleScript")
-	return cmd
-}
-
-// tryDefaultBrowserWindows detects the default browser on Windows via registry.
-func tryDefaultBrowserWindows(url string) *exec.Cmd {
-	// Query registry for default browser
-	out, err := exec.Command("reg", "query",
-		`HKEY_CURRENT_USER\Software\Microsoft\Windows\Shell\Associations\UrlAssociations\http\UserChoice`,
-		"/v", "ProgId").Output()
-	if err != nil {
-		return nil
-	}
-
-	output := string(out)
-	var browserName string
-
-	// Map ProgId to browser name
-	if strings.Contains(output, "ChromeHTML") {
-		browserName = "chrome"
-	} else if strings.Contains(output, "FirefoxURL") {
-		browserName = "firefox"
-	} else if strings.Contains(output, "MSEdgeHTM") {
-		browserName = "edge"
-	} else if strings.Contains(output, "BraveHTML") {
-		browserName = "brave"
-	}
-
-	return createWindowsIncognitoCmd(browserName, url)
-}
-
-// createWindowsIncognitoCmd creates the appropriate incognito command for Windows browsers.
-func createWindowsIncognitoCmd(browserName, url string) *exec.Cmd {
-	switch browserName {
-	case "chrome":
-		paths := []string{
-			"chrome",
-			`C:\Program Files\Google\Chrome\Application\chrome.exe`,
-			`C:\Program Files (x86)\Google\Chrome\Application\chrome.exe`,
-		}
-		for _, p := range paths {
-			if _, err := exec.LookPath(p); err == nil {
-				return exec.Command(p, "--incognito", url)
-			}
-		}
-	case "firefox":
-		if path, err := exec.LookPath("firefox"); err == nil {
-			return exec.Command(path, "--private-window", url)
-		}
-	case "edge":
-		paths := []string{
-			"msedge",
-			`C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe`,
-			`C:\Program Files\Microsoft\Edge\Application\msedge.exe`,
-		}
-		for _, p := range paths {
-			if _, err := exec.LookPath(p); err == nil {
-				return exec.Command(p, "--inprivate", url)
-			}
-		}
-	case "brave":
-		paths := []string{
-			`C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe`,
-			`C:\Program Files (x86)\BraveSoftware\Brave-Browser\Application\brave.exe`,
-		}
-		for _, p := range paths {
-			if _, err := exec.LookPath(p); err == nil {
-				return exec.Command(p, "--incognito", url)
-			}
-		}
-	}
-	return nil
-}
-
-// tryDefaultBrowserLinux detects the default browser on Linux using xdg-settings.
-func tryDefaultBrowserLinux(url string) *exec.Cmd {
-	out, err := exec.Command("xdg-settings", "get", "default-web-browser").Output()
-	if err != nil {
-		return nil
-	}
-
-	desktop := string(out)
-	var browserName string
-
-	// Map .desktop file to browser name
-	if strings.Contains(desktop, "google-chrome") || strings.Contains(desktop, "chrome") {
-		browserName = "chrome"
-	} else if strings.Contains(desktop, "firefox") {
-		browserName = "firefox"
-	} else if strings.Contains(desktop, "chromium") {
-		browserName = "chromium"
-	} else if strings.Contains(desktop, "brave") {
-		browserName = "brave"
-	} else if strings.Contains(desktop, "microsoft-edge") || strings.Contains(desktop, "msedge") {
-		browserName = "edge"
-	}
-
-	return createLinuxIncognitoCmd(browserName, url)
-}
-
-// createLinuxIncognitoCmd creates the appropriate incognito command for Linux browsers.
-func createLinuxIncognitoCmd(browserName, url string) *exec.Cmd {
-	switch browserName {
-	case "chrome":
-		paths := []string{"google-chrome", "google-chrome-stable"}
-		for _, p := range paths {
-			if path, err := exec.LookPath(p); err == nil {
-				return exec.Command(path, "--incognito", url)
-			}
-		}
-	case "firefox":
-		paths := []string{"firefox", "firefox-esr"}
-		for _, p := range paths {
-			if path, err := exec.LookPath(p); err == nil {
-				return exec.Command(path, "--private-window", url)
-			}
-		}
-	case "chromium":
-		paths := []string{"chromium", "chromium-browser"}
-		for _, p := range paths {
-			if path, err := exec.LookPath(p); err == nil {
-				return exec.Command(path, "--incognito", url)
-			}
-		}
-	case "brave":
-		if path, err := exec.LookPath("brave-browser"); err == nil {
-			return exec.Command(path, "--incognito", url)
-		}
-	case "edge":
-		if path, err := exec.LookPath("microsoft-edge"); err == nil {
-			return exec.Command(path, "--inprivate", url)
-		}
-	}
-	return nil
-}
-
-// tryFallbackBrowsersIncognito tries a chain of known browsers as fallback.
-func tryFallbackBrowsersIncognito(url string) *exec.Cmd {
-	switch runtime.GOOS {
-	case "darwin":
-		return tryFallbackBrowsersMacOS(url)
-	case "windows":
-		return tryFallbackBrowsersWindows(url)
-	case "linux":
-		return tryFallbackBrowsersLinuxChain(url)
-	}
-	return nil
-}
-
-// tryFallbackBrowsersMacOS tries known browsers on macOS.
-func tryFallbackBrowsersMacOS(url string) *exec.Cmd {
-	// Try Chrome
-	chromePath := "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
-	if _, err := exec.LookPath(chromePath); err == nil {
-		return exec.Command(chromePath, "--incognito", url)
-	}
-	// Try Firefox
-	if _, err := exec.LookPath("/Applications/Firefox.app/Contents/MacOS/firefox"); err == nil {
-		return exec.Command("open", "-na", "Firefox", "--args", "--private-window", url)
-	}
-	// Try Brave
-	if _, err := exec.LookPath("/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"); err == nil {
-		return exec.Command("open", "-na", "Brave Browser", "--args", "--incognito", url)
-	}
-	// Try Edge
-	if _, err := exec.LookPath("/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"); err == nil {
-		return exec.Command("open", "-na", "Microsoft Edge", "--args", "--inprivate", url)
-	}
-	// Last resort: try Safari with AppleScript
-	if cmd := tryAppleScriptSafariPrivate(url); cmd != nil {
-		log.Info("Using Safari with AppleScript for private browsing (may require accessibility permissions)")
-		return cmd
-	}
-	return nil
-}
-
-// tryFallbackBrowsersWindows tries known browsers on Windows.
-func tryFallbackBrowsersWindows(url string) *exec.Cmd {
-	// Chrome
-	chromePaths := []string{
-		"chrome",
-		`C:\Program Files\Google\Chrome\Application\chrome.exe`,
-		`C:\Program Files (x86)\Google\Chrome\Application\chrome.exe`,
-	}
-	for _, p := range chromePaths {
-		if _, err := exec.LookPath(p); err == nil {
-			return exec.Command(p, "--incognito", url)
-		}
-	}
-	// Firefox
-	if path, err := exec.LookPath("firefox"); err == nil {
-		return exec.Command(path, "--private-window", url)
-	}
-	// Edge (usually available on Windows 10+)
-	edgePaths := []string{
-		"msedge",
-		`C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe`,
-		`C:\Program Files\Microsoft\Edge\Application\msedge.exe`,
-	}
-	for _, p := range edgePaths {
-		if _, err := exec.LookPath(p); err == nil {
-			return exec.Command(p, "--inprivate", url)
-		}
-	}
-	return nil
-}
-
-// tryFallbackBrowsersLinuxChain tries known browsers on Linux.
-func tryFallbackBrowsersLinuxChain(url string) *exec.Cmd {
-	type browserConfig struct {
-		name string
-		flag string
-	}
-	browsers := []browserConfig{
-		{"google-chrome", "--incognito"},
-		{"google-chrome-stable", "--incognito"},
-		{"chromium", "--incognito"},
-		{"chromium-browser", "--incognito"},
-		{"firefox", "--private-window"},
-		{"firefox-esr", "--private-window"},
-		{"brave-browser", "--incognito"},
-		{"microsoft-edge", "--inprivate"},
-	}
-	for _, b := range browsers {
-		if path, err := exec.LookPath(b.name); err == nil {
-			return exec.Command(path, b.flag, url)
-		}
-	}
-	return nil
-}
-
 // IsAvailable checks if the system has a command available to open a web browser.
 // It verifies the presence of necessary commands for the current operating system.
 //
 // Returns:
 //   - true if a browser can be opened, false otherwise.
 func IsAvailable() bool {
+	// First check if open-golang can work
+	testErr := open.Run("about:blank")
+	if testErr == nil {
+		return true
+	}
+
 	// Check platform-specific commands
 	switch runtime.GOOS {
 	case "darwin":
diff --git a/internal/cache/signature_cache.go b/internal/cache/signature_cache.go
index af5371bfbc..e15b0802ae 100644
--- a/internal/cache/signature_cache.go
+++ b/internal/cache/signature_cache.go
@@ -6,6 +6,8 @@ import (
 	"strings"
 	"sync"
 	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 )
 
 // SignatureEntry holds a cached thinking signature with timestamp
@@ -184,6 +186,7 @@ func HasValidSignature(modelName, signature string) bool {
 }
 
 func GetModelGroup(modelName string) string {
+	// Fast path: check model name patterns first
 	if strings.Contains(modelName, "gpt") {
 		return "gpt"
 	} else if strings.Contains(modelName, "claude") {
@@ -191,5 +194,21 @@ func GetModelGroup(modelName string) string {
 	} else if strings.Contains(modelName, "gemini") {
 		return "gemini"
 	}
+
+	// Slow path: check registry for provider-based grouping
+	// This handles models registered via claude-api-key, gemini-api-key, etc.
+	// that don't have provider name in their model name (e.g., kimi-k2.5 via claude-api-key)
+	if providers := registry.GetGlobalRegistry().GetModelProviders(modelName); len(providers) > 0 {
+		provider := strings.ToLower(providers[0])
+		switch provider {
+		case "claude":
+			return "claude"
+		case "gemini", "gemini-cli", "aistudio", "vertex", "antigravity":
+			return "gemini"
+		case "codex":
+			return "gpt"
+		}
+	}
+
 	return modelName
 }
diff --git a/internal/cache/signature_cache_test.go b/internal/cache/signature_cache_test.go
index 8340815934..af4361f9aa 100644
--- a/internal/cache/signature_cache_test.go
+++ b/internal/cache/signature_cache_test.go
@@ -208,3 +208,84 @@ func TestCacheSignature_ExpirationLogic(t *testing.T) {
 	// but the logic is verified by the implementation
 	_ = time.Now() // Acknowledge we're not testing time passage
 }
+
+// === GetModelGroup Tests ===
+// These tests verify that GetModelGroup correctly identifies model groups
+// both by name pattern (fast path) and by registry provider lookup (slow path).
+
+func TestGetModelGroup_ByNamePattern(t *testing.T) {
+	tests := []struct {
+		modelName     string
+		expectedGroup string
+	}{
+		{"gpt-4o", "gpt"},
+		{"gpt-4-turbo", "gpt"},
+		{"claude-sonnet-4-20250514", "claude"},
+		{"claude-opus-4-5-thinking", "claude"},
+		{"gemini-2.5-pro", "gemini"},
+		{"gemini-3-pro-preview", "gemini"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.modelName, func(t *testing.T) {
+			result := GetModelGroup(tt.modelName)
+			if result != tt.expectedGroup {
+				t.Errorf("GetModelGroup(%q) = %q, expected %q", tt.modelName, result, tt.expectedGroup)
+			}
+		})
+	}
+}
+
+func TestGetModelGroup_UnknownModel(t *testing.T) {
+	// For unknown models with no registry entry, should return the model name itself
+	result := GetModelGroup("unknown-model-xyz")
+	if result != "unknown-model-xyz" {
+		t.Errorf("GetModelGroup for unknown model should return model name, got %q", result)
+	}
+}
+
+// TestGetModelGroup_RegistryFallback tests that models registered via
+// provider-specific API keys (e.g., kimi-k2.5 via claude-api-key) are
+// correctly grouped by their provider.
+// This test requires a populated global registry.
+func TestGetModelGroup_RegistryFallback(t *testing.T) {
+	// This test only makes sense when the global registry is populated
+	// In unit test context, skip if registry is empty
+	
+	// Example: kimi-k2.5 registered via claude-api-key should group as "claude"
+	// The model name doesn't contain "claude", so name pattern matching fails.
+	// The registry should be checked to find the provider.
+	
+	// Skip for now - this requires integration test setup
+	t.Skip("Requires populated global registry - run as integration test")
+}
+
+// === Cross-Model Signature Validation Tests ===
+// These tests verify that signatures cached under one model name can be
+// validated under mapped model names (same provider group).
+
+func TestCacheSignature_CrossModelValidation(t *testing.T) {
+	ClearSignatureCache("")
+
+	// Original request uses "claude-opus-4-5-20251101"
+	originalModel := "claude-opus-4-5-20251101"
+	// Mapped model is "claude-opus-4-5-thinking"
+	mappedModel := "claude-opus-4-5-thinking"
+	
+	text := "Some thinking block content"
+	sig := "validSignature123456789012345678901234567890123456789012"
+
+	// Cache signature under the original model
+	CacheSignature(originalModel, text, sig)
+
+	// Both should return the same signature because they're in the same group
+	retrieved1 := GetCachedSignature(originalModel, text)
+	retrieved2 := GetCachedSignature(mappedModel, text)
+
+	if retrieved1 != sig {
+		t.Errorf("Original model signature mismatch: got %q", retrieved1)
+	}
+	if retrieved2 != sig {
+		t.Errorf("Mapped model signature mismatch: got %q", retrieved2)
+	}
+}
diff --git a/internal/cmd/anthropic_login.go b/internal/cmd/anthropic_login.go
index f7381461a6..dafdd02ba2 100644
--- a/internal/cmd/anthropic_login.go
+++ b/internal/cmd/anthropic_login.go
@@ -40,7 +40,8 @@ func DoClaudeLogin(cfg *config.Config, options *LoginOptions) {
 
 	_, savedPath, err := manager.Login(context.Background(), "claude", cfg, authOpts)
 	if err != nil {
-		if authErr, ok := errors.AsType[*claude.AuthenticationError](err); ok {
+		var authErr *claude.AuthenticationError
+		if errors.As(err, &authErr) {
 			log.Error(claude.GetUserFriendlyMessage(authErr))
 			if authErr.Type == claude.ErrPortInUse.Type {
 				os.Exit(claude.ErrPortInUse.Code)
diff --git a/internal/cmd/auth_manager.go b/internal/cmd/auth_manager.go
index 2a3407be49..e6caa95438 100644
--- a/internal/cmd/auth_manager.go
+++ b/internal/cmd/auth_manager.go
@@ -6,7 +6,7 @@ import (
 
 // newAuthManager creates a new authentication manager instance with all supported
 // authenticators and a file-based token store. It initializes authenticators for
-// Gemini, Codex, Claude, Qwen, IFlow, Antigravity, and GitHub Copilot providers.
+// Gemini, Codex, Claude, and Qwen providers.
 //
 // Returns:
 //   - *sdkAuth.Manager: A configured authentication manager instance
@@ -19,10 +19,6 @@ func newAuthManager() *sdkAuth.Manager {
 		sdkAuth.NewQwenAuthenticator(),
 		sdkAuth.NewIFlowAuthenticator(),
 		sdkAuth.NewAntigravityAuthenticator(),
-		sdkAuth.NewKimiAuthenticator(),
-		sdkAuth.NewKiroAuthenticator(),
-		sdkAuth.NewGitHubCopilotAuthenticator(),
-		sdkAuth.NewKiloAuthenticator(),
 	)
 	return manager
 }
diff --git a/internal/cmd/iflow_login.go b/internal/cmd/iflow_login.go
index 49e18e5b73..07360b8c68 100644
--- a/internal/cmd/iflow_login.go
+++ b/internal/cmd/iflow_login.go
@@ -32,7 +32,8 @@ func DoIFlowLogin(cfg *config.Config, options *LoginOptions) {
 
 	_, savedPath, err := manager.Login(context.Background(), "iflow", cfg, authOpts)
 	if err != nil {
-		if emailErr, ok := errors.AsType[*sdkAuth.EmailRequiredError](err); ok {
+		var emailErr *sdkAuth.EmailRequiredError
+		if errors.As(err, &emailErr) {
 			log.Error(emailErr.Error())
 			return
 		}
diff --git a/internal/cmd/login.go b/internal/cmd/login.go
index 1d8a1ae336..b5129cfd1a 100644
--- a/internal/cmd/login.go
+++ b/internal/cmd/login.go
@@ -100,74 +100,49 @@ func DoLogin(cfg *config.Config, projectID string, options *LoginOptions) {
 
 	log.Info("Authentication successful.")
 
-	var activatedProjects []string
-
-	useGoogleOne := false
-	if trimmedProjectID == "" && promptFn != nil {
-		fmt.Println("\nSelect login mode:")
-		fmt.Println("  1. Code Assist  (GCP project, manual selection)")
-		fmt.Println("  2. Google One   (personal account, auto-discover project)")
-		choice, errPrompt := promptFn("Enter choice [1/2] (default: 1): ")
-		if errPrompt == nil && strings.TrimSpace(choice) == "2" {
-			useGoogleOne = true
-		}
+	projects, errProjects := fetchGCPProjects(ctx, httpClient)
+	if errProjects != nil {
+		log.Errorf("Failed to get project list: %v", errProjects)
+		return
 	}
 
-	if useGoogleOne {
-		log.Info("Google One mode: auto-discovering project...")
-		if errSetup := performGeminiCLISetup(ctx, httpClient, storage, ""); errSetup != nil {
-			log.Errorf("Google One auto-discovery failed: %v", errSetup)
-			return
-		}
-		autoProject := strings.TrimSpace(storage.ProjectID)
-		if autoProject == "" {
-			log.Error("Google One auto-discovery returned empty project ID")
-			return
-		}
-		log.Infof("Auto-discovered project: %s", autoProject)
-		activatedProjects = []string{autoProject}
-	} else {
-		projects, errProjects := fetchGCPProjects(ctx, httpClient)
-		if errProjects != nil {
-			log.Errorf("Failed to get project list: %v", errProjects)
-			return
-		}
+	selectedProjectID := promptForProjectSelection(projects, trimmedProjectID, promptFn)
+	projectSelections, errSelection := resolveProjectSelections(selectedProjectID, projects)
+	if errSelection != nil {
+		log.Errorf("Invalid project selection: %v", errSelection)
+		return
+	}
+	if len(projectSelections) == 0 {
+		log.Error("No project selected; aborting login.")
+		return
+	}
 
-		selectedProjectID := promptForProjectSelection(projects, trimmedProjectID, promptFn)
-		projectSelections, errSelection := resolveProjectSelections(selectedProjectID, projects)
-		if errSelection != nil {
-			log.Errorf("Invalid project selection: %v", errSelection)
+	activatedProjects := make([]string, 0, len(projectSelections))
+	seenProjects := make(map[string]bool)
+	for _, candidateID := range projectSelections {
+		log.Infof("Activating project %s", candidateID)
+		if errSetup := performGeminiCLISetup(ctx, httpClient, storage, candidateID); errSetup != nil {
+			var projectErr *projectSelectionRequiredError
+			if errors.As(errSetup, &projectErr) {
+				log.Error("Failed to start user onboarding: A project ID is required.")
+				showProjectSelectionHelp(storage.Email, projects)
+				return
+			}
+			log.Errorf("Failed to complete user setup: %v", errSetup)
 			return
 		}
-		if len(projectSelections) == 0 {
-			log.Error("No project selected; aborting login.")
-			return
+		finalID := strings.TrimSpace(storage.ProjectID)
+		if finalID == "" {
+			finalID = candidateID
 		}
 
-		seenProjects := make(map[string]bool)
-		for _, candidateID := range projectSelections {
-			log.Infof("Activating project %s", candidateID)
-			if errSetup := performGeminiCLISetup(ctx, httpClient, storage, candidateID); errSetup != nil {
-				if _, ok := errors.AsType[*projectSelectionRequiredError](errSetup); ok {
-					log.Error("Failed to start user onboarding: A project ID is required.")
-					showProjectSelectionHelp(storage.Email, projects)
-					return
-				}
-				log.Errorf("Failed to complete user setup: %v", errSetup)
-				return
-			}
-			finalID := strings.TrimSpace(storage.ProjectID)
-			if finalID == "" {
-				finalID = candidateID
-			}
-
-			if seenProjects[finalID] {
-				log.Infof("Project %s already activated, skipping", finalID)
-				continue
-			}
-			seenProjects[finalID] = true
-			activatedProjects = append(activatedProjects, finalID)
+		// Skip duplicates
+		if seenProjects[finalID] {
+			log.Infof("Project %s already activated, skipping", finalID)
+			continue
 		}
+		seenProjects[finalID] = true
+		activatedProjects = append(activatedProjects, finalID)
 	}
 
 	storage.Auto = false
@@ -260,48 +235,7 @@ func performGeminiCLISetup(ctx context.Context, httpClient *http.Client, storage
 		}
 	}
 	if projectID == "" {
-		// Auto-discovery: try onboardUser without specifying a project
-		// to let Google auto-provision one (matches Gemini CLI headless behavior
-		// and Antigravity's FetchProjectID pattern).
-		autoOnboardReq := map[string]any{
-			"tierId":   tierID,
-			"metadata": metadata,
-		}
-
-		autoCtx, autoCancel := context.WithTimeout(ctx, 30*time.Second)
-		defer autoCancel()
-		for attempt := 1; ; attempt++ {
-			var onboardResp map[string]any
-			if errOnboard := callGeminiCLI(autoCtx, httpClient, "onboardUser", autoOnboardReq, &onboardResp); errOnboard != nil {
-				return fmt.Errorf("auto-discovery onboardUser: %w", errOnboard)
-			}
-
-			if done, okDone := onboardResp["done"].(bool); okDone && done {
-				if resp, okResp := onboardResp["response"].(map[string]any); okResp {
-					switch v := resp["cloudaicompanionProject"].(type) {
-					case string:
-						projectID = strings.TrimSpace(v)
-					case map[string]any:
-						if id, okID := v["id"].(string); okID {
-							projectID = strings.TrimSpace(id)
-						}
-					}
-				}
-				break
-			}
-
-			log.Debugf("Auto-discovery: onboarding in progress, attempt %d...", attempt)
-			select {
-			case <-autoCtx.Done():
-				return &projectSelectionRequiredError{}
-			case <-time.After(2 * time.Second):
-			}
-		}
-
-		if projectID == "" {
-			return &projectSelectionRequiredError{}
-		}
-		log.Infof("Auto-discovered project ID via onboarding: %s", projectID)
+		return &projectSelectionRequiredError{}
 	}
 
 	onboardReqBody := map[string]any{
@@ -683,7 +617,7 @@ func updateAuthRecord(record *cliproxyauth.Auth, storage *gemini.GeminiTokenStor
 		return
 	}
 
-	finalName := gemini.CredentialFileName(storage.Email, storage.ProjectID, true)
+	finalName := gemini.CredentialFileName(storage.Email, storage.ProjectID, false)
 
 	if record.Metadata == nil {
 		record.Metadata = make(map[string]any)
diff --git a/internal/cmd/openai_login.go b/internal/cmd/openai_login.go
index 783a948400..5f2fb162a8 100644
--- a/internal/cmd/openai_login.go
+++ b/internal/cmd/openai_login.go
@@ -54,7 +54,8 @@ func DoCodexLogin(cfg *config.Config, options *LoginOptions) {
 
 	_, savedPath, err := manager.Login(context.Background(), "codex", cfg, authOpts)
 	if err != nil {
-		if authErr, ok := errors.AsType[*codex.AuthenticationError](err); ok {
+		var authErr *codex.AuthenticationError
+		if errors.As(err, &authErr) {
 			log.Error(codex.GetUserFriendlyMessage(authErr))
 			if authErr.Type == codex.ErrPortInUse.Type {
 				os.Exit(codex.ErrPortInUse.Code)
diff --git a/internal/cmd/qwen_login.go b/internal/cmd/qwen_login.go
index 10179fa843..92a57aa5c4 100644
--- a/internal/cmd/qwen_login.go
+++ b/internal/cmd/qwen_login.go
@@ -44,7 +44,8 @@ func DoQwenLogin(cfg *config.Config, options *LoginOptions) {
 
 	_, savedPath, err := manager.Login(context.Background(), "qwen", cfg, authOpts)
 	if err != nil {
-		if emailErr, ok := errors.AsType[*sdkAuth.EmailRequiredError](err); ok {
+		var emailErr *sdkAuth.EmailRequiredError
+		if errors.As(err, &emailErr) {
 			log.Error(emailErr.Error())
 			return
 		}
diff --git a/internal/cmd/run.go b/internal/cmd/run.go
index d8c4f01938..1e9681266c 100644
--- a/internal/cmd/run.go
+++ b/internal/cmd/run.go
@@ -55,34 +55,6 @@ func StartService(cfg *config.Config, configPath string, localPassword string) {
 	}
 }
 
-// StartServiceBackground starts the proxy service in a background goroutine
-// and returns a cancel function for shutdown and a done channel.
-func StartServiceBackground(cfg *config.Config, configPath string, localPassword string) (cancel func(), done <-chan struct{}) {
-	builder := cliproxy.NewBuilder().
-		WithConfig(cfg).
-		WithConfigPath(configPath).
-		WithLocalManagementPassword(localPassword)
-
-	ctx, cancelFn := context.WithCancel(context.Background())
-	doneCh := make(chan struct{})
-
-	service, err := builder.Build()
-	if err != nil {
-		log.Errorf("failed to build proxy service: %v", err)
-		close(doneCh)
-		return cancelFn, doneCh
-	}
-
-	go func() {
-		defer close(doneCh)
-		if err := service.Run(ctx); err != nil && !errors.Is(err, context.Canceled) {
-			log.Errorf("proxy service exited with error: %v", err)
-		}
-	}()
-
-	return cancelFn, doneCh
-}
-
 // WaitForCloudDeploy waits indefinitely for shutdown signals in cloud deploy mode
 // when no configuration file is available.
 func WaitForCloudDeploy() {
diff --git a/internal/config/config.go b/internal/config/config.go
index eb8873844e..1352ffde48 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -18,10 +18,7 @@ import (
 	"gopkg.in/yaml.v3"
 )
 
-const (
-	DefaultPanelGitHubRepository = "https://github.com/router-for-me/Cli-Proxy-API-Management-Center"
-	DefaultPprofAddr             = "127.0.0.1:8316"
-)
+const DefaultPanelGitHubRepository = "https://github.com/router-for-me/Cli-Proxy-API-Management-Center"
 
 // Config represents the application's configuration, loaded from a YAML file.
 type Config struct {
@@ -44,9 +41,6 @@ type Config struct {
 	// Debug enables or disables debug-level logging and other debug features.
 	Debug bool `yaml:"debug" json:"debug"`
 
-	// Pprof config controls the optional pprof HTTP debug server.
-	Pprof PprofConfig `yaml:"pprof" json:"pprof"`
-
 	// CommercialMode disables high-overhead HTTP middleware features to minimize per-request memory usage.
 	CommercialMode bool `yaml:"commercial-mode" json:"commercial-mode"`
 
@@ -84,23 +78,12 @@ type Config struct {
 	// GeminiKey defines Gemini API key configurations with optional routing overrides.
 	GeminiKey []GeminiKey `yaml:"gemini-api-key" json:"gemini-api-key"`
 
-	// KiroKey defines a list of Kiro (AWS CodeWhisperer) configurations.
-	KiroKey []KiroKey `yaml:"kiro" json:"kiro"`
-
-	// KiroPreferredEndpoint sets the global default preferred endpoint for all Kiro providers.
-	// Values: "ide" (default, CodeWhisperer) or "cli" (Amazon Q).
-	KiroPreferredEndpoint string `yaml:"kiro-preferred-endpoint" json:"kiro-preferred-endpoint"`
-
 	// Codex defines a list of Codex API key configurations as specified in the YAML configuration file.
 	CodexKey []CodexKey `yaml:"codex-api-key" json:"codex-api-key"`
 
 	// ClaudeKey defines a list of Claude API key configurations as specified in the YAML configuration file.
 	ClaudeKey []ClaudeKey `yaml:"claude-api-key" json:"claude-api-key"`
 
-	// ClaudeHeaderDefaults configures default header values for Claude API requests.
-	// These are used as fallbacks when the client does not send its own headers.
-	ClaudeHeaderDefaults ClaudeHeaderDefaults `yaml:"claude-header-defaults" json:"claude-header-defaults"`
-
 	// OpenAICompatibility defines OpenAI API compatibility configurations for external providers.
 	OpenAICompatibility []OpenAICompatibility `yaml:"openai-compatibility" json:"openai-compatibility"`
 
@@ -112,12 +95,11 @@ type Config struct {
 	AmpCode AmpCode `yaml:"ampcode" json:"ampcode"`
 
 	// OAuthExcludedModels defines per-provider global model exclusions applied to OAuth/file-backed auth entries.
-	// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
 	OAuthExcludedModels map[string][]string `yaml:"oauth-excluded-models,omitempty" json:"oauth-excluded-models,omitempty"`
 
 	// OAuthModelAlias defines global model name aliases for OAuth/file-backed auth channels.
 	// These aliases affect both model listing and model routing for supported channels:
-	// gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
+	// gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
 	//
 	// NOTE: This does not apply to existing per-credential model alias features under:
 	// gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, and ampcode.
@@ -126,23 +108,9 @@ type Config struct {
 	// Payload defines default and override rules for provider payload parameters.
 	Payload PayloadConfig `yaml:"payload" json:"payload"`
 
-	// IncognitoBrowser enables opening OAuth URLs in incognito/private browsing mode.
-	// This is useful when you want to login with a different account without logging out
-	// from your current session. Default: false.
-	IncognitoBrowser bool `yaml:"incognito-browser" json:"incognito-browser"`
-
 	legacyMigrationPending bool `yaml:"-" json:"-"`
 }
 
-// ClaudeHeaderDefaults configures default header values injected into Claude API requests
-// when the client does not send them. Update these when Claude Code releases a new version.
-type ClaudeHeaderDefaults struct {
-	UserAgent      string `yaml:"user-agent" json:"user-agent"`
-	PackageVersion string `yaml:"package-version" json:"package-version"`
-	RuntimeVersion string `yaml:"runtime-version" json:"runtime-version"`
-	Timeout        string `yaml:"timeout" json:"timeout"`
-}
-
 // TLSConfig holds HTTPS server settings.
 type TLSConfig struct {
 	// Enable toggles HTTPS server mode.
@@ -153,14 +121,6 @@ type TLSConfig struct {
 	Key string `yaml:"key" json:"key"`
 }
 
-// PprofConfig holds pprof HTTP server settings.
-type PprofConfig struct {
-	// Enable toggles the pprof HTTP debug server.
-	Enable bool `yaml:"enable" json:"enable"`
-	// Addr is the host:port address for the pprof HTTP server.
-	Addr string `yaml:"addr" json:"addr"`
-}
-
 // RemoteManagement holds management API configuration under 'remote-management'.
 type RemoteManagement struct {
 	// AllowRemote toggles remote (non-localhost) access to management API.
@@ -314,10 +274,6 @@ type CloakConfig struct {
 	// SensitiveWords is a list of words to obfuscate with zero-width characters.
 	// This can help bypass certain content filters.
 	SensitiveWords []string `yaml:"sensitive-words,omitempty" json:"sensitive-words,omitempty"`
-
-	// CacheUserID controls whether Claude user_id values are cached per API key.
-	// When false, a fresh random user_id is generated for every request.
-	CacheUserID *bool `yaml:"cache-user-id,omitempty" json:"cache-user-id,omitempty"`
 }
 
 // ClaudeKey represents the configuration for a Claude API key,
@@ -385,9 +341,6 @@ type CodexKey struct {
 	// If empty, the default Codex API URL will be used.
 	BaseURL string `yaml:"base-url" json:"base-url"`
 
-	// Websockets enables the Responses API websocket transport for this credential.
-	Websockets bool `yaml:"websockets,omitempty" json:"websockets,omitempty"`
-
 	// ProxyURL overrides the global proxy setting for this API key if provided.
 	ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
 
@@ -460,35 +413,6 @@ type GeminiModel struct {
 func (m GeminiModel) GetName() string  { return m.Name }
 func (m GeminiModel) GetAlias() string { return m.Alias }
 
-// KiroKey represents the configuration for Kiro (AWS CodeWhisperer) authentication.
-type KiroKey struct {
-	// TokenFile is the path to the Kiro token file (default: ~/.aws/sso/cache/kiro-auth-token.json)
-	TokenFile string `yaml:"token-file,omitempty" json:"token-file,omitempty"`
-
-	// AccessToken is the OAuth access token for direct configuration.
-	AccessToken string `yaml:"access-token,omitempty" json:"access-token,omitempty"`
-
-	// RefreshToken is the OAuth refresh token for token renewal.
-	RefreshToken string `yaml:"refresh-token,omitempty" json:"refresh-token,omitempty"`
-
-	// ProfileArn is the AWS CodeWhisperer profile ARN.
-	ProfileArn string `yaml:"profile-arn,omitempty" json:"profile-arn,omitempty"`
-
-	// Region is the AWS region (default: us-east-1).
-	Region string `yaml:"region,omitempty" json:"region,omitempty"`
-
-	// ProxyURL optionally overrides the global proxy for this configuration.
-	ProxyURL string `yaml:"proxy-url,omitempty" json:"proxy-url,omitempty"`
-
-	// AgentTaskType sets the Kiro API task type. Known values: "vibe", "dev", "chat".
-	// Leave empty to let API use defaults. Different values may inject different system prompts.
-	AgentTaskType string `yaml:"agent-task-type,omitempty" json:"agent-task-type,omitempty"`
-
-	// PreferredEndpoint sets the preferred Kiro API endpoint/quota.
-	// Values: "codewhisperer" (default, IDE quota) or "amazonq" (CLI quota).
-	PreferredEndpoint string `yaml:"preferred-endpoint,omitempty" json:"preferred-endpoint,omitempty"`
-}
-
 // OpenAICompatibility represents the configuration for OpenAI API compatibility
 // with external providers, allowing model aliases to be routed through OpenAI API format.
 type OpenAICompatibility struct {
@@ -555,15 +479,14 @@ func LoadConfig(configFile string) (*Config, error) {
 // If optional is true and the file is missing, it returns an empty Config.
 // If optional is true and the file is empty or invalid, it returns an empty Config.
 func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
-	// NOTE: Startup oauth-model-alias migration is intentionally disabled.
-	// Reason: avoid mutating config.yaml during server startup.
-	// Re-enable the block below if automatic startup migration is needed again.
-	// if migrated, err := MigrateOAuthModelAlias(configFile); err != nil {
-	// 	// Log warning but don't fail - config loading should still work
-	// 	fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err)
-	// } else if migrated {
-	// 	fmt.Println("Migrated oauth-model-mappings to oauth-model-alias")
-	// }
+	// Perform oauth-model-alias migration before loading config.
+	// This migrates oauth-model-mappings to oauth-model-alias if needed.
+	if migrated, err := MigrateOAuthModelAlias(configFile); err != nil {
+		// Log warning but don't fail - config loading should still work
+		fmt.Printf("Warning: oauth-model-alias migration failed: %v\n", err)
+	} else if migrated {
+		fmt.Println("Migrated oauth-model-mappings to oauth-model-alias")
+	}
 
 	// Read the entire configuration file into memory.
 	data, err := os.ReadFile(configFile)
@@ -591,11 +514,8 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	cfg.ErrorLogsMaxFiles = 10
 	cfg.UsageStatisticsEnabled = false
 	cfg.DisableCooling = false
-	cfg.Pprof.Enable = false
-	cfg.Pprof.Addr = DefaultPprofAddr
 	cfg.AmpCode.RestrictManagementToLocalhost = false // Default to false: API key auth is sufficient
 	cfg.RemoteManagement.PanelGitHubRepository = DefaultPanelGitHubRepository
-	cfg.IncognitoBrowser = false // Default to normal browser (AWS uses incognito by force)
 	if err = yaml.Unmarshal(data, &cfg); err != nil {
 		if optional {
 			// In cloud deploy mode, if YAML parsing fails, return empty config instead of error.
@@ -604,21 +524,18 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 		return nil, fmt.Errorf("failed to parse config file: %w", err)
 	}
 
-	// NOTE: Startup legacy key migration is intentionally disabled.
-	// Reason: avoid mutating config.yaml during server startup.
-	// Re-enable the block below if automatic startup migration is needed again.
-	// var legacy legacyConfigData
-	// if errLegacy := yaml.Unmarshal(data, &legacy); errLegacy == nil {
-	// 	if cfg.migrateLegacyGeminiKeys(legacy.LegacyGeminiKeys) {
-	// 		cfg.legacyMigrationPending = true
-	// 	}
-	// 	if cfg.migrateLegacyOpenAICompatibilityKeys(legacy.OpenAICompat) {
-	// 		cfg.legacyMigrationPending = true
-	// 	}
-	// 	if cfg.migrateLegacyAmpConfig(&legacy) {
-	// 		cfg.legacyMigrationPending = true
-	// 	}
-	// }
+	var legacy legacyConfigData
+	if errLegacy := yaml.Unmarshal(data, &legacy); errLegacy == nil {
+		if cfg.migrateLegacyGeminiKeys(legacy.LegacyGeminiKeys) {
+			cfg.legacyMigrationPending = true
+		}
+		if cfg.migrateLegacyOpenAICompatibilityKeys(legacy.OpenAICompat) {
+			cfg.legacyMigrationPending = true
+		}
+		if cfg.migrateLegacyAmpConfig(&legacy) {
+			cfg.legacyMigrationPending = true
+		}
+	}
 
 	// Hash remote management key if plaintext is detected (nested)
 	// We consider a value to be already hashed if it looks like a bcrypt hash ($2a$, $2b$, or $2y$ prefix).
@@ -639,11 +556,6 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 		cfg.RemoteManagement.PanelGitHubRepository = DefaultPanelGitHubRepository
 	}
 
-	cfg.Pprof.Addr = strings.TrimSpace(cfg.Pprof.Addr)
-	if cfg.Pprof.Addr == "" {
-		cfg.Pprof.Addr = DefaultPprofAddr
-	}
-
 	if cfg.LogsMaxTotalSizeMB < 0 {
 		cfg.LogsMaxTotalSizeMB = 0
 	}
@@ -652,6 +564,9 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 		cfg.ErrorLogsMaxFiles = 10
 	}
 
+	// Sync request authentication providers with inline API keys for backwards compatibility.
+	syncInlineAccessProvider(&cfg)
+
 	// Sanitize Gemini API key configuration and migrate legacy entries.
 	cfg.SanitizeGeminiKeys()
 
@@ -664,9 +579,6 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Sanitize Claude key headers
 	cfg.SanitizeClaudeKeys()
 
-	// Sanitize Kiro keys: trim whitespace from credential fields
-	cfg.SanitizeKiroKeys()
-
 	// Sanitize OpenAI compatibility providers: drop entries without base-url
 	cfg.SanitizeOpenAICompatibility()
 
@@ -679,20 +591,17 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
 	// Validate raw payload rules and drop invalid entries.
 	cfg.SanitizePayloadRules()
 
-	// NOTE: Legacy migration persistence is intentionally disabled together with
-	// startup legacy migration to keep startup read-only for config.yaml.
-	// Re-enable the block below if automatic startup migration is needed again.
-	// if cfg.legacyMigrationPending {
-	// 	fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...")
-	// 	if !optional && configFile != "" {
-	// 		if err := SaveConfigPreserveComments(configFile, &cfg); err != nil {
-	// 			return nil, fmt.Errorf("failed to persist migrated legacy config: %w", err)
-	// 		}
-	// 		fmt.Println("Legacy configuration normalized and persisted.")
-	// 	} else {
-	// 		fmt.Println("Legacy configuration normalized in memory; persistence skipped.")
-	// 	}
-	// }
+	if cfg.legacyMigrationPending {
+		fmt.Println("Detected legacy configuration keys, attempting to persist the normalized config...")
+		if !optional && configFile != "" {
+			if err := SaveConfigPreserveComments(configFile, &cfg); err != nil {
+				return nil, fmt.Errorf("failed to persist migrated legacy config: %w", err)
+			}
+			fmt.Println("Legacy configuration normalized and persisted.")
+		} else {
+			fmt.Println("Legacy configuration normalized in memory; persistence skipped.")
+		}
+	}
 
 	// Return the populated configuration struct.
 	return &cfg, nil
@@ -756,46 +665,14 @@ func payloadRawString(value any) ([]byte, bool) {
 // SanitizeOAuthModelAlias normalizes and deduplicates global OAuth model name aliases.
 // It trims whitespace, normalizes channel keys to lower-case, drops empty entries,
 // allows multiple aliases per upstream name, and ensures aliases are unique within each channel.
-// It also injects default aliases for channels that have built-in defaults (e.g., kiro)
-// when no user-configured aliases exist for those channels.
 func (cfg *Config) SanitizeOAuthModelAlias() {
-	if cfg == nil {
-		return
-	}
-
-	// Inject channel defaults when the channel is absent in user config.
-	// Presence is checked case-insensitively and includes explicit nil/empty markers.
-	if cfg.OAuthModelAlias == nil {
-		cfg.OAuthModelAlias = make(map[string][]OAuthModelAlias)
-	}
-	hasChannel := func(channel string) bool {
-		for k := range cfg.OAuthModelAlias {
-			if strings.EqualFold(strings.TrimSpace(k), channel) {
-				return true
-			}
-		}
-		return false
-	}
-	if !hasChannel("kiro") {
-		cfg.OAuthModelAlias["kiro"] = defaultKiroAliases()
-	}
-	if !hasChannel("github-copilot") {
-		cfg.OAuthModelAlias["github-copilot"] = defaultGitHubCopilotAliases()
-	}
-
-	if len(cfg.OAuthModelAlias) == 0 {
+	if cfg == nil || len(cfg.OAuthModelAlias) == 0 {
 		return
 	}
 	out := make(map[string][]OAuthModelAlias, len(cfg.OAuthModelAlias))
 	for rawChannel, aliases := range cfg.OAuthModelAlias {
 		channel := strings.ToLower(strings.TrimSpace(rawChannel))
-		if channel == "" {
-			continue
-		}
-		// Preserve channels that were explicitly set to empty/nil – they act
-		// as "disabled" markers so default injection won't re-add them (#222).
-		if len(aliases) == 0 {
-			out[channel] = nil
+		if channel == "" || len(aliases) == 0 {
 			continue
 		}
 		seenAlias := make(map[string]struct{}, len(aliases))
@@ -880,23 +757,6 @@ func (cfg *Config) SanitizeClaudeKeys() {
 	}
 }
 
-// SanitizeKiroKeys trims whitespace from Kiro credential fields.
-func (cfg *Config) SanitizeKiroKeys() {
-	if cfg == nil || len(cfg.KiroKey) == 0 {
-		return
-	}
-	for i := range cfg.KiroKey {
-		entry := &cfg.KiroKey[i]
-		entry.TokenFile = strings.TrimSpace(entry.TokenFile)
-		entry.AccessToken = strings.TrimSpace(entry.AccessToken)
-		entry.RefreshToken = strings.TrimSpace(entry.RefreshToken)
-		entry.ProfileArn = strings.TrimSpace(entry.ProfileArn)
-		entry.Region = strings.TrimSpace(entry.Region)
-		entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
-		entry.PreferredEndpoint = strings.TrimSpace(entry.PreferredEndpoint)
-	}
-}
-
 // SanitizeGeminiKeys deduplicates and normalizes Gemini credentials.
 func (cfg *Config) SanitizeGeminiKeys() {
 	if cfg == nil {
@@ -937,6 +797,18 @@ func normalizeModelPrefix(prefix string) string {
 	return trimmed
 }
 
+func syncInlineAccessProvider(cfg *Config) {
+	if cfg == nil {
+		return
+	}
+	if len(cfg.APIKeys) == 0 {
+		if provider := cfg.ConfigAPIKeyProvider(); provider != nil && len(provider.APIKeys) > 0 {
+			cfg.APIKeys = append([]string(nil), provider.APIKeys...)
+		}
+	}
+	cfg.Access.Providers = nil
+}
+
 // looksLikeBcrypt returns true if the provided string appears to be a bcrypt hash.
 func looksLikeBcrypt(s string) bool {
 	return len(s) > 4 && (s[:4] == "$2a$" || s[:4] == "$2b$" || s[:4] == "$2y$")
@@ -1024,7 +896,7 @@ func hashSecret(secret string) (string, error) {
 // SaveConfigPreserveComments writes the config back to YAML while preserving existing comments
 // and key ordering by loading the original file into a yaml.Node tree and updating values in-place.
 func SaveConfigPreserveComments(configFile string, cfg *Config) error {
-	persistCfg := cfg
+	persistCfg := sanitizeConfigForPersist(cfg)
 	// Load original YAML as a node tree to preserve comments and ordering.
 	data, err := os.ReadFile(configFile)
 	if err != nil {
@@ -1092,6 +964,16 @@ func SaveConfigPreserveComments(configFile string, cfg *Config) error {
 	return err
 }
 
+func sanitizeConfigForPersist(cfg *Config) *Config {
+	if cfg == nil {
+		return nil
+	}
+	clone := *cfg
+	clone.SDKConfig = cfg.SDKConfig
+	clone.SDKConfig.Access = AccessConfig{}
+	return &clone
+}
+
 // SaveConfigPreserveCommentsUpdateNestedScalar updates a nested scalar key path like ["a","b"]
 // while preserving comments and positions.
 func SaveConfigPreserveCommentsUpdateNestedScalar(configFile string, path []string, value string) error {
@@ -1188,13 +1070,8 @@ func getOrCreateMapValue(mapNode *yaml.Node, key string) *yaml.Node {
 
 // mergeMappingPreserve merges keys from src into dst mapping node while preserving
 // key order and comments of existing keys in dst. New keys are only added if their
-// value is non-zero and not a known default to avoid polluting the config with defaults.
-func mergeMappingPreserve(dst, src *yaml.Node, path ...[]string) {
-	var currentPath []string
-	if len(path) > 0 {
-		currentPath = path[0]
-	}
-
+// value is non-zero to avoid polluting the config with defaults.
+func mergeMappingPreserve(dst, src *yaml.Node) {
 	if dst == nil || src == nil {
 		return
 	}
@@ -1208,19 +1085,16 @@ func mergeMappingPreserve(dst, src *yaml.Node, path ...[]string) {
 		sk := src.Content[i]
 		sv := src.Content[i+1]
 		idx := findMapKeyIndex(dst, sk.Value)
-		childPath := appendPath(currentPath, sk.Value)
 		if idx >= 0 {
 			// Merge into existing value node (always update, even to zero values)
 			dv := dst.Content[idx+1]
-			mergeNodePreserve(dv, sv, childPath)
+			mergeNodePreserve(dv, sv)
 		} else {
-			// New key: only add if value is non-zero and not a known default
-			candidate := deepCopyNode(sv)
-			pruneKnownDefaultsInNewNode(childPath, candidate)
-			if isKnownDefaultValue(childPath, candidate) {
+			// New key: only add if value is non-zero to avoid polluting config with defaults
+			if isZeroValueNode(sv) {
 				continue
 			}
-			dst.Content = append(dst.Content, deepCopyNode(sk), candidate)
+			dst.Content = append(dst.Content, deepCopyNode(sk), deepCopyNode(sv))
 		}
 	}
 }
@@ -1228,12 +1102,7 @@ func mergeMappingPreserve(dst, src *yaml.Node, path ...[]string) {
 // mergeNodePreserve merges src into dst for scalars, mappings and sequences while
 // reusing destination nodes to keep comments and anchors. For sequences, it updates
 // in-place by index.
-func mergeNodePreserve(dst, src *yaml.Node, path ...[]string) {
-	var currentPath []string
-	if len(path) > 0 {
-		currentPath = path[0]
-	}
-
+func mergeNodePreserve(dst, src *yaml.Node) {
 	if dst == nil || src == nil {
 		return
 	}
@@ -1242,7 +1111,7 @@ func mergeNodePreserve(dst, src *yaml.Node, path ...[]string) {
 		if dst.Kind != yaml.MappingNode {
 			copyNodeShallow(dst, src)
 		}
-		mergeMappingPreserve(dst, src, currentPath)
+		mergeMappingPreserve(dst, src)
 	case yaml.SequenceNode:
 		// Preserve explicit null style if dst was null and src is empty sequence
 		if dst.Kind == yaml.ScalarNode && dst.Tag == "!!null" && len(src.Content) == 0 {
@@ -1265,7 +1134,7 @@ func mergeNodePreserve(dst, src *yaml.Node, path ...[]string) {
 				dst.Content[i] = deepCopyNode(src.Content[i])
 				continue
 			}
-			mergeNodePreserve(dst.Content[i], src.Content[i], currentPath)
+			mergeNodePreserve(dst.Content[i], src.Content[i])
 			if dst.Content[i] != nil && src.Content[i] != nil &&
 				dst.Content[i].Kind == yaml.MappingNode && src.Content[i].Kind == yaml.MappingNode {
 				pruneMissingMapKeys(dst.Content[i], src.Content[i])
@@ -1307,94 +1176,6 @@ func findMapKeyIndex(mapNode *yaml.Node, key string) int {
 	return -1
 }
 
-// appendPath appends a key to the path, returning a new slice to avoid modifying the original.
-func appendPath(path []string, key string) []string {
-	if len(path) == 0 {
-		return []string{key}
-	}
-	newPath := make([]string, len(path)+1)
-	copy(newPath, path)
-	newPath[len(path)] = key
-	return newPath
-}
-
-// isKnownDefaultValue returns true if the given node at the specified path
-// represents a known default value that should not be written to the config file.
-// This prevents non-zero defaults from polluting the config.
-func isKnownDefaultValue(path []string, node *yaml.Node) bool {
-	// First check if it's a zero value
-	if isZeroValueNode(node) {
-		return true
-	}
-
-	// Match known non-zero defaults by exact dotted path.
-	if len(path) == 0 {
-		return false
-	}
-
-	fullPath := strings.Join(path, ".")
-
-	// Check string defaults
-	if node.Kind == yaml.ScalarNode && node.Tag == "!!str" {
-		switch fullPath {
-		case "pprof.addr":
-			return node.Value == DefaultPprofAddr
-		case "remote-management.panel-github-repository":
-			return node.Value == DefaultPanelGitHubRepository
-		case "routing.strategy":
-			return node.Value == "round-robin"
-		}
-	}
-
-	// Check integer defaults
-	if node.Kind == yaml.ScalarNode && node.Tag == "!!int" {
-		switch fullPath {
-		case "error-logs-max-files":
-			return node.Value == "10"
-		}
-	}
-
-	return false
-}
-
-// pruneKnownDefaultsInNewNode removes default-valued descendants from a new node
-// before it is appended into the destination YAML tree.
-func pruneKnownDefaultsInNewNode(path []string, node *yaml.Node) {
-	if node == nil {
-		return
-	}
-
-	switch node.Kind {
-	case yaml.MappingNode:
-		filtered := make([]*yaml.Node, 0, len(node.Content))
-		for i := 0; i+1 < len(node.Content); i += 2 {
-			keyNode := node.Content[i]
-			valueNode := node.Content[i+1]
-			if keyNode == nil || valueNode == nil {
-				continue
-			}
-
-			childPath := appendPath(path, keyNode.Value)
-			if isKnownDefaultValue(childPath, valueNode) {
-				continue
-			}
-
-			pruneKnownDefaultsInNewNode(childPath, valueNode)
-			if (valueNode.Kind == yaml.MappingNode || valueNode.Kind == yaml.SequenceNode) &&
-				len(valueNode.Content) == 0 {
-				continue
-			}
-
-			filtered = append(filtered, keyNode, valueNode)
-		}
-		node.Content = filtered
-	case yaml.SequenceNode:
-		for _, child := range node.Content {
-			pruneKnownDefaultsInNewNode(path, child)
-		}
-	}
-}
-
 // isZeroValueNode returns true if the YAML node represents a zero/default value
 // that should not be written as a new key to preserve config cleanliness.
 // For mappings and sequences, recursively checks if all children are zero values.
diff --git a/internal/config/oauth_model_alias_migration.go b/internal/config/oauth_model_alias_migration.go
index b5bf2fb3be..5cc8053a16 100644
--- a/internal/config/oauth_model_alias_migration.go
+++ b/internal/config/oauth_model_alias_migration.go
@@ -17,44 +17,6 @@ var antigravityModelConversionTable = map[string]string{
 	"gemini-claude-sonnet-4-5":                "claude-sonnet-4-5",
 	"gemini-claude-sonnet-4-5-thinking":       "claude-sonnet-4-5-thinking",
 	"gemini-claude-opus-4-5-thinking":         "claude-opus-4-5-thinking",
-	"gemini-claude-opus-4-6-thinking":         "claude-opus-4-6-thinking",
-}
-
-// defaultKiroAliases returns the default oauth-model-alias configuration
-// for the kiro channel. Maps kiro-prefixed model names to standard Claude model
-// names so that clients like Claude Code can use standard names directly.
-func defaultKiroAliases() []OAuthModelAlias {
-	return []OAuthModelAlias{
-		// Sonnet 4.5
-		{Name: "kiro-claude-sonnet-4-5", Alias: "claude-sonnet-4-5-20250929", Fork: true},
-		{Name: "kiro-claude-sonnet-4-5", Alias: "claude-sonnet-4-5", Fork: true},
-		// Sonnet 4
-		{Name: "kiro-claude-sonnet-4", Alias: "claude-sonnet-4-20250514", Fork: true},
-		{Name: "kiro-claude-sonnet-4", Alias: "claude-sonnet-4", Fork: true},
-		// Opus 4.6
-		{Name: "kiro-claude-opus-4-6", Alias: "claude-opus-4-6", Fork: true},
-		// Opus 4.5
-		{Name: "kiro-claude-opus-4-5", Alias: "claude-opus-4-5-20251101", Fork: true},
-		{Name: "kiro-claude-opus-4-5", Alias: "claude-opus-4-5", Fork: true},
-		// Haiku 4.5
-		{Name: "kiro-claude-haiku-4-5", Alias: "claude-haiku-4-5-20251001", Fork: true},
-		{Name: "kiro-claude-haiku-4-5", Alias: "claude-haiku-4-5", Fork: true},
-	}
-}
-
-// defaultGitHubCopilotAliases returns default oauth-model-alias entries that
-// expose Claude hyphen-style IDs for GitHub Copilot Claude models.
-// This keeps compatibility with clients (e.g. Claude Code) that use
-// Anthropic-style model IDs like "claude-opus-4-6".
-func defaultGitHubCopilotAliases() []OAuthModelAlias {
-	return []OAuthModelAlias{
-		{Name: "claude-haiku-4.5", Alias: "claude-haiku-4-5", Fork: true},
-		{Name: "claude-opus-4.1", Alias: "claude-opus-4-1", Fork: true},
-		{Name: "claude-opus-4.5", Alias: "claude-opus-4-5", Fork: true},
-		{Name: "claude-opus-4.6", Alias: "claude-opus-4-6", Fork: true},
-		{Name: "claude-sonnet-4.5", Alias: "claude-sonnet-4-5", Fork: true},
-		{Name: "claude-sonnet-4.6", Alias: "claude-sonnet-4-6", Fork: true},
-	}
 }
 
 // defaultAntigravityAliases returns the default oauth-model-alias configuration
@@ -68,7 +30,6 @@ func defaultAntigravityAliases() []OAuthModelAlias {
 		{Name: "claude-sonnet-4-5", Alias: "gemini-claude-sonnet-4-5"},
 		{Name: "claude-sonnet-4-5-thinking", Alias: "gemini-claude-sonnet-4-5-thinking"},
 		{Name: "claude-opus-4-5-thinking", Alias: "gemini-claude-opus-4-5-thinking"},
-		{Name: "claude-opus-4-6-thinking", Alias: "gemini-claude-opus-4-6-thinking"},
 	}
 }
 
diff --git a/internal/config/oauth_model_alias_migration_test.go b/internal/config/oauth_model_alias_migration_test.go
index cd73b9d5d6..db9c0a11c2 100644
--- a/internal/config/oauth_model_alias_migration_test.go
+++ b/internal/config/oauth_model_alias_migration_test.go
@@ -131,9 +131,6 @@ func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) {
 	if !strings.Contains(content, "claude-opus-4-5-thinking") {
 		t.Fatal("expected missing default alias claude-opus-4-5-thinking to be added")
 	}
-	if !strings.Contains(content, "claude-opus-4-6-thinking") {
-		t.Fatal("expected missing default alias claude-opus-4-6-thinking to be added")
-	}
 }
 
 func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) {
diff --git a/internal/config/oauth_model_alias_test.go b/internal/config/oauth_model_alias_test.go
index 6d914b5913..a58864740c 100644
--- a/internal/config/oauth_model_alias_test.go
+++ b/internal/config/oauth_model_alias_test.go
@@ -54,208 +54,3 @@ func TestSanitizeOAuthModelAlias_AllowsMultipleAliasesForSameName(t *testing.T)
 		}
 	}
 }
-
-func TestSanitizeOAuthModelAlias_InjectsDefaultKiroAliases(t *testing.T) {
-	// When no kiro aliases are configured, defaults should be injected
-	cfg := &Config{
-		OAuthModelAlias: map[string][]OAuthModelAlias{
-			"codex": {
-				{Name: "gpt-5", Alias: "g5"},
-			},
-		},
-	}
-
-	cfg.SanitizeOAuthModelAlias()
-
-	kiroAliases := cfg.OAuthModelAlias["kiro"]
-	if len(kiroAliases) == 0 {
-		t.Fatal("expected default kiro aliases to be injected")
-	}
-
-	// Check that standard Claude model names are present
-	aliasSet := make(map[string]bool)
-	for _, a := range kiroAliases {
-		aliasSet[a.Alias] = true
-	}
-	expectedAliases := []string{
-		"claude-sonnet-4-5-20250929",
-		"claude-sonnet-4-5",
-		"claude-sonnet-4-20250514",
-		"claude-sonnet-4",
-		"claude-opus-4-6",
-		"claude-opus-4-5-20251101",
-		"claude-opus-4-5",
-		"claude-haiku-4-5-20251001",
-		"claude-haiku-4-5",
-	}
-	for _, expected := range expectedAliases {
-		if !aliasSet[expected] {
-			t.Fatalf("expected default kiro alias %q to be present", expected)
-		}
-	}
-
-	// All should have fork=true
-	for _, a := range kiroAliases {
-		if !a.Fork {
-			t.Fatalf("expected all default kiro aliases to have fork=true, got fork=false for %q", a.Alias)
-		}
-	}
-
-	// Codex aliases should still be preserved
-	if len(cfg.OAuthModelAlias["codex"]) != 1 {
-		t.Fatal("expected codex aliases to be preserved")
-	}
-}
-
-func TestSanitizeOAuthModelAlias_InjectsDefaultGitHubCopilotAliases(t *testing.T) {
-	cfg := &Config{
-		OAuthModelAlias: map[string][]OAuthModelAlias{
-			"codex": {
-				{Name: "gpt-5", Alias: "g5"},
-			},
-		},
-	}
-
-	cfg.SanitizeOAuthModelAlias()
-
-	copilotAliases := cfg.OAuthModelAlias["github-copilot"]
-	if len(copilotAliases) == 0 {
-		t.Fatal("expected default github-copilot aliases to be injected")
-	}
-
-	aliasSet := make(map[string]bool, len(copilotAliases))
-	for _, a := range copilotAliases {
-		aliasSet[a.Alias] = true
-		if !a.Fork {
-			t.Fatalf("expected all default github-copilot aliases to have fork=true, got fork=false for %q", a.Alias)
-		}
-	}
-	expectedAliases := []string{
-		"claude-haiku-4-5",
-		"claude-opus-4-1",
-		"claude-opus-4-5",
-		"claude-opus-4-6",
-		"claude-sonnet-4-5",
-		"claude-sonnet-4-6",
-	}
-	for _, expected := range expectedAliases {
-		if !aliasSet[expected] {
-			t.Fatalf("expected default github-copilot alias %q to be present", expected)
-		}
-	}
-}
-
-func TestSanitizeOAuthModelAlias_DoesNotOverrideUserKiroAliases(t *testing.T) {
-	// When user has configured kiro aliases, defaults should NOT be injected
-	cfg := &Config{
-		OAuthModelAlias: map[string][]OAuthModelAlias{
-			"kiro": {
-				{Name: "kiro-claude-sonnet-4", Alias: "my-custom-sonnet", Fork: true},
-			},
-		},
-	}
-
-	cfg.SanitizeOAuthModelAlias()
-
-	kiroAliases := cfg.OAuthModelAlias["kiro"]
-	if len(kiroAliases) != 1 {
-		t.Fatalf("expected 1 user-configured kiro alias, got %d", len(kiroAliases))
-	}
-	if kiroAliases[0].Alias != "my-custom-sonnet" {
-		t.Fatalf("expected user alias to be preserved, got %q", kiroAliases[0].Alias)
-	}
-}
-
-func TestSanitizeOAuthModelAlias_DoesNotOverrideUserGitHubCopilotAliases(t *testing.T) {
-	cfg := &Config{
-		OAuthModelAlias: map[string][]OAuthModelAlias{
-			"github-copilot": {
-				{Name: "claude-opus-4.6", Alias: "my-opus", Fork: true},
-			},
-		},
-	}
-
-	cfg.SanitizeOAuthModelAlias()
-
-	copilotAliases := cfg.OAuthModelAlias["github-copilot"]
-	if len(copilotAliases) != 1 {
-		t.Fatalf("expected 1 user-configured github-copilot alias, got %d", len(copilotAliases))
-	}
-	if copilotAliases[0].Alias != "my-opus" {
-		t.Fatalf("expected user alias to be preserved, got %q", copilotAliases[0].Alias)
-	}
-}
-
-func TestSanitizeOAuthModelAlias_DoesNotReinjectAfterExplicitDeletion(t *testing.T) {
-	// When user explicitly deletes kiro aliases (key exists with nil value),
-	// defaults should NOT be re-injected on subsequent sanitize calls (#222).
-	cfg := &Config{
-		OAuthModelAlias: map[string][]OAuthModelAlias{
-			"kiro":  nil, // explicitly deleted
-			"codex": {{Name: "gpt-5", Alias: "g5"}},
-		},
-	}
-
-	cfg.SanitizeOAuthModelAlias()
-
-	kiroAliases := cfg.OAuthModelAlias["kiro"]
-	if len(kiroAliases) != 0 {
-		t.Fatalf("expected kiro aliases to remain empty after explicit deletion, got %d aliases", len(kiroAliases))
-	}
-	// The key itself must still be present to prevent re-injection on next reload
-	if _, exists := cfg.OAuthModelAlias["kiro"]; !exists {
-		t.Fatal("expected kiro key to be preserved as nil marker after sanitization")
-	}
-	// Other channels should be unaffected
-	if len(cfg.OAuthModelAlias["codex"]) != 1 {
-		t.Fatal("expected codex aliases to be preserved")
-	}
-}
-
-func TestSanitizeOAuthModelAlias_GitHubCopilotDoesNotReinjectAfterExplicitDeletion(t *testing.T) {
-	cfg := &Config{
-		OAuthModelAlias: map[string][]OAuthModelAlias{
-			"github-copilot": nil, // explicitly deleted
-		},
-	}
-
-	cfg.SanitizeOAuthModelAlias()
-
-	copilotAliases := cfg.OAuthModelAlias["github-copilot"]
-	if len(copilotAliases) != 0 {
-		t.Fatalf("expected github-copilot aliases to remain empty after explicit deletion, got %d aliases", len(copilotAliases))
-	}
-	if _, exists := cfg.OAuthModelAlias["github-copilot"]; !exists {
-		t.Fatal("expected github-copilot key to be preserved as nil marker after sanitization")
-	}
-}
-
-func TestSanitizeOAuthModelAlias_DoesNotReinjectAfterExplicitDeletionEmpty(t *testing.T) {
-	// Same as above but with empty slice instead of nil (PUT with empty body).
-	cfg := &Config{
-		OAuthModelAlias: map[string][]OAuthModelAlias{
-			"kiro": {}, // explicitly set to empty
-		},
-	}
-
-	cfg.SanitizeOAuthModelAlias()
-
-	if len(cfg.OAuthModelAlias["kiro"]) != 0 {
-		t.Fatalf("expected kiro aliases to remain empty, got %d aliases", len(cfg.OAuthModelAlias["kiro"]))
-	}
-	if _, exists := cfg.OAuthModelAlias["kiro"]; !exists {
-		t.Fatal("expected kiro key to be preserved")
-	}
-}
-
-func TestSanitizeOAuthModelAlias_InjectsDefaultKiroWhenEmpty(t *testing.T) {
-	// When OAuthModelAlias is nil, kiro defaults should still be injected
-	cfg := &Config{}
-
-	cfg.SanitizeOAuthModelAlias()
-
-	kiroAliases := cfg.OAuthModelAlias["kiro"]
-	if len(kiroAliases) == 0 {
-		t.Fatal("expected default kiro aliases to be injected when OAuthModelAlias is nil")
-	}
-}
diff --git a/internal/config/sdk_config.go b/internal/config/sdk_config.go
index 9d99c92423..4d4abc37ad 100644
--- a/internal/config/sdk_config.go
+++ b/internal/config/sdk_config.go
@@ -20,9 +20,8 @@ type SDKConfig struct {
 	// APIKeys is a list of keys for authenticating clients to this proxy server.
 	APIKeys []string `yaml:"api-keys" json:"api-keys"`
 
-	// PassthroughHeaders controls whether upstream response headers are forwarded to downstream clients.
-	// Default is false (disabled).
-	PassthroughHeaders bool `yaml:"passthrough-headers" json:"passthrough-headers"`
+	// Access holds request authentication provider configuration.
+	Access AccessConfig `yaml:"auth,omitempty" json:"auth,omitempty"`
 
 	// Streaming configures server-side streaming behavior (keep-alives and safe bootstrap retries).
 	Streaming StreamingConfig `yaml:"streaming" json:"streaming"`
@@ -43,3 +42,65 @@ type StreamingConfig struct {
 	// <= 0 disables bootstrap retries. Default is 0.
 	BootstrapRetries int `yaml:"bootstrap-retries,omitempty" json:"bootstrap-retries,omitempty"`
 }
+
+// AccessConfig groups request authentication providers.
+type AccessConfig struct {
+	// Providers lists configured authentication providers.
+	Providers []AccessProvider `yaml:"providers,omitempty" json:"providers,omitempty"`
+}
+
+// AccessProvider describes a request authentication provider entry.
+type AccessProvider struct {
+	// Name is the instance identifier for the provider.
+	Name string `yaml:"name" json:"name"`
+
+	// Type selects the provider implementation registered via the SDK.
+	Type string `yaml:"type" json:"type"`
+
+	// SDK optionally names a third-party SDK module providing this provider.
+	SDK string `yaml:"sdk,omitempty" json:"sdk,omitempty"`
+
+	// APIKeys lists inline keys for providers that require them.
+	APIKeys []string `yaml:"api-keys,omitempty" json:"api-keys,omitempty"`
+
+	// Config passes provider-specific options to the implementation.
+	Config map[string]any `yaml:"config,omitempty" json:"config,omitempty"`
+}
+
+const (
+	// AccessProviderTypeConfigAPIKey is the built-in provider validating inline API keys.
+	AccessProviderTypeConfigAPIKey = "config-api-key"
+
+	// DefaultAccessProviderName is applied when no provider name is supplied.
+	DefaultAccessProviderName = "config-inline"
+)
+
+// ConfigAPIKeyProvider returns the first inline API key provider if present.
+func (c *SDKConfig) ConfigAPIKeyProvider() *AccessProvider {
+	if c == nil {
+		return nil
+	}
+	for i := range c.Access.Providers {
+		if c.Access.Providers[i].Type == AccessProviderTypeConfigAPIKey {
+			if c.Access.Providers[i].Name == "" {
+				c.Access.Providers[i].Name = DefaultAccessProviderName
+			}
+			return &c.Access.Providers[i]
+		}
+	}
+	return nil
+}
+
+// MakeInlineAPIKeyProvider constructs an inline API key provider configuration.
+// It returns nil when no keys are supplied.
+func MakeInlineAPIKeyProvider(keys []string) *AccessProvider {
+	if len(keys) == 0 {
+		return nil
+	}
+	provider := &AccessProvider{
+		Name:    DefaultAccessProviderName,
+		Type:    AccessProviderTypeConfigAPIKey,
+		APIKeys: append([]string(nil), keys...),
+	}
+	return provider
+}
diff --git a/internal/constant/constant.go b/internal/constant/constant.go
index 9b7d31aab6..58b388a138 100644
--- a/internal/constant/constant.go
+++ b/internal/constant/constant.go
@@ -24,10 +24,4 @@ const (
 
 	// Antigravity represents the Antigravity response format identifier.
 	Antigravity = "antigravity"
-
-	// Kiro represents the AWS CodeWhisperer (Kiro) provider identifier.
-	Kiro = "kiro"
-
-	// Kilo represents the Kilo AI provider identifier.
-	Kilo = "kilo"
 )
diff --git a/internal/logging/global_logger.go b/internal/logging/global_logger.go
index 484ecba7ed..28c9f3b910 100644
--- a/internal/logging/global_logger.go
+++ b/internal/logging/global_logger.go
@@ -85,7 +85,6 @@ func (m *LogFormatter) Format(entry *log.Entry) ([]byte, error) {
 func SetupBaseLogger() {
 	setupOnce.Do(func() {
 		log.SetOutput(os.Stdout)
-		log.SetLevel(log.InfoLevel)
 		log.SetReportCaller(true)
 		log.SetFormatter(&LogFormatter{})
 
@@ -132,10 +131,7 @@ func ResolveLogDirectory(cfg *config.Config) string {
 		return logDir
 	}
 	if !isDirWritable(logDir) {
-		authDir, err := util.ResolveAuthDir(cfg.AuthDir)
-		if err != nil {
-			log.Warnf("Failed to resolve auth-dir %q for log directory: %v", cfg.AuthDir, err)
-		}
+		authDir := strings.TrimSpace(cfg.AuthDir)
 		if authDir != "" {
 			logDir = filepath.Join(authDir, "logs")
 		}
diff --git a/internal/managementasset/updater.go b/internal/managementasset/updater.go
index 7284b7299c..c941da024a 100644
--- a/internal/managementasset/updater.go
+++ b/internal/managementasset/updater.go
@@ -21,7 +21,6 @@ import (
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
 	log "github.com/sirupsen/logrus"
-	"golang.org/x/sync/singleflight"
 )
 
 const (
@@ -29,7 +28,6 @@ const (
 	defaultManagementFallbackURL = "https://cpamc.router-for.me/"
 	managementAssetName          = "management.html"
 	httpUserAgent                = "CLIProxyAPI-management-updater"
-	managementSyncMinInterval    = 30 * time.Second
 	updateCheckInterval          = 3 * time.Hour
 )
 
@@ -39,10 +37,11 @@ const ManagementFileName = managementAssetName
 var (
 	lastUpdateCheckMu   sync.Mutex
 	lastUpdateCheckTime time.Time
+
 	currentConfigPtr    atomic.Pointer[config.Config]
+	disableControlPanel atomic.Bool
 	schedulerOnce       sync.Once
 	schedulerConfigPath atomic.Value
-	sfGroup             singleflight.Group
 )
 
 // SetCurrentConfig stores the latest configuration snapshot for management asset decisions.
@@ -51,7 +50,16 @@ func SetCurrentConfig(cfg *config.Config) {
 		currentConfigPtr.Store(nil)
 		return
 	}
+
+	prevDisabled := disableControlPanel.Load()
 	currentConfigPtr.Store(cfg)
+	disableControlPanel.Store(cfg.RemoteManagement.DisableControlPanel)
+
+	if prevDisabled && !cfg.RemoteManagement.DisableControlPanel {
+		lastUpdateCheckMu.Lock()
+		lastUpdateCheckTime = time.Time{}
+		lastUpdateCheckMu.Unlock()
+	}
 }
 
 // StartAutoUpdater launches a background goroutine that periodically ensures the management asset is up to date.
@@ -84,7 +92,7 @@ func runAutoUpdater(ctx context.Context) {
 			log.Debug("management asset auto-updater skipped: config not yet available")
 			return
 		}
-		if cfg.RemoteManagement.DisableControlPanel {
+		if disableControlPanel.Load() {
 			log.Debug("management asset auto-updater skipped: control panel disabled")
 			return
 		}
@@ -173,106 +181,103 @@ func FilePath(configFilePath string) string {
 }
 
 // EnsureLatestManagementHTML checks the latest management.html asset and updates the local copy when needed.
-// It coalesces concurrent sync attempts and returns whether the asset exists after the sync attempt.
-func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL string, panelRepository string) bool {
+// The function is designed to run in a background goroutine and will never panic.
+// It enforces a 3-hour rate limit to avoid frequent checks on config/auth file changes.
+func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL string, panelRepository string) {
 	if ctx == nil {
 		ctx = context.Background()
 	}
 
+	if disableControlPanel.Load() {
+		log.Debug("management asset sync skipped: control panel disabled by configuration")
+		return
+	}
+
 	staticDir = strings.TrimSpace(staticDir)
 	if staticDir == "" {
 		log.Debug("management asset sync skipped: empty static directory")
-		return false
+		return
 	}
-	localPath := filepath.Join(staticDir, managementAssetName)
 
-	_, _, _ = sfGroup.Do(localPath, func() (interface{}, error) {
-		lastUpdateCheckMu.Lock()
-		now := time.Now()
-		timeSinceLastAttempt := now.Sub(lastUpdateCheckTime)
-		if !lastUpdateCheckTime.IsZero() && timeSinceLastAttempt < managementSyncMinInterval {
-			lastUpdateCheckMu.Unlock()
-			log.Debugf(
-				"management asset sync skipped by throttle: last attempt %v ago (interval %v)",
-				timeSinceLastAttempt.Round(time.Second),
-				managementSyncMinInterval,
-			)
-			return nil, nil
+	localPath := filepath.Join(staticDir, managementAssetName)
+	localFileMissing := false
+	if _, errStat := os.Stat(localPath); errStat != nil {
+		if errors.Is(errStat, os.ErrNotExist) {
+			localFileMissing = true
+		} else {
+			log.WithError(errStat).Debug("failed to stat local management asset")
 		}
-		lastUpdateCheckTime = now
-		lastUpdateCheckMu.Unlock()
+	}
 
-		localFileMissing := false
-		if _, errStat := os.Stat(localPath); errStat != nil {
-			if errors.Is(errStat, os.ErrNotExist) {
-				localFileMissing = true
-			} else {
-				log.WithError(errStat).Debug("failed to stat local management asset")
-			}
-		}
+	// Rate limiting: check only once every 3 hours
+	lastUpdateCheckMu.Lock()
+	now := time.Now()
+	timeSinceLastCheck := now.Sub(lastUpdateCheckTime)
+	if timeSinceLastCheck < updateCheckInterval {
+		lastUpdateCheckMu.Unlock()
+		log.Debugf("management asset update check skipped: last check was %v ago (interval: %v)", timeSinceLastCheck.Round(time.Second), updateCheckInterval)
+		return
+	}
+	lastUpdateCheckTime = now
+	lastUpdateCheckMu.Unlock()
 
-		if errMkdirAll := os.MkdirAll(staticDir, 0o755); errMkdirAll != nil {
-			log.WithError(errMkdirAll).Warn("failed to prepare static directory for management asset")
-			return nil, nil
-		}
+	if errMkdirAll := os.MkdirAll(staticDir, 0o755); errMkdirAll != nil {
+		log.WithError(errMkdirAll).Warn("failed to prepare static directory for management asset")
+		return
+	}
 
-		releaseURL := resolveReleaseURL(panelRepository)
-		client := newHTTPClient(proxyURL)
+	releaseURL := resolveReleaseURL(panelRepository)
+	client := newHTTPClient(proxyURL)
 
-		localHash, err := fileSHA256(localPath)
-		if err != nil {
-			if !errors.Is(err, os.ErrNotExist) {
-				log.WithError(err).Debug("failed to read local management asset hash")
-			}
-			localHash = ""
+	localHash, err := fileSHA256(localPath)
+	if err != nil {
+		if !errors.Is(err, os.ErrNotExist) {
+			log.WithError(err).Debug("failed to read local management asset hash")
 		}
+		localHash = ""
+	}
 
-		asset, remoteHash, err := fetchLatestAsset(ctx, client, releaseURL)
-		if err != nil {
-			if localFileMissing {
-				log.WithError(err).Warn("failed to fetch latest management release information, trying fallback page")
-				if ensureFallbackManagementHTML(ctx, client, localPath) {
-					return nil, nil
-				}
-				return nil, nil
+	asset, remoteHash, err := fetchLatestAsset(ctx, client, releaseURL)
+	if err != nil {
+		if localFileMissing {
+			log.WithError(err).Warn("failed to fetch latest management release information, trying fallback page")
+			if ensureFallbackManagementHTML(ctx, client, localPath) {
+				return
 			}
-			log.WithError(err).Warn("failed to fetch latest management release information")
-			return nil, nil
+			return
 		}
+		log.WithError(err).Warn("failed to fetch latest management release information")
+		return
+	}
 
-		if remoteHash != "" && localHash != "" && strings.EqualFold(remoteHash, localHash) {
-			log.Debug("management asset is already up to date")
-			return nil, nil
-		}
+	if remoteHash != "" && localHash != "" && strings.EqualFold(remoteHash, localHash) {
+		log.Debug("management asset is already up to date")
+		return
+	}
 
-		data, downloadedHash, err := downloadAsset(ctx, client, asset.BrowserDownloadURL)
-		if err != nil {
-			if localFileMissing {
-				log.WithError(err).Warn("failed to download management asset, trying fallback page")
-				if ensureFallbackManagementHTML(ctx, client, localPath) {
-					return nil, nil
-				}
-				return nil, nil
+	data, downloadedHash, err := downloadAsset(ctx, client, asset.BrowserDownloadURL)
+	if err != nil {
+		if localFileMissing {
+			log.WithError(err).Warn("failed to download management asset, trying fallback page")
+			if ensureFallbackManagementHTML(ctx, client, localPath) {
+				return
 			}
-			log.WithError(err).Warn("failed to download management asset")
-			return nil, nil
-		}
-
-		if remoteHash != "" && !strings.EqualFold(remoteHash, downloadedHash) {
-			log.Warnf("remote digest mismatch for management asset: expected %s got %s", remoteHash, downloadedHash)
+			return
 		}
+		log.WithError(err).Warn("failed to download management asset")
+		return
+	}
 
-		if err = atomicWriteFile(localPath, data); err != nil {
-			log.WithError(err).Warn("failed to update management asset on disk")
-			return nil, nil
-		}
+	if remoteHash != "" && !strings.EqualFold(remoteHash, downloadedHash) {
+		log.Warnf("remote digest mismatch for management asset: expected %s got %s", remoteHash, downloadedHash)
+	}
 
-		log.Infof("management asset updated successfully (hash=%s)", downloadedHash)
-		return nil, nil
-	})
+	if err = atomicWriteFile(localPath, data); err != nil {
+		log.WithError(err).Warn("failed to update management asset on disk")
+		return
+	}
 
-	_, err := os.Stat(localPath)
-	return err == nil
+	log.Infof("management asset updated successfully (hash=%s)", downloadedHash)
 }
 
 func ensureFallbackManagementHTML(ctx context.Context, client *http.Client, localPath string) bool {
diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index 1b69021d2c..585bdf8c43 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -19,12 +19,6 @@ import (
 //   - codex
 //   - qwen
 //   - iflow
-//   - kimi
-//   - kiro
-//   - kilo
-//   - github-copilot
-//   - kiro
-//   - amazonq
 //   - antigravity (returns static overrides only)
 func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
 	key := strings.ToLower(strings.TrimSpace(channel))
@@ -45,16 +39,6 @@ func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
 		return GetQwenModels()
 	case "iflow":
 		return GetIFlowModels()
-	case "kimi":
-		return GetKimiModels()
-	case "github-copilot":
-		return GetGitHubCopilotModels()
-	case "kiro":
-		return GetKiroModels()
-	case "kilo":
-		return GetKiloModels()
-	case "amazonq":
-		return GetAmazonQModels()
 	case "antigravity":
 		cfg := GetAntigravityModelConfig()
 		if len(cfg) == 0 {
@@ -99,11 +83,6 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 		GetOpenAIModels(),
 		GetQwenModels(),
 		GetIFlowModels(),
-		GetKimiModels(),
-		GetGitHubCopilotModels(),
-		GetKiroModels(),
-		GetKiloModels(),
-		GetAmazonQModels(),
 	}
 	for _, models := range allModels {
 		for _, m := range models {
@@ -124,639 +103,3 @@ func LookupStaticModelInfo(modelID string) *ModelInfo {
 
 	return nil
 }
-
-// GetGitHubCopilotModels returns the available models for GitHub Copilot.
-// These models are available through the GitHub Copilot API at api.githubcopilot.com.
-func GetGitHubCopilotModels() []*ModelInfo {
-	now := int64(1732752000) // 2024-11-27
-	gpt4oEntries := []struct {
-		ID          string
-		DisplayName string
-		Description string
-	}{
-		{ID: "gpt-4o-2024-11-20", DisplayName: "GPT-4o (2024-11-20)", Description: "OpenAI GPT-4o 2024-11-20 via GitHub Copilot"},
-		{ID: "gpt-4o-2024-08-06", DisplayName: "GPT-4o (2024-08-06)", Description: "OpenAI GPT-4o 2024-08-06 via GitHub Copilot"},
-		{ID: "gpt-4o-2024-05-13", DisplayName: "GPT-4o (2024-05-13)", Description: "OpenAI GPT-4o 2024-05-13 via GitHub Copilot"},
-		{ID: "gpt-4o", DisplayName: "GPT-4o", Description: "OpenAI GPT-4o via GitHub Copilot"},
-		{ID: "gpt-4-o-preview", DisplayName: "GPT-4-o Preview", Description: "OpenAI GPT-4-o Preview via GitHub Copilot"},
-	}
-
-	models := []*ModelInfo{
-		{
-			ID:                  "gpt-4.1",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-4.1",
-			Description:         "OpenAI GPT-4.1 via GitHub Copilot",
-			ContextLength:       128000,
-			MaxCompletionTokens: 16384,
-		},
-	}
-
-	for _, entry := range gpt4oEntries {
-		models = append(models, &ModelInfo{
-			ID:                  entry.ID,
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         entry.DisplayName,
-			Description:         entry.Description,
-			ContextLength:       128000,
-			MaxCompletionTokens: 16384,
-		})
-	}
-
-	return append(models, []*ModelInfo{
-		{
-			ID:                  "gpt-5",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5",
-			Description:         "OpenAI GPT-5 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32768,
-			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5-mini",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5 Mini",
-			Description:         "OpenAI GPT-5 Mini via GitHub Copilot",
-			ContextLength:       128000,
-			MaxCompletionTokens: 16384,
-			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5-codex",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5 Codex",
-			Description:         "OpenAI GPT-5 Codex via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32768,
-			SupportedEndpoints:  []string{"/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5.1",
-			Description:         "OpenAI GPT-5.1 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32768,
-			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5.1 Codex",
-			Description:         "OpenAI GPT-5.1 Codex via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32768,
-			SupportedEndpoints:  []string{"/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex-mini",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5.1 Codex Mini",
-			Description:         "OpenAI GPT-5.1 Codex Mini via GitHub Copilot",
-			ContextLength:       128000,
-			MaxCompletionTokens: 16384,
-			SupportedEndpoints:  []string{"/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5.1 Codex Max",
-			Description:         "OpenAI GPT-5.1 Codex Max via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32768,
-			SupportedEndpoints:  []string{"/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.2",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5.2",
-			Description:         "OpenAI GPT-5.2 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32768,
-			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.2-codex",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5.2 Codex",
-			Description:         "OpenAI GPT-5.2 Codex via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32768,
-			SupportedEndpoints:  []string{"/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.3-codex",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "GPT-5.3 Codex",
-			Description:         "OpenAI GPT-5.3 Codex via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32768,
-			SupportedEndpoints:  []string{"/responses"},
-			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "claude-haiku-4.5",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Claude Haiku 4.5",
-			Description:         "Anthropic Claude Haiku 4.5 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			SupportedEndpoints:  []string{"/chat/completions"},
-		},
-		{
-			ID:                  "claude-opus-4.1",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Claude Opus 4.1",
-			Description:         "Anthropic Claude Opus 4.1 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 32000,
-			SupportedEndpoints:  []string{"/chat/completions"},
-		},
-		{
-			ID:                  "claude-opus-4.5",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Claude Opus 4.5",
-			Description:         "Anthropic Claude Opus 4.5 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			SupportedEndpoints:  []string{"/chat/completions"},
-		},
-		{
-			ID:                  "claude-opus-4.6",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Claude Opus 4.6",
-			Description:         "Anthropic Claude Opus 4.6 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			SupportedEndpoints:  []string{"/chat/completions"},
-		},
-		{
-			ID:                  "claude-sonnet-4",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Claude Sonnet 4",
-			Description:         "Anthropic Claude Sonnet 4 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			SupportedEndpoints:  []string{"/chat/completions"},
-		},
-		{
-			ID:                  "claude-sonnet-4.5",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Claude Sonnet 4.5",
-			Description:         "Anthropic Claude Sonnet 4.5 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			SupportedEndpoints:  []string{"/chat/completions"},
-		},
-		{
-			ID:                  "claude-sonnet-4.6",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Claude Sonnet 4.6",
-			Description:         "Anthropic Claude Sonnet 4.6 via GitHub Copilot",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			SupportedEndpoints:  []string{"/chat/completions"},
-		},
-		{
-			ID:                  "gemini-2.5-pro",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Gemini 2.5 Pro",
-			Description:         "Google Gemini 2.5 Pro via GitHub Copilot",
-			ContextLength:       1048576,
-			MaxCompletionTokens: 65536,
-		},
-		{
-			ID:                  "gemini-3-pro-preview",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Gemini 3 Pro (Preview)",
-			Description:         "Google Gemini 3 Pro Preview via GitHub Copilot",
-			ContextLength:       1048576,
-			MaxCompletionTokens: 65536,
-		},
-		{
-			ID:                  "gemini-3.1-pro-preview",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Gemini 3.1 Pro (Preview)",
-			Description:         "Google Gemini 3.1 Pro Preview via GitHub Copilot",
-			ContextLength:       1048576,
-			MaxCompletionTokens: 65536,
-		},
-		{
-			ID:                  "gemini-3-flash-preview",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Gemini 3 Flash (Preview)",
-			Description:         "Google Gemini 3 Flash Preview via GitHub Copilot",
-			ContextLength:       1048576,
-			MaxCompletionTokens: 65536,
-		},
-		{
-			ID:                  "grok-code-fast-1",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Grok Code Fast 1",
-			Description:         "xAI Grok Code Fast 1 via GitHub Copilot",
-			ContextLength:       128000,
-			MaxCompletionTokens: 16384,
-		},
-		{
-			ID:                  "oswe-vscode-prime",
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "github-copilot",
-			Type:                "github-copilot",
-			DisplayName:         "Raptor mini (Preview)",
-			Description:         "Raptor mini via GitHub Copilot",
-			ContextLength:       128000,
-			MaxCompletionTokens: 16384,
-			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
-		},
-	}...)
-}
-
-// GetKiroModels returns the Kiro (AWS CodeWhisperer) model definitions
-func GetKiroModels() []*ModelInfo {
-	return []*ModelInfo{
-		// --- Base Models ---
-		{
-			ID:                  "kiro-auto",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Auto",
-			Description:         "Automatic model selection by Kiro",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-opus-4-6",
-			Object:              "model",
-			Created:             1736899200, // 2025-01-15
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Opus 4.6",
-			Description:         "Claude Opus 4.6 via Kiro (2.2x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-sonnet-4-6",
-			Object:              "model",
-			Created:             1739836800, // 2025-02-18
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Sonnet 4.6",
-			Description:         "Claude Sonnet 4.6 via Kiro (1.3x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-opus-4-5",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Opus 4.5",
-			Description:         "Claude Opus 4.5 via Kiro (2.2x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-sonnet-4-5",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Sonnet 4.5",
-			Description:         "Claude Sonnet 4.5 via Kiro (1.3x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-sonnet-4",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Sonnet 4",
-			Description:         "Claude Sonnet 4 via Kiro (1.3x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-haiku-4-5",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Haiku 4.5",
-			Description:         "Claude Haiku 4.5 via Kiro (0.4x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		// --- 第三方模型 (通过 Kiro 接入) ---
-		{
-			ID:                  "kiro-deepseek-3-2",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro DeepSeek 3.2",
-			Description:         "DeepSeek 3.2 via Kiro",
-			ContextLength:       128000,
-			MaxCompletionTokens: 32768,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-minimax-m2-1",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro MiniMax M2.1",
-			Description:         "MiniMax M2.1 via Kiro",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-qwen3-coder-next",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Qwen3 Coder Next",
-			Description:         "Qwen3 Coder Next via Kiro",
-			ContextLength:       128000,
-			MaxCompletionTokens: 32768,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-gpt-4o",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro GPT-4o",
-			Description:         "OpenAI GPT-4o via Kiro",
-			ContextLength:       128000,
-			MaxCompletionTokens: 16384,
-		},
-		{
-			ID:                  "kiro-gpt-4",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro GPT-4",
-			Description:         "OpenAI GPT-4 via Kiro",
-			ContextLength:       128000,
-			MaxCompletionTokens: 8192,
-		},
-		{
-			ID:                  "kiro-gpt-4-turbo",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro GPT-4 Turbo",
-			Description:         "OpenAI GPT-4 Turbo via Kiro",
-			ContextLength:       128000,
-			MaxCompletionTokens: 16384,
-		},
-		{
-			ID:                  "kiro-gpt-3-5-turbo",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro GPT-3.5 Turbo",
-			Description:         "OpenAI GPT-3.5 Turbo via Kiro",
-			ContextLength:       16384,
-			MaxCompletionTokens: 4096,
-		},
-		// --- Agentic Variants (Optimized for coding agents with chunked writes) ---
-		{
-			ID:                  "kiro-claude-opus-4-6-agentic",
-			Object:              "model",
-			Created:             1736899200, // 2025-01-15
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Opus 4.6 (Agentic)",
-			Description:         "Claude Opus 4.6 optimized for coding agents (chunked writes)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-sonnet-4-6-agentic",
-			Object:              "model",
-			Created:             1739836800, // 2025-02-18
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Sonnet 4.6 (Agentic)",
-			Description:         "Claude Sonnet 4.6 optimized for coding agents (chunked writes)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-opus-4-5-agentic",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Opus 4.5 (Agentic)",
-			Description:         "Claude Opus 4.5 optimized for coding agents (chunked writes)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-sonnet-4-5-agentic",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Sonnet 4.5 (Agentic)",
-			Description:         "Claude Sonnet 4.5 optimized for coding agents (chunked writes)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-sonnet-4-agentic",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Sonnet 4 (Agentic)",
-			Description:         "Claude Sonnet 4 optimized for coding agents (chunked writes)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kiro-claude-haiku-4-5-agentic",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Claude Haiku 4.5 (Agentic)",
-			Description:         "Claude Haiku 4.5 optimized for coding agents (chunked writes)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-	}
-}
-
-// GetAmazonQModels returns the Amazon Q (AWS CodeWhisperer) model definitions.
-// These models use the same API as Kiro and share the same executor.
-func GetAmazonQModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                  "amazonq-auto",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro", // Uses Kiro executor - same API
-			DisplayName:         "Amazon Q Auto",
-			Description:         "Automatic model selection by Amazon Q",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-		},
-		{
-			ID:                  "amazonq-claude-opus-4.5",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Amazon Q Claude Opus 4.5",
-			Description:         "Claude Opus 4.5 via Amazon Q (2.2x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-		},
-		{
-			ID:                  "amazonq-claude-sonnet-4.5",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Amazon Q Claude Sonnet 4.5",
-			Description:         "Claude Sonnet 4.5 via Amazon Q (1.3x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-		},
-		{
-			ID:                  "amazonq-claude-sonnet-4",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Amazon Q Claude Sonnet 4",
-			Description:         "Claude Sonnet 4 via Amazon Q (1.3x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-		},
-		{
-			ID:                  "amazonq-claude-haiku-4.5",
-			Object:              "model",
-			Created:             1732752000,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Amazon Q Claude Haiku 4.5",
-			Description:         "Claude Haiku 4.5 via Amazon Q (0.4x credit)",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-		},
-	}
-}
diff --git a/internal/registry/model_definitions_static_data.go b/internal/registry/model_definitions_static_data.go
index 18a1a3a14f..cf5f14025a 100644
--- a/internal/registry/model_definitions_static_data.go
+++ b/internal/registry/model_definitions_static_data.go
@@ -15,7 +15,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.5 Haiku",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
+			// Thinking: not supported for Haiku models
 		},
 		{
 			ID:                  "claude-sonnet-4-5-20250929",
@@ -28,41 +28,6 @@ func GetClaudeModels() []*ModelInfo {
 			MaxCompletionTokens: 64000,
 			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
 		},
-		{
-			ID:                  "claude-sonnet-4-6",
-			Object:              "model",
-			Created:             1771372800, // 2026-02-17
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.6 Sonnet",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-opus-4-6",
-			Object:              "model",
-			Created:             1770318000, // 2026-02-05
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.6 Opus",
-			Description:         "Premium model combining maximum intelligence with practical performance",
-			ContextLength:       1000000,
-			MaxCompletionTokens: 128000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
-		},
-		{
-			ID:                  "claude-sonnet-4-6",
-			Object:              "model",
-			Created:             1771286400, // 2026-02-17
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.6 Sonnet",
-			Description:         "Best combination of speed and intelligence",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false},
-		},
 		{
 			ID:                  "claude-opus-4-5-20251101",
 			Object:              "model",
@@ -196,21 +161,6 @@ func GetGeminiModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
 		},
-		{
-			ID:                         "gemini-3.1-pro-preview",
-			Object:                     "model",
-			Created:                    1771459200,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3.1-pro-preview",
-			Version:                    "3.1",
-			DisplayName:                "Gemini 3.1 Pro Preview",
-			Description:                "Gemini 3.1 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
 		{
 			ID:                         "gemini-3-flash-preview",
 			Object:                     "model",
@@ -321,21 +271,6 @@ func GetGeminiVertexModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
 		},
-		{
-			ID:                         "gemini-3.1-pro-preview",
-			Object:                     "model",
-			Created:                    1771459200,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3.1-pro-preview",
-			Version:                    "3.1",
-			DisplayName:                "Gemini 3.1 Pro Preview",
-			Description:                "Gemini 3.1 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
 		{
 			ID:                         "gemini-3-pro-image-preview",
 			Object:                     "model",
@@ -478,21 +413,6 @@ func GetGeminiCLIModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
 		},
-		{
-			ID:                         "gemini-3.1-pro-preview",
-			Object:                     "model",
-			Created:                    1771459200,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3.1-pro-preview",
-			Version:                    "3.1",
-			DisplayName:                "Gemini 3.1 Pro Preview",
-			Description:                "Gemini 3.1 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
-		},
 		{
 			ID:                         "gemini-3-flash-preview",
 			Object:                     "model",
@@ -574,21 +494,6 @@ func GetAIStudioModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
-		{
-			ID:                         "gemini-3.1-pro-preview",
-			Object:                     "model",
-			Created:                    1771459200,
-			OwnedBy:                    "google",
-			Type:                       "gemini",
-			Name:                       "models/gemini-3.1-pro-preview",
-			Version:                    "3.1",
-			DisplayName:                "Gemini 3.1 Pro Preview",
-			Description:                "Gemini 3.1 Pro Preview",
-			InputTokenLimit:            1048576,
-			OutputTokenLimit:           65536,
-			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		},
 		{
 			ID:                         "gemini-3-flash-preview",
 			Object:                     "model",
@@ -811,34 +716,6 @@ func GetOpenAIModels() []*ModelInfo {
 			SupportedParameters: []string{"tools"},
 			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
 		},
-		{
-			ID:                  "gpt-5.3-codex",
-			Object:              "model",
-			Created:             1770307200,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.3",
-			DisplayName:         "GPT 5.3 Codex",
-			Description:         "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
-		},
-		{
-			ID:                  "gpt-5.3-codex-spark",
-			Object:              "model",
-			Created:             1770912000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.3",
-			DisplayName:         "GPT 5.3 Codex Spark",
-			Description:         "Ultra-fast coding model.",
-			ContextLength:       128000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
-		},
 	}
 }
 
@@ -871,19 +748,6 @@ func GetQwenModels() []*ModelInfo {
 			MaxCompletionTokens: 2048,
 			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
 		},
-		{
-			ID:                  "coder-model",
-			Object:              "model",
-			Created:             1771171200,
-			OwnedBy:             "qwen",
-			Type:                "qwen",
-			Version:             "3.5",
-			DisplayName:         "Qwen 3.5 Plus",
-			Description:         "efficient hybrid model with leading coding performance",
-			ContextLength:       1048576,
-			MaxCompletionTokens: 65536,
-			SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"},
-		},
 		{
 			ID:                  "vision-model",
 			Object:              "model",
@@ -924,7 +788,6 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
 		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport},
 		{ID: "glm-4.7", DisplayName: "GLM-4.7", Description: "Zhipu GLM 4.7 general model", Created: 1766448000, Thinking: iFlowThinkingSupport},
-		{ID: "glm-5", DisplayName: "GLM-5", Description: "Zhipu GLM 5 general model", Created: 1770768000, Thinking: iFlowThinkingSupport},
 		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
 		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200},
 		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
@@ -939,9 +802,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
 		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: iFlowThinkingSupport},
 		{ID: "minimax-m2.1", DisplayName: "MiniMax-M2.1", Description: "MiniMax M2.1", Created: 1766448000, Thinking: iFlowThinkingSupport},
-		{ID: "minimax-m2.5", DisplayName: "MiniMax-M2.5", Description: "MiniMax M2.5", Created: 1770825600, Thinking: iFlowThinkingSupport},
 		{ID: "iflow-rome-30ba3b", DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200},
-		{ID: "kimi-k2.5", DisplayName: "Kimi-K2.5", Description: "Moonshot Kimi K2.5", Created: 1769443200, Thinking: iFlowThinkingSupport},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
 	for _, entry := range entries {
@@ -975,56 +836,11 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 		"gemini-2.5-flash-lite":      {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
 		"gemini-3-pro-high":          {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
 		"gemini-3-pro-image":         {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
-		"gemini-3.1-pro-high":        {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
 		"gemini-3-flash":             {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
+		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 		"claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"claude-opus-4-6-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 		"claude-sonnet-4-5":          {MaxCompletionTokens: 64000},
-		"claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"claude-sonnet-4-6":          {MaxCompletionTokens: 64000},
-		"claude-sonnet-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 		"gpt-oss-120b-medium":        {},
 		"tab_flash_lite_preview":     {},
 	}
 }
-
-// GetKimiModels returns the standard Kimi (Moonshot AI) model definitions
-func GetKimiModels() []*ModelInfo {
-	return []*ModelInfo{
-		{
-			ID:                  "kimi-k2",
-			Object:              "model",
-			Created:             1752192000, // 2025-07-11
-			OwnedBy:             "moonshot",
-			Type:                "kimi",
-			DisplayName:         "Kimi K2",
-			Description:         "Kimi K2 - Moonshot AI's flagship coding model",
-			ContextLength:       131072,
-			MaxCompletionTokens: 32768,
-		},
-		{
-			ID:                  "kimi-k2-thinking",
-			Object:              "model",
-			Created:             1762387200, // 2025-11-06
-			OwnedBy:             "moonshot",
-			Type:                "kimi",
-			DisplayName:         "Kimi K2 Thinking",
-			Description:         "Kimi K2 Thinking - Extended reasoning model",
-			ContextLength:       131072,
-			MaxCompletionTokens: 32768,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-		{
-			ID:                  "kimi-k2.5",
-			Object:              "model",
-			Created:             1769472000, // 2026-01-26
-			OwnedBy:             "moonshot",
-			Type:                "kimi",
-			DisplayName:         "Kimi K2.5",
-			Description:         "Kimi K2.5 - Latest Moonshot AI coding model with improved capabilities",
-			ContextLength:       131072,
-			MaxCompletionTokens: 32768,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		},
-	}
-}
diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
index 3fa2a3b5cc..edb1f124d9 100644
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -47,8 +47,6 @@ type ModelInfo struct {
 	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 	// SupportedParameters lists supported parameters
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
-	// SupportedEndpoints lists supported API endpoints (e.g., "/chat/completions", "/responses").
-	SupportedEndpoints []string `json:"supported_endpoints,omitempty"`
 
 	// Thinking holds provider-specific reasoning/thinking budget capabilities.
 	// This is optional and currently used for Gemini thinking budget normalization.
@@ -501,9 +499,6 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo {
 	if len(model.SupportedParameters) > 0 {
 		copyModel.SupportedParameters = append([]string(nil), model.SupportedParameters...)
 	}
-	if len(model.SupportedEndpoints) > 0 {
-		copyModel.SupportedEndpoints = append([]string(nil), model.SupportedEndpoints...)
-	}
 	return &copyModel
 }
 
@@ -601,7 +596,8 @@ func (r *ModelRegistry) SetModelQuotaExceeded(clientID, modelID string) {
 	defer r.mutex.Unlock()
 
 	if registration, exists := r.models[modelID]; exists {
-		registration.QuotaExceededClients[clientID] = new(time.Now())
+		now := time.Now()
+		registration.QuotaExceededClients[clientID] = &now
 		log.Debugf("Marked model %s as quota exceeded for client %s", modelID, clientID)
 	}
 }
@@ -1028,13 +1024,9 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 		if len(model.SupportedParameters) > 0 {
 			result["supported_parameters"] = model.SupportedParameters
 		}
-		if len(model.SupportedEndpoints) > 0 {
-			result["supported_endpoints"] = model.SupportedEndpoints
-		}
 		return result
 
-	case "claude", "kiro", "antigravity":
-		// Claude, Kiro, and Antigravity all use Claude-compatible format for Claude Code client
+	case "claude":
 		result := map[string]any{
 			"id":       model.ID,
 			"object":   "model",
@@ -1049,19 +1041,6 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 		if model.DisplayName != "" {
 			result["display_name"] = model.DisplayName
 		}
-		// Add thinking support for Claude Code client
-		// Claude Code checks for "thinking" field (simple boolean) to enable tab toggle
-		// Also add "extended_thinking" for detailed budget info
-		if model.Thinking != nil {
-			result["thinking"] = true
-			result["extended_thinking"] = map[string]any{
-				"supported":       true,
-				"min":             model.Thinking.Min,
-				"max":             model.Thinking.Max,
-				"zero_allowed":    model.Thinking.ZeroAllowed,
-				"dynamic_allowed": model.Thinking.DynamicAllowed,
-			}
-		}
 		return result
 
 	case "gemini":
diff --git a/internal/routing/adapter.go b/internal/routing/adapter.go
new file mode 100644
index 0000000000..1d90b0fed5
--- /dev/null
+++ b/internal/routing/adapter.go
@@ -0,0 +1,39 @@
+// Package routing provides adapter to integrate with existing codebase.
+package routing
+
+import (
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+)
+
+// Adapter bridges the new routing layer with existing auth manager.
+type Adapter struct {
+	router *Router
+	exec   *Executor
+}
+
+// NewAdapter creates a new adapter with the given configuration and auth manager.
+func NewAdapter(cfg *config.Config, authManager *coreauth.Manager) *Adapter {
+	registry := NewRegistry()
+	
+	// TODO: Register OAuth providers from authManager
+	// TODO: Register API key providers from cfg
+	
+	router := NewRouter(registry, cfg)
+	exec := NewExecutor(router)
+	
+	return &Adapter{
+		router: router,
+		exec:   exec,
+	}
+}
+
+// Router returns the underlying router.
+func (a *Adapter) Router() *Router {
+	return a.router
+}
+
+// Executor returns the underlying executor.
+func (a *Adapter) Executor() *Executor {
+	return a.exec
+}
diff --git a/internal/routing/ctxkeys/keys.go b/internal/routing/ctxkeys/keys.go
new file mode 100644
index 0000000000..5838d54d2f
--- /dev/null
+++ b/internal/routing/ctxkeys/keys.go
@@ -0,0 +1,11 @@
+package ctxkeys
+
+type key string
+
+const (
+	MappedModel     key = "mapped_model"
+	FallbackModels  key = "fallback_models"
+	RouteCandidates key = "route_candidates"
+	RoutingDecision key = "routing_decision"
+	MappingApplied  key = "mapping_applied"
+)
diff --git a/internal/routing/executor.go b/internal/routing/executor.go
new file mode 100644
index 0000000000..30b5750b0b
--- /dev/null
+++ b/internal/routing/executor.go
@@ -0,0 +1,111 @@
+package routing
+
+import (
+	"context"
+	"errors"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	log "github.com/sirupsen/logrus"
+)
+
+// Executor handles request execution with fallback support.
+type Executor struct {
+	router *Router
+}
+
+// NewExecutor creates a new executor with the given router.
+func NewExecutor(router *Router) *Executor {
+	return &Executor{router: router}
+}
+
+// Execute sends the request through the routing decision.
+func (e *Executor) Execute(ctx context.Context, req executor.Request) (executor.Response, error) {
+	decision := e.router.Resolve(req.Model)
+	
+	log.Debugf("routing: %s -> %s (%d candidates)", 
+		decision.RequestedModel, 
+		decision.ResolvedModel, 
+		len(decision.Candidates))
+
+	var lastErr error
+	tried := make(map[string]struct{})
+
+	for i, candidate := range decision.Candidates {
+		key := candidate.Provider.Name() + "/" + candidate.Model
+		if _, ok := tried[key]; ok {
+			continue
+		}
+		tried[key] = struct{}{}
+
+		log.Debugf("routing: trying candidate %d/%d: %s with model %s",
+			i+1, len(decision.Candidates), candidate.Provider.Name(), candidate.Model)
+
+		req.Model = candidate.Model
+		resp, err := candidate.Provider.Execute(ctx, candidate.Model, req)
+		if err == nil {
+			return resp, nil
+		}
+
+		lastErr = err
+		log.Debugf("routing: candidate failed: %v", err)
+
+		// Check if it's a fatal error (not retryable)
+		if isFatalError(err) {
+			break
+		}
+	}
+
+	if lastErr != nil {
+		return executor.Response{}, lastErr
+	}
+	return executor.Response{}, errors.New("no available providers")
+}
+
+// ExecuteStream sends a streaming request through the routing decision.
+func (e *Executor) ExecuteStream(ctx context.Context, req executor.Request) (<-chan executor.StreamChunk, error) {
+	decision := e.router.Resolve(req.Model)
+
+	log.Debugf("routing stream: %s -> %s (%d candidates)",
+		decision.RequestedModel,
+		decision.ResolvedModel,
+		len(decision.Candidates))
+
+	var lastErr error
+	tried := make(map[string]struct{})
+
+	for i, candidate := range decision.Candidates {
+		key := candidate.Provider.Name() + "/" + candidate.Model
+		if _, ok := tried[key]; ok {
+			continue
+		}
+		tried[key] = struct{}{}
+
+		log.Debugf("routing stream: trying candidate %d/%d: %s with model %s",
+			i+1, len(decision.Candidates), candidate.Provider.Name(), candidate.Model)
+
+		req.Model = candidate.Model
+		chunks, err := candidate.Provider.ExecuteStream(ctx, candidate.Model, req)
+		if err == nil {
+			return chunks, nil
+		}
+
+		lastErr = err
+		log.Debugf("routing stream: candidate failed: %v", err)
+
+		if isFatalError(err) {
+			break
+		}
+	}
+
+	if lastErr != nil {
+		return nil, lastErr
+	}
+	return nil, errors.New("no available providers")
+}
+
+// isFatalError returns true if the error is not retryable.
+func isFatalError(err error) bool {
+	// TODO: implement based on error type
+	// For now, all errors are retryable
+	return false
+}
diff --git a/internal/routing/extractor.go b/internal/routing/extractor.go
new file mode 100644
index 0000000000..94fe969ac9
--- /dev/null
+++ b/internal/routing/extractor.go
@@ -0,0 +1,59 @@
+package routing
+
+import (
+	"strings"
+
+	"github.com/tidwall/gjson"
+)
+
+// ModelExtractor extracts model names from request data.
+type ModelExtractor interface {
+	// Extract returns the model name from the request body and gin parameters.
+	// The ginParams map contains route parameters like "action" and "path".
+	Extract(body []byte, ginParams map[string]string) (string, error)
+}
+
+// DefaultModelExtractor is the standard implementation of ModelExtractor.
+type DefaultModelExtractor struct{}
+
+// NewModelExtractor creates a new DefaultModelExtractor.
+func NewModelExtractor() *DefaultModelExtractor {
+	return &DefaultModelExtractor{}
+}
+
+// Extract extracts the model name from the request.
+// It checks in order:
+// 1. JSON body "model" field (OpenAI, Claude format)
+// 2. "action" parameter for Gemini standard format (e.g., "gemini-pro:generateContent")
+// 3. "path" parameter for AMP CLI Gemini format (e.g., "/publishers/google/models/gemini-3-pro:streamGenerateContent")
+func (e *DefaultModelExtractor) Extract(body []byte, ginParams map[string]string) (string, error) {
+	// First try to parse from JSON body (OpenAI, Claude, etc.)
+	if result := gjson.GetBytes(body, "model"); result.Exists() && result.Type == gjson.String {
+		return result.String(), nil
+	}
+
+	// For Gemini requests, model is in the URL path
+	// Standard format: /models/{model}:generateContent -> :action parameter
+	if action, ok := ginParams["action"]; ok && action != "" {
+		// Split by colon to get model name (e.g., "gemini-pro:generateContent" -> "gemini-pro")
+		parts := strings.Split(action, ":")
+		if len(parts) > 0 && parts[0] != "" {
+			return parts[0], nil
+		}
+	}
+
+	// AMP CLI format: /publishers/google/models/{model}:method -> *path parameter
+	// Example: /publishers/google/models/gemini-3-pro-preview:streamGenerateContent
+	if path, ok := ginParams["path"]; ok && path != "" {
+		// Look for /models/{model}:method pattern
+		if idx := strings.Index(path, "/models/"); idx >= 0 {
+			modelPart := path[idx+8:] // Skip "/models/"
+			// Split by colon to get model name
+			if colonIdx := strings.Index(modelPart, ":"); colonIdx > 0 {
+				return modelPart[:colonIdx], nil
+			}
+		}
+	}
+
+	return "", nil
+}
diff --git a/internal/routing/extractor_test.go b/internal/routing/extractor_test.go
new file mode 100644
index 0000000000..485b4831b1
--- /dev/null
+++ b/internal/routing/extractor_test.go
@@ -0,0 +1,214 @@
+package routing
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestModelExtractor_ExtractFromJSONBody(t *testing.T) {
+	extractor := NewModelExtractor()
+
+	tests := []struct {
+		name     string
+		body     []byte
+		want     string
+		wantErr  bool
+	}{
+		{
+			name: "extract from JSON body with model field",
+			body: []byte(`{"model":"gpt-4.1"}`),
+			want: "gpt-4.1",
+		},
+		{
+			name: "extract claude model from JSON body",
+			body: []byte(`{"model":"claude-3-5-sonnet-20241022"}`),
+			want: "claude-3-5-sonnet-20241022",
+		},
+		{
+			name: "extract with additional fields",
+			body: []byte(`{"model":"gpt-4","messages":[{"role":"user","content":"hello"}]}`),
+			want: "gpt-4",
+		},
+		{
+			name: "empty body returns empty",
+			body: []byte{},
+			want: "",
+		},
+		{
+			name: "no model field returns empty",
+			body: []byte(`{"messages":[]}`),
+			want: "",
+		},
+		{
+			name: "model is not string returns empty",
+			body: []byte(`{"model":123}`),
+			want: "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := extractor.Extract(tt.body, nil)
+			if tt.wantErr {
+				assert.Error(t, err)
+				return
+			}
+			assert.NoError(t, err)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+func TestModelExtractor_ExtractFromGeminiActionParam(t *testing.T) {
+	extractor := NewModelExtractor()
+
+	tests := []struct {
+		name      string
+		body      []byte
+		ginParams map[string]string
+		want      string
+	}{
+		{
+			name:      "extract from action parameter - gemini-pro",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"action": "gemini-pro:generateContent"},
+			want:      "gemini-pro",
+		},
+		{
+			name:      "extract from action parameter - gemini-ultra",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"action": "gemini-ultra:chat"},
+			want:      "gemini-ultra",
+		},
+		{
+			name:      "empty action returns empty",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"action": ""},
+			want:      "",
+		},
+		{
+			name:      "action without colon returns full value",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"action": "gemini-model"},
+			want:      "gemini-model",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := extractor.Extract(tt.body, tt.ginParams)
+			assert.NoError(t, err)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+func TestModelExtractor_ExtractFromGeminiV1Beta1Path(t *testing.T) {
+	extractor := NewModelExtractor()
+
+	tests := []struct {
+		name      string
+		body      []byte
+		ginParams map[string]string
+		want      string
+	}{
+		{
+			name:      "extract from v1beta1 path - gemini-3-pro",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"path": "/publishers/google/models/gemini-3-pro:streamGenerateContent"},
+			want:      "gemini-3-pro",
+		},
+		{
+			name:      "extract from v1beta1 path with preview",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"path": "/publishers/google/models/gemini-3-pro-preview:generateContent"},
+			want:      "gemini-3-pro-preview",
+		},
+		{
+			name:      "path without models segment returns empty",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"path": "/publishers/google/gemini-3-pro:streamGenerateContent"},
+			want:      "",
+		},
+		{
+			name:      "empty path returns empty",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"path": ""},
+			want:      "",
+		},
+		{
+			name:      "path with /models/ but no colon returns empty",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"path": "/publishers/google/models/gemini-3-pro"},
+			want:      "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := extractor.Extract(tt.body, tt.ginParams)
+			assert.NoError(t, err)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+func TestModelExtractor_ExtractPriority(t *testing.T) {
+	extractor := NewModelExtractor()
+
+	// JSON body takes priority over gin params
+	t.Run("JSON body takes priority over action param", func(t *testing.T) {
+		body := []byte(`{"model":"gpt-4"}`)
+		params := map[string]string{"action": "gemini-pro:generateContent"}
+		got, err := extractor.Extract(body, params)
+		assert.NoError(t, err)
+		assert.Equal(t, "gpt-4", got)
+	})
+
+	// Action param takes priority over path param
+	t.Run("action param takes priority over path param", func(t *testing.T) {
+		body := []byte(`{}`)
+		params := map[string]string{
+			"action": "gemini-action:generate",
+			"path":   "/publishers/google/models/gemini-path:streamGenerateContent",
+		}
+		got, err := extractor.Extract(body, params)
+		assert.NoError(t, err)
+		assert.Equal(t, "gemini-action", got)
+	})
+}
+
+func TestModelExtractor_NoModelFound(t *testing.T) {
+	extractor := NewModelExtractor()
+
+	tests := []struct {
+		name      string
+		body      []byte
+		ginParams map[string]string
+	}{
+		{
+			name:      "empty body and no params",
+			body:      []byte{},
+			ginParams: nil,
+		},
+		{
+			name:      "body without model and no params",
+			body:      []byte(`{"messages":[]}`),
+			ginParams: map[string]string{},
+		},
+		{
+			name:      "irrelevant params only",
+			body:      []byte(`{}`),
+			ginParams: map[string]string{"other": "value"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := extractor.Extract(tt.body, tt.ginParams)
+			assert.NoError(t, err)
+			assert.Empty(t, got)
+		})
+	}
+}
diff --git a/internal/routing/provider.go b/internal/routing/provider.go
new file mode 100644
index 0000000000..8e1606c850
--- /dev/null
+++ b/internal/routing/provider.go
@@ -0,0 +1,80 @@
+// Package routing provides unified model routing for all provider types.
+package routing
+
+import (
+	"context"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+)
+
+// ProviderType indicates the type of provider.
+type ProviderType string
+
+const (
+	ProviderTypeOAuth  ProviderType = "oauth"
+	ProviderTypeAPIKey ProviderType = "api_key"
+	ProviderTypeVertex ProviderType = "vertex"
+)
+
+// Provider is the unified interface for all provider types (OAuth, API key, etc.).
+type Provider interface {
+	// Name returns the unique provider identifier.
+	Name() string
+
+	// Type returns the provider type.
+	Type() ProviderType
+
+	// SupportsModel returns true if this provider can handle the given model.
+	SupportsModel(model string) bool
+
+	// Available returns true if the provider is available for the model (not quota exceeded).
+	Available(model string) bool
+
+	// Priority returns the priority for this provider (lower = tried first).
+	Priority() int
+
+	// Execute sends the request to the provider.
+	Execute(ctx context.Context, model string, req executor.Request) (executor.Response, error)
+
+	// ExecuteStream sends a streaming request to the provider.
+	ExecuteStream(ctx context.Context, model string, req executor.Request) (<-chan executor.StreamChunk, error)
+}
+
+// ProviderCandidate represents a provider + model combination to try.
+type ProviderCandidate struct {
+	Provider Provider
+	Model    string // The actual model name to use (may be different from requested due to aliasing)
+}
+
+// Registry manages all available providers.
+type Registry struct {
+	providers []Provider
+}
+
+// NewRegistry creates a new provider registry.
+func NewRegistry() *Registry {
+	return &Registry{
+		providers: make([]Provider, 0),
+	}
+}
+
+// Register adds a provider to the registry.
+func (r *Registry) Register(p Provider) {
+	r.providers = append(r.providers, p)
+}
+
+// FindProviders returns all providers that support the given model and are available.
+func (r *Registry) FindProviders(model string) []Provider {
+	var result []Provider
+	for _, p := range r.providers {
+		if p.SupportsModel(model) && p.Available(model) {
+			result = append(result, p)
+		}
+	}
+	return result
+}
+
+// All returns all registered providers.
+func (r *Registry) All() []Provider {
+	return r.providers
+}
diff --git a/internal/routing/providers/apikey.go b/internal/routing/providers/apikey.go
new file mode 100644
index 0000000000..4603702dc6
--- /dev/null
+++ b/internal/routing/providers/apikey.go
@@ -0,0 +1,156 @@
+package providers
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"strings"
+	"sync"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/routing"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+)
+
+// APIKeyProvider wraps API key configs as routing.Provider.
+type APIKeyProvider struct {
+	name     string
+	provider string // claude, gemini, codex, vertex
+	keys     []APIKeyEntry
+	mu       sync.RWMutex
+	client   HTTPClient
+}
+
+// APIKeyEntry represents a single API key configuration.
+type APIKeyEntry struct {
+	APIKey  string
+	BaseURL string
+	Models  []config.ClaudeModel // Using ClaudeModel as generic model alias
+}
+
+// HTTPClient interface for making HTTP requests.
+type HTTPClient interface {
+	Do(req *http.Request) (*http.Response, error)
+}
+
+// NewAPIKeyProvider creates a new API key provider.
+func NewAPIKeyProvider(name, provider string, client HTTPClient) *APIKeyProvider {
+	return &APIKeyProvider{
+		name:     name,
+		provider: provider,
+		keys:     make([]APIKeyEntry, 0),
+		client:   client,
+	}
+}
+
+// Name returns the provider name.
+func (p *APIKeyProvider) Name() string {
+	return p.name
+}
+
+// Type returns ProviderTypeAPIKey.
+func (p *APIKeyProvider) Type() routing.ProviderType {
+	return routing.ProviderTypeAPIKey
+}
+
+// SupportsModel checks if the model is supported by this provider.
+func (p *APIKeyProvider) SupportsModel(model string) bool {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+
+	for _, key := range p.keys {
+		for _, m := range key.Models {
+			if strings.EqualFold(m.Alias, model) || strings.EqualFold(m.Name, model) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// Available always returns true for API keys (unless explicitly disabled).
+func (p *APIKeyProvider) Available(model string) bool {
+	return p.SupportsModel(model)
+}
+
+// Priority returns the priority (API key is lower priority than OAuth).
+func (p *APIKeyProvider) Priority() int {
+	return 20
+}
+
+// Execute sends the request using the API key.
+func (p *APIKeyProvider) Execute(ctx context.Context, model string, req executor.Request) (executor.Response, error) {
+	key := p.selectKey(model)
+	if key == nil {
+		return executor.Response{}, ErrNoMatchingAPIKey
+	}
+
+	// Resolve the actual model name from alias
+	actualModel := p.resolveModel(key, model)
+
+	// Execute via HTTP client
+	return p.executeHTTP(ctx, key, actualModel, req)
+}
+
+// ExecuteStream sends a streaming request.
+func (p *APIKeyProvider) ExecuteStream(ctx context.Context, model string, req executor.Request) (
+	<-chan executor.StreamChunk, error) {
+	key := p.selectKey(model)
+	if key == nil {
+		return nil, ErrNoMatchingAPIKey
+	}
+
+	actualModel := p.resolveModel(key, model)
+	return p.executeHTTPStream(ctx, key, actualModel, req)
+}
+
+// AddKey adds an API key entry.
+func (p *APIKeyProvider) AddKey(entry APIKeyEntry) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.keys = append(p.keys, entry)
+}
+
+// selectKey selects a key that supports the model.
+func (p *APIKeyProvider) selectKey(model string) *APIKeyEntry {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+
+	for _, key := range p.keys {
+		for _, m := range key.Models {
+			if strings.EqualFold(m.Alias, model) || strings.EqualFold(m.Name, model) {
+				return &key
+			}
+		}
+	}
+	return nil
+}
+
+// resolveModel resolves alias to actual model name.
+func (p *APIKeyProvider) resolveModel(key *APIKeyEntry, requested string) string {
+	for _, m := range key.Models {
+		if strings.EqualFold(m.Alias, requested) {
+			return m.Name
+		}
+	}
+	return requested
+}
+
+// executeHTTP makes the HTTP request.
+func (p *APIKeyProvider) executeHTTP(ctx context.Context, key *APIKeyEntry, model string, req executor.Request) (executor.Response, error) {
+	// TODO: implement actual HTTP execution
+	// This is a placeholder - actual implementation would build HTTP request
+	return executor.Response{}, errors.New("not yet implemented")
+}
+
+// executeHTTPStream makes a streaming HTTP request.
+func (p *APIKeyProvider) executeHTTPStream(ctx context.Context, key *APIKeyEntry, model string, req executor.Request) (
+	<-chan executor.StreamChunk, error) {
+	// TODO: implement actual HTTP streaming
+	return nil, errors.New("not yet implemented")
+}
+
+// Errors
+var (
+	ErrNoMatchingAPIKey = errors.New("no API key supports the requested model")
+)
diff --git a/internal/routing/providers/oauth.go b/internal/routing/providers/oauth.go
new file mode 100644
index 0000000000..ae0c09e28d
--- /dev/null
+++ b/internal/routing/providers/oauth.go
@@ -0,0 +1,132 @@
+package providers
+
+import (
+	"context"
+	"errors"
+	"sync"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/routing"
+	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+)
+
+// OAuthProvider wraps OAuth-based auths as routing.Provider.
+type OAuthProvider struct {
+	name      string
+	auths     []*coreauth.Auth
+	mu        sync.RWMutex
+	executor  coreauth.ProviderExecutor
+}
+
+// NewOAuthProvider creates a new OAuth provider.
+func NewOAuthProvider(name string, exec coreauth.ProviderExecutor) *OAuthProvider {
+	return &OAuthProvider{
+		name:     name,
+		auths:    make([]*coreauth.Auth, 0),
+		executor: exec,
+	}
+}
+
+// Name returns the provider name.
+func (p *OAuthProvider) Name() string {
+	return p.name
+}
+
+// Type returns ProviderTypeOAuth.
+func (p *OAuthProvider) Type() routing.ProviderType {
+	return routing.ProviderTypeOAuth
+}
+
+// SupportsModel checks if any auth supports the model.
+func (p *OAuthProvider) SupportsModel(model string) bool {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+
+	// OAuth providers typically support models via oauth-model-alias
+	// The actual model support is determined at execution time
+	return true
+}
+
+// Available checks if there's an available auth for the model.
+func (p *OAuthProvider) Available(model string) bool {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+
+	for _, auth := range p.auths {
+		if p.isAuthAvailable(auth, model) {
+			return true
+		}
+	}
+	return false
+}
+
+// Priority returns the priority (OAuth is preferred over API key).
+func (p *OAuthProvider) Priority() int {
+	return 10
+}
+
+// Execute sends the request using an available OAuth auth.
+func (p *OAuthProvider) Execute(ctx context.Context, model string, req executor.Request) (executor.Response, error) {
+	auth := p.selectAuth(model)
+	if auth == nil {
+		return executor.Response{}, ErrNoAvailableAuth
+	}
+
+	return p.executor.Execute(ctx, auth, req, executor.Options{})
+}
+
+// ExecuteStream sends a streaming request.
+func (p *OAuthProvider) ExecuteStream(ctx context.Context, model string, req executor.Request) (<-chan executor.StreamChunk, error) {
+	auth := p.selectAuth(model)
+	if auth == nil {
+		return nil, ErrNoAvailableAuth
+	}
+
+	return p.executor.ExecuteStream(ctx, auth, req, executor.Options{})
+}
+
+// AddAuth adds an auth to this provider.
+func (p *OAuthProvider) AddAuth(auth *coreauth.Auth) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.auths = append(p.auths, auth)
+}
+
+// RemoveAuth removes an auth from this provider.
+func (p *OAuthProvider) RemoveAuth(authID string) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	filtered := make([]*coreauth.Auth, 0, len(p.auths))
+	for _, auth := range p.auths {
+		if auth.ID != authID {
+			filtered = append(filtered, auth)
+		}
+	}
+	p.auths = filtered
+}
+
+// isAuthAvailable checks if an auth is available for the model.
+func (p *OAuthProvider) isAuthAvailable(auth *coreauth.Auth, model string) bool {
+	// TODO: integrate with model_registry for quota checking
+	// For now, just check if auth exists
+	return auth != nil
+}
+
+// selectAuth selects an available auth for the model.
+func (p *OAuthProvider) selectAuth(model string) *coreauth.Auth {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+
+	for _, auth := range p.auths {
+		if p.isAuthAvailable(auth, model) {
+			return auth
+		}
+	}
+	return nil
+}
+
+// Errors
+var (
+	ErrNoAvailableAuth = errors.New("no available OAuth auth for model")
+)
diff --git a/internal/routing/rewriter.go b/internal/routing/rewriter.go
new file mode 100644
index 0000000000..d0c027716a
--- /dev/null
+++ b/internal/routing/rewriter.go
@@ -0,0 +1,159 @@
+package routing
+
+import (
+	"bytes"
+	"net/http"
+	"strings"
+
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+	log "github.com/sirupsen/logrus"
+)
+
+// ModelRewriter handles model name rewriting in requests and responses.
+type ModelRewriter interface {
+	// RewriteRequestBody rewrites the model field in a JSON request body.
+	// Returns the modified body or the original if no rewrite was needed.
+	RewriteRequestBody(body []byte, newModel string) ([]byte, error)
+
+	// WrapResponseWriter wraps an http.ResponseWriter to rewrite model names in the response.
+	// Returns the wrapped writer and a cleanup function that must be called after the response is complete.
+	WrapResponseWriter(w http.ResponseWriter, requestedModel, resolvedModel string) (http.ResponseWriter, func())
+}
+
+// DefaultModelRewriter is the standard implementation of ModelRewriter.
+type DefaultModelRewriter struct{}
+
+// NewModelRewriter creates a new DefaultModelRewriter.
+func NewModelRewriter() *DefaultModelRewriter {
+	return &DefaultModelRewriter{}
+}
+
+// RewriteRequestBody replaces the model name in a JSON request body.
+func (r *DefaultModelRewriter) RewriteRequestBody(body []byte, newModel string) ([]byte, error) {
+	if !gjson.GetBytes(body, "model").Exists() {
+		return body, nil
+	}
+	result, err := sjson.SetBytes(body, "model", newModel)
+	if err != nil {
+		return body, err
+	}
+	return result, nil
+}
+
+// WrapResponseWriter wraps a response writer to rewrite model names.
+// The cleanup function must be called after the handler completes to flush any buffered data.
+func (r *DefaultModelRewriter) WrapResponseWriter(w http.ResponseWriter, requestedModel, resolvedModel string) (http.ResponseWriter, func()) {
+	rw := &responseRewriter{
+		ResponseWriter:  w,
+		body:            &bytes.Buffer{},
+		requestedModel:  requestedModel,
+		resolvedModel:   resolvedModel,
+	}
+	return rw, func() { rw.flush() }
+}
+
+// responseRewriter wraps http.ResponseWriter to intercept and modify the response body.
+type responseRewriter struct {
+	http.ResponseWriter
+	body           *bytes.Buffer
+	requestedModel string
+	resolvedModel  string
+	isStreaming    bool
+	wroteHeader    bool
+	flushed        bool
+}
+
+// Write intercepts response writes and buffers them for model name replacement.
+func (rw *responseRewriter) Write(data []byte) (int, error) {
+	// Ensure header is written
+	if !rw.wroteHeader {
+		rw.WriteHeader(http.StatusOK)
+	}
+
+	// Detect streaming on first write
+	if rw.body.Len() == 0 && !rw.isStreaming {
+		contentType := rw.Header().Get("Content-Type")
+		rw.isStreaming = strings.Contains(contentType, "text/event-stream") ||
+			strings.Contains(contentType, "stream")
+	}
+
+	if rw.isStreaming {
+		n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
+		if err == nil {
+			if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
+				flusher.Flush()
+			}
+		}
+		return n, err
+	}
+	return rw.body.Write(data)
+}
+
+// WriteHeader captures the status code and delegates to the underlying writer.
+func (rw *responseRewriter) WriteHeader(code int) {
+	if !rw.wroteHeader {
+		rw.wroteHeader = true
+		rw.ResponseWriter.WriteHeader(code)
+	}
+}
+
+// flush writes the buffered response with model names rewritten.
+func (rw *responseRewriter) flush() {
+	if rw.flushed {
+		return
+	}
+	rw.flushed = true
+
+	if rw.isStreaming {
+		if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
+			flusher.Flush()
+		}
+		return
+	}
+	if rw.body.Len() > 0 {
+		data := rw.rewriteModelInResponse(rw.body.Bytes())
+		if _, err := rw.ResponseWriter.Write(data); err != nil {
+			log.Warnf("response rewriter: failed to write rewritten response: %v", err)
+		}
+	}
+}
+
+// modelFieldPaths lists all JSON paths where model name may appear.
+var modelFieldPaths = []string{"model", "modelVersion", "response.modelVersion", "message.model"}
+
+// rewriteModelInResponse replaces all occurrences of the resolved model with the requested model.
+func (rw *responseRewriter) rewriteModelInResponse(data []byte) []byte {
+	if rw.requestedModel == "" || rw.resolvedModel == "" || rw.requestedModel == rw.resolvedModel {
+		return data
+	}
+
+	for _, path := range modelFieldPaths {
+		if gjson.GetBytes(data, path).Exists() {
+			data, _ = sjson.SetBytes(data, path, rw.requestedModel)
+		}
+	}
+	return data
+}
+
+// rewriteStreamChunk rewrites model names in SSE stream chunks.
+func (rw *responseRewriter) rewriteStreamChunk(chunk []byte) []byte {
+	if rw.requestedModel == "" || rw.resolvedModel == "" || rw.requestedModel == rw.resolvedModel {
+		return chunk
+	}
+
+	// SSE format: "data: {json}\n\n"
+	lines := bytes.Split(chunk, []byte("\n"))
+	for i, line := range lines {
+		if bytes.HasPrefix(line, []byte("data: ")) {
+			jsonData := bytes.TrimPrefix(line, []byte("data: "))
+			if len(jsonData) > 0 && jsonData[0] == '{' {
+				// Rewrite JSON in the data line
+				rewritten := rw.rewriteModelInResponse(jsonData)
+				lines[i] = append([]byte("data: "), rewritten...)
+			}
+		}
+	}
+
+	return bytes.Join(lines, []byte("\n"))
+}
diff --git a/internal/routing/rewriter_test.go b/internal/routing/rewriter_test.go
new file mode 100644
index 0000000000..d628f71076
--- /dev/null
+++ b/internal/routing/rewriter_test.go
@@ -0,0 +1,342 @@
+package routing
+
+import (
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestModelRewriter_RewriteRequestBody(t *testing.T) {
+	rewriter := NewModelRewriter()
+
+	tests := []struct {
+		name       string
+		body       []byte
+		newModel   string
+		wantModel  string
+		wantChange bool
+	}{
+		{
+			name:       "rewrites model field in JSON body",
+			body:       []byte(`{"model":"gpt-4.1","messages":[]}`),
+			newModel:   "claude-local",
+			wantModel:  "claude-local",
+			wantChange: true,
+		},
+		{
+			name:       "rewrites with empty body returns empty",
+			body:       []byte{},
+			newModel:   "gpt-4",
+			wantModel:  "",
+			wantChange: false,
+		},
+		{
+			name:       "handles missing model field gracefully",
+			body:       []byte(`{"messages":[{"role":"user"}]}`),
+			newModel:   "gpt-4",
+			wantModel:  "",
+			wantChange: false,
+		},
+		{
+			name:       "preserves other fields when rewriting",
+			body:       []byte(`{"model":"old-model","temperature":0.7,"max_tokens":100}`),
+			newModel:   "new-model",
+			wantModel:  "new-model",
+			wantChange: true,
+		},
+		{
+			name:       "handles nested JSON structure",
+			body:       []byte(`{"model":"gpt-4","messages":[{"role":"user","content":"hello"}],"stream":true}`),
+			newModel:   "claude-3-opus",
+			wantModel:  "claude-3-opus",
+			wantChange: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := rewriter.RewriteRequestBody(tt.body, tt.newModel)
+			require.NoError(t, err)
+
+			if tt.wantChange {
+				assert.NotEqual(t, string(tt.body), string(result), "body should have been modified")
+			}
+
+			if tt.wantModel != "" {
+				// Parse result and check model field
+				model, _ := NewModelExtractor().Extract(result, nil)
+				assert.Equal(t, tt.wantModel, model)
+			}
+		})
+	}
+}
+
+func TestModelRewriter_WrapResponseWriter(t *testing.T) {
+	rewriter := NewModelRewriter()
+
+	t.Run("response writer wraps without error", func(t *testing.T) {
+		recorder := httptest.NewRecorder()
+		wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+		require.NotNil(t, wrapped)
+		require.NotNil(t, cleanup)
+		defer cleanup()
+	})
+
+	t.Run("rewrites model in non-streaming response", func(t *testing.T) {
+		recorder := httptest.NewRecorder()
+		wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+		// Write a response with the resolved model
+		response := []byte(`{"model":"claude-local","content":"hello"}`)
+		wrapped.Header().Set("Content-Type", "application/json")
+		_, err := wrapped.Write(response)
+		require.NoError(t, err)
+
+		// Cleanup triggers the rewrite
+		cleanup()
+
+		// Check the response was rewritten to the requested model
+		body := recorder.Body.Bytes()
+		assert.Contains(t, string(body), `"model":"gpt-4"`)
+		assert.NotContains(t, string(body), `"model":"claude-local"`)
+	})
+
+	t.Run("no-op when requested equals resolved", func(t *testing.T) {
+		recorder := httptest.NewRecorder()
+		wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "gpt-4")
+
+		response := []byte(`{"model":"gpt-4","content":"hello"}`)
+		wrapped.Header().Set("Content-Type", "application/json")
+		_, err := wrapped.Write(response)
+		require.NoError(t, err)
+
+		cleanup()
+
+		body := recorder.Body.Bytes()
+		assert.Contains(t, string(body), `"model":"gpt-4"`)
+	})
+
+	t.Run("rewrites modelVersion field", func(t *testing.T) {
+		recorder := httptest.NewRecorder()
+		wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+		response := []byte(`{"modelVersion":"claude-local","content":"hello"}`)
+		wrapped.Header().Set("Content-Type", "application/json")
+		_, err := wrapped.Write(response)
+		require.NoError(t, err)
+
+		cleanup()
+
+		body := recorder.Body.Bytes()
+		assert.Contains(t, string(body), `"modelVersion":"gpt-4"`)
+	})
+
+	t.Run("handles streaming responses", func(t *testing.T) {
+		recorder := httptest.NewRecorder()
+		wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+		// Set streaming content type
+		wrapped.Header().Set("Content-Type", "text/event-stream")
+
+		// Write SSE chunks with resolved model
+		chunk1 := []byte("data: {\"model\":\"claude-local\",\"delta\":\"hello\"}\n\n")
+		_, err := wrapped.Write(chunk1)
+		require.NoError(t, err)
+
+		chunk2 := []byte("data: {\"model\":\"claude-local\",\"delta\":\" world\"}\n\n")
+		_, err = wrapped.Write(chunk2)
+		require.NoError(t, err)
+
+		cleanup()
+
+		// For streaming, data is written immediately with rewrites
+		body := recorder.Body.Bytes()
+		assert.Contains(t, string(body), `"model":"gpt-4"`)
+		assert.NotContains(t, string(body), `"model":"claude-local"`)
+	})
+
+	t.Run("empty body handled gracefully", func(t *testing.T) {
+		recorder := httptest.NewRecorder()
+		wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+		wrapped.Header().Set("Content-Type", "application/json")
+		// Don't write anything
+
+		cleanup()
+
+		body := recorder.Body.Bytes()
+		assert.Empty(t, body)
+	})
+
+	t.Run("preserves other JSON fields", func(t *testing.T) {
+		recorder := httptest.NewRecorder()
+		wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+		response := []byte(`{"model":"claude-local","temperature":0.7,"usage":{"prompt_tokens":10}}`)
+		wrapped.Header().Set("Content-Type", "application/json")
+		_, err := wrapped.Write(response)
+		require.NoError(t, err)
+
+		cleanup()
+
+		body := recorder.Body.Bytes()
+		assert.Contains(t, string(body), `"temperature":0.7`)
+		assert.Contains(t, string(body), `"prompt_tokens":10`)
+	})
+}
+
+func TestResponseRewriter_ImplementsInterfaces(t *testing.T) {
+	rewriter := NewModelRewriter()
+	recorder := httptest.NewRecorder()
+	wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+	defer cleanup()
+
+	// Should implement http.ResponseWriter
+	assert.Implements(t, (*http.ResponseWriter)(nil), wrapped)
+
+	// Should preserve header access
+	wrapped.Header().Set("X-Custom", "value")
+	assert.Equal(t, "value", recorder.Header().Get("X-Custom"))
+
+	// Should write status
+	wrapped.WriteHeader(http.StatusCreated)
+	assert.Equal(t, http.StatusCreated, recorder.Code)
+}
+
+func TestResponseRewriter_Flush(t *testing.T) {
+	t.Run("flush writes buffered content", func(t *testing.T) {
+		rewriter := NewModelRewriter()
+		recorder := httptest.NewRecorder()
+		wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+		response := []byte(`{"model":"claude-local","content":"test"}`)
+		wrapped.Header().Set("Content-Type", "application/json")
+		wrapped.Write(response)
+
+		// Before cleanup, response should be empty (buffered)
+		assert.Empty(t, recorder.Body.Bytes())
+
+		// After cleanup, response should be written
+		cleanup()
+		assert.NotEmpty(t, recorder.Body.Bytes())
+	})
+
+	t.Run("multiple flush calls are safe", func(t *testing.T) {
+		rewriter := NewModelRewriter()
+		recorder := httptest.NewRecorder()
+		wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+		response := []byte(`{"model":"claude-local"}`)
+		wrapped.Header().Set("Content-Type", "application/json")
+		wrapped.Write(response)
+
+		// First cleanup
+		cleanup()
+		firstBody := recorder.Body.Bytes()
+
+		// Second cleanup should not write again
+		cleanup()
+		secondBody := recorder.Body.Bytes()
+
+		assert.Equal(t, firstBody, secondBody)
+	})
+}
+
+func TestResponseRewriter_StreamingWithDataLines(t *testing.T) {
+	rewriter := NewModelRewriter()
+	recorder := httptest.NewRecorder()
+	wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+	wrapped.Header().Set("Content-Type", "text/event-stream")
+
+	// SSE format with multiple data lines
+	chunk := []byte("data: {\"model\":\"claude-local\"}\n\ndata: {\"model\":\"claude-local\",\"done\":true}\n\n")
+	wrapped.Write(chunk)
+
+	cleanup()
+
+	body := recorder.Body.Bytes()
+	// Both data lines should have model rewritten
+	assert.Contains(t, string(body), `"model":"gpt-4"`)
+	assert.NotContains(t, string(body), `"model":"claude-local"`)
+}
+
+func TestModelRewriter_RoundTrip(t *testing.T) {
+	// Simulate a full request -> response cycle with model rewriting
+	rewriter := NewModelRewriter()
+
+	// Step 1: Rewrite request body
+	originalRequest := []byte(`{"model":"gpt-4","messages":[{"role":"user","content":"hello"}]}`)
+	rewrittenRequest, err := rewriter.RewriteRequestBody(originalRequest, "claude-local")
+	require.NoError(t, err)
+
+	// Verify request was rewritten
+	extractor := NewModelExtractor()
+	requestModel, _ := extractor.Extract(rewrittenRequest, nil)
+	assert.Equal(t, "claude-local", requestModel)
+
+	// Step 2: Simulate response with resolved model
+	recorder := httptest.NewRecorder()
+	wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+	response := []byte(`{"model":"claude-local","content":"Hello! How can I help?"}`)
+	wrapped.Header().Set("Content-Type", "application/json")
+	wrapped.Write(response)
+	cleanup()
+
+	// Verify response was rewritten back
+	body, _ := io.ReadAll(recorder.Result().Body)
+	responseModel, _ := extractor.Extract(body, nil)
+	assert.Equal(t, "gpt-4", responseModel)
+}
+
+func TestModelRewriter_NonJSONBody(t *testing.T) {
+	rewriter := NewModelRewriter()
+
+	// Binary/non-JSON body should be returned unchanged
+	body := []byte{0x00, 0x01, 0x02, 0x03}
+	result, err := rewriter.RewriteRequestBody(body, "gpt-4")
+	require.NoError(t, err)
+	assert.Equal(t, body, result)
+}
+
+func TestModelRewriter_InvalidJSON(t *testing.T) {
+	rewriter := NewModelRewriter()
+
+	// Invalid JSON without model field should be returned unchanged
+	body := []byte(`not valid json`)
+	result, err := rewriter.RewriteRequestBody(body, "gpt-4")
+	require.NoError(t, err)
+	assert.Equal(t, body, result)
+}
+
+func TestResponseRewriter_StatusCodePreserved(t *testing.T) {
+	rewriter := NewModelRewriter()
+	recorder := httptest.NewRecorder()
+	wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+	wrapped.WriteHeader(http.StatusAccepted)
+	wrapped.Write([]byte(`{"model":"claude-local"}`))
+	cleanup()
+
+	assert.Equal(t, http.StatusAccepted, recorder.Code)
+}
+
+func TestResponseRewriter_HeaderFlushed(t *testing.T) {
+	rewriter := NewModelRewriter()
+	recorder := httptest.NewRecorder()
+	wrapped, cleanup := rewriter.WrapResponseWriter(recorder, "gpt-4", "claude-local")
+
+	wrapped.Header().Set("Content-Type", "application/json")
+	wrapped.Header().Set("X-Request-ID", "abc123")
+	wrapped.Write([]byte(`{"model":"claude-local"}`))
+	cleanup()
+
+	result := recorder.Result()
+	assert.Equal(t, "application/json", result.Header.Get("Content-Type"))
+	assert.Equal(t, "abc123", result.Header.Get("X-Request-ID"))
+}
diff --git a/internal/routing/router.go b/internal/routing/router.go
new file mode 100644
index 0000000000..543c7ecf7d
--- /dev/null
+++ b/internal/routing/router.go
@@ -0,0 +1,317 @@
+package routing
+
+import (
+	"context"
+	"sort"
+	"strings"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+)
+
+// Router resolves models to provider candidates.
+type Router struct {
+	registry      *Registry
+	modelMappings map[string]string      // normalized from -> to
+	oauthAliases  map[string][]string    // normalized model -> []alias
+}
+
+// NewRouter creates a new router with the given configuration.
+func NewRouter(registry *Registry, cfg *config.Config) *Router {
+	r := &Router{
+		registry:      registry,
+		modelMappings: make(map[string]string),
+		oauthAliases:  make(map[string][]string),
+	}
+
+	if cfg != nil {
+		r.loadModelMappings(cfg.AmpCode.ModelMappings)
+		r.loadOAuthAliases(cfg.OAuthModelAlias)
+	}
+
+	return r
+}
+
+// LegacyRoutingDecision contains the resolved routing information.
+// Deprecated: Will be replaced by RoutingDecision from types.go in T-013.
+type LegacyRoutingDecision struct {
+	RequestedModel string              // Original model from request
+	ResolvedModel  string              // After model-mappings
+	Candidates     []ProviderCandidate // Ordered list of providers to try
+}
+
+// Resolve determines the routing decision for the requested model.
+// Deprecated: Will be updated to use RoutingRequest and return *RoutingDecision in T-013.
+func (r *Router) Resolve(requestedModel string) *LegacyRoutingDecision {
+	// 1. Extract thinking suffix
+	suffixResult := thinking.ParseSuffix(requestedModel)
+	baseModel := suffixResult.ModelName
+
+	// 2. Apply model-mappings
+	targetModel := r.applyMappings(baseModel)
+
+	// 3. Find primary providers
+	candidates := r.findCandidates(targetModel, suffixResult)
+
+	// 4. Add fallback aliases
+	for _, alias := range r.oauthAliases[strings.ToLower(targetModel)] {
+		candidates = append(candidates, r.findCandidates(alias, suffixResult)...)
+	}
+
+	// 5. Sort by priority
+	sort.Slice(candidates, func(i, j int) bool {
+		return candidates[i].Provider.Priority() < candidates[j].Provider.Priority()
+	})
+
+	return &LegacyRoutingDecision{
+		RequestedModel: requestedModel,
+		ResolvedModel:  targetModel,
+		Candidates:     candidates,
+	}
+}
+
+// ResolveV2 determines the routing decision for a routing request.
+// It uses the new RoutingRequest and RoutingDecision types.
+func (r *Router) ResolveV2(req RoutingRequest) *RoutingDecision {
+	// 1. Extract thinking suffix
+	suffixResult := thinking.ParseSuffix(req.RequestedModel)
+	baseModel := suffixResult.ModelName
+	thinkingSuffix := ""
+	if suffixResult.HasSuffix {
+		thinkingSuffix = "(" + suffixResult.RawSuffix + ")"
+	}
+
+	// 2. Check for local providers
+	localCandidates := r.findLocalCandidates(baseModel, suffixResult)
+
+	// 3. Apply model-mappings if needed
+	mappedModel := r.applyMappings(baseModel)
+	mappingCandidates := r.findLocalCandidates(mappedModel, suffixResult)
+
+	// 4. Determine route type based on preferences and availability
+	var decision *RoutingDecision
+
+	if req.ForceModelMapping && mappedModel != baseModel && len(mappingCandidates) > 0 {
+		// FORCE MODE: Use mapping even if local provider exists
+		decision = r.buildMappingDecision(req.RequestedModel, mappedModel, mappingCandidates, thinkingSuffix, mappingCandidates[1:])
+	} else if req.PreferLocalProvider && len(localCandidates) > 0 {
+		// DEFAULT MODE with local preference: Use local provider first
+		decision = r.buildLocalProviderDecision(req.RequestedModel, localCandidates, thinkingSuffix)
+	} else if len(localCandidates) > 0 {
+		// DEFAULT MODE: Local provider available
+		decision = r.buildLocalProviderDecision(req.RequestedModel, localCandidates, thinkingSuffix)
+	} else if mappedModel != baseModel && len(mappingCandidates) > 0 {
+		// DEFAULT MODE: No local provider, but mapping available
+		decision = r.buildMappingDecision(req.RequestedModel, mappedModel, mappingCandidates, thinkingSuffix, mappingCandidates[1:])
+	} else {
+		// No local provider, no mapping - use amp credits proxy
+		decision = &RoutingDecision{
+			RouteType:     RouteTypeAmpCredits,
+			ResolvedModel: req.RequestedModel,
+			ShouldProxy:   true,
+		}
+	}
+
+	return decision
+}
+
+// findLocalCandidates finds local provider candidates for a model.
+// If the internal registry is empty, it falls back to the global model registry.
+func (r *Router) findLocalCandidates(model string, suffixResult thinking.SuffixResult) []ProviderCandidate {
+	var candidates []ProviderCandidate
+
+	// Check internal registry first
+	registryProviders := r.registry.All()
+	if len(registryProviders) > 0 {
+		for _, p := range registryProviders {
+			if !p.SupportsModel(model) {
+				continue
+			}
+
+			// Apply thinking suffix if needed
+			actualModel := model
+			if suffixResult.HasSuffix && !thinking.ParseSuffix(model).HasSuffix {
+				actualModel = model + "(" + suffixResult.RawSuffix + ")"
+			}
+
+			if p.Available(actualModel) {
+				candidates = append(candidates, ProviderCandidate{
+					Provider: p,
+					Model:    actualModel,
+				})
+			}
+		}
+	} else {
+		// Fallback to global model registry (same logic as FallbackHandler)
+		// This ensures compatibility when the wrapper is initialized with an empty registry
+		providers := registry.GetGlobalRegistry().GetModelProviders(model)
+		if len(providers) > 0 {
+			actualModel := model
+			if suffixResult.HasSuffix && !thinking.ParseSuffix(model).HasSuffix {
+				actualModel = model + "(" + suffixResult.RawSuffix + ")"
+			}
+			// Create a synthetic provider candidate for each provider
+			for _, providerName := range providers {
+				candidates = append(candidates, ProviderCandidate{
+					Provider: &globalRegistryProvider{name: providerName, model: actualModel},
+					Model:    actualModel,
+				})
+			}
+		}
+	}
+
+	// Sort by priority
+	sort.Slice(candidates, func(i, j int) bool {
+		return candidates[i].Provider.Priority() < candidates[j].Provider.Priority()
+	})
+
+	return candidates
+}
+
+// globalRegistryProvider is a synthetic Provider implementation that wraps
+// a provider name from the global model registry. It is used only for routing
+// decisions when the internal registry is empty - actual execution goes through
+// the normal handler path, not through this provider's Execute methods.
+type globalRegistryProvider struct {
+	name  string
+	model string
+}
+
+func (p *globalRegistryProvider) Name() string                   { return p.name }
+func (p *globalRegistryProvider) Type() ProviderType             { return ProviderTypeOAuth }
+func (p *globalRegistryProvider) Priority() int                  { return 0 }
+func (p *globalRegistryProvider) SupportsModel(string) bool      { return true }
+func (p *globalRegistryProvider) Available(string) bool          { return true }
+
+// Execute is not used for globalRegistryProvider - routing wrapper calls the handler directly.
+func (p *globalRegistryProvider) Execute(ctx context.Context, model string, req executor.Request) (executor.Response, error) {
+	return executor.Response{}, nil
+}
+
+// ExecuteStream is not used for globalRegistryProvider - routing wrapper calls the handler directly.
+func (p *globalRegistryProvider) ExecuteStream(ctx context.Context, model string, req executor.Request) (<-chan executor.StreamChunk, error) {
+	return nil, nil
+}
+
+// buildLocalProviderDecision creates a decision for local provider routing.
+func (r *Router) buildLocalProviderDecision(requestedModel string, candidates []ProviderCandidate, thinkingSuffix string) *RoutingDecision {
+	resolvedModel := requestedModel
+	if thinkingSuffix != "" {
+		// Ensure thinking suffix is preserved
+		sr := thinking.ParseSuffix(requestedModel)
+		if !sr.HasSuffix {
+			resolvedModel = requestedModel + thinkingSuffix
+		}
+	}
+
+	var fallbackModels []string
+	if len(candidates) > 1 {
+		for _, c := range candidates[1:] {
+			fallbackModels = append(fallbackModels, c.Model)
+		}
+	}
+
+	return &RoutingDecision{
+		RouteType:      RouteTypeLocalProvider,
+		ResolvedModel:  resolvedModel,
+		ProviderName:   candidates[0].Provider.Name(),
+		FallbackModels: fallbackModels,
+		ShouldProxy:    false,
+	}
+}
+
+// buildMappingDecision creates a decision for model mapping routing.
+func (r *Router) buildMappingDecision(requestedModel, mappedModel string, candidates []ProviderCandidate, thinkingSuffix string, fallbackCandidates []ProviderCandidate) *RoutingDecision {
+	// Apply thinking suffix to resolved model if needed
+	resolvedModel := mappedModel
+	if thinkingSuffix != "" {
+		sr := thinking.ParseSuffix(mappedModel)
+		if !sr.HasSuffix {
+			resolvedModel = mappedModel + thinkingSuffix
+		}
+	}
+
+	var fallbackModels []string
+	for _, c := range fallbackCandidates {
+		fallbackModels = append(fallbackModels, c.Model)
+	}
+
+	// Also add oauth aliases as fallbacks
+	baseMapped := thinking.ParseSuffix(mappedModel).ModelName
+	for _, alias := range r.oauthAliases[strings.ToLower(baseMapped)] {
+		// Check if this alias has providers
+		aliasCandidates := r.findLocalCandidates(alias, thinking.SuffixResult{ModelName: alias})
+		for _, c := range aliasCandidates {
+			fallbackModels = append(fallbackModels, c.Model)
+		}
+	}
+
+	return &RoutingDecision{
+		RouteType:      RouteTypeModelMapping,
+		ResolvedModel:  resolvedModel,
+		ProviderName:   candidates[0].Provider.Name(),
+		FallbackModels: fallbackModels,
+		ShouldProxy:    false,
+	}
+}
+
+// applyMappings applies model-mappings configuration.
+func (r *Router) applyMappings(model string) string {
+	key := strings.ToLower(strings.TrimSpace(model))
+	if mapped, ok := r.modelMappings[key]; ok {
+		return mapped
+	}
+	return model
+}
+
+// findCandidates finds all provider candidates for a model.
+func (r *Router) findCandidates(model string, suffixResult thinking.SuffixResult) []ProviderCandidate {
+	var candidates []ProviderCandidate
+
+	for _, p := range r.registry.All() {
+		if !p.SupportsModel(model) {
+			continue
+		}
+
+		// Apply thinking suffix if needed
+		actualModel := model
+		if suffixResult.HasSuffix && !thinking.ParseSuffix(model).HasSuffix {
+			actualModel = model + "(" + suffixResult.RawSuffix + ")"
+		}
+
+		if p.Available(actualModel) {
+			candidates = append(candidates, ProviderCandidate{
+				Provider: p,
+				Model:    actualModel,
+			})
+		}
+	}
+
+	return candidates
+}
+
+// loadModelMappings loads model-mappings from config.
+func (r *Router) loadModelMappings(mappings []config.AmpModelMapping) {
+	for _, m := range mappings {
+		from := strings.ToLower(strings.TrimSpace(m.From))
+		to := strings.TrimSpace(m.To)
+		if from != "" && to != "" {
+			r.modelMappings[from] = to
+		}
+	}
+}
+
+// loadOAuthAliases loads oauth-model-alias from config.
+func (r *Router) loadOAuthAliases(aliases map[string][]config.OAuthModelAlias) {
+	for _, entries := range aliases {
+		for _, entry := range entries {
+			name := strings.ToLower(strings.TrimSpace(entry.Name))
+			alias := strings.TrimSpace(entry.Alias)
+			if name != "" && alias != "" && name != alias {
+				r.oauthAliases[name] = append(r.oauthAliases[name], alias)
+			}
+		}
+	}
+}
diff --git a/internal/routing/router_test.go b/internal/routing/router_test.go
new file mode 100644
index 0000000000..c3674d01be
--- /dev/null
+++ b/internal/routing/router_test.go
@@ -0,0 +1,202 @@
+package routing
+
+import (
+	"context"
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	globalRegistry "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	"github.com/stretchr/testify/assert"
+)
+
+// mockProvider is a test double for Provider.
+type mockProvider struct {
+	name           string
+	providerType   ProviderType
+	supportsModels map[string]bool
+	available      bool
+	priority       int
+}
+
+func (m *mockProvider) Name() string                             { return m.name }
+func (m *mockProvider) Type() ProviderType                       { return m.providerType }
+func (m *mockProvider) SupportsModel(model string) bool          { return m.supportsModels[model] }
+func (m *mockProvider) Available(model string) bool              { return m.available }
+func (m *mockProvider) Priority() int                            { return m.priority }
+func (m *mockProvider) Execute(ctx context.Context, model string, req executor.Request) (executor.Response, error) {
+	return executor.Response{}, nil
+}
+func (m *mockProvider) ExecuteStream(ctx context.Context, model string, req executor.Request) (<-chan executor.StreamChunk, error) {
+	return nil, nil
+}
+
+func TestRouter_Resolve_ModelMappings(t *testing.T) {
+	registry := NewRegistry()
+	
+	// Add a provider
+	p := &mockProvider{
+		name:           "test-provider",
+		providerType:   ProviderTypeOAuth,
+		supportsModels: map[string]bool{"target-model": true},
+		available:      true,
+		priority:       1,
+	}
+	registry.Register(p)
+	
+	// Create router with model mapping
+	cfg := &config.Config{
+		AmpCode: config.AmpCode{
+			ModelMappings: []config.AmpModelMapping{
+				{From: "user-model", To: "target-model"},
+			},
+		},
+	}
+	router := NewRouter(registry, cfg)
+	
+	// Resolve
+	decision := router.Resolve("user-model")
+	
+	assert.Equal(t, "user-model", decision.RequestedModel)
+	assert.Equal(t, "target-model", decision.ResolvedModel)
+	assert.Len(t, decision.Candidates, 1)
+	assert.Equal(t, "target-model", decision.Candidates[0].Model)
+}
+
+func TestRouter_Resolve_OAuthAliases(t *testing.T) {
+	registry := NewRegistry()
+	
+	// Add providers
+	p1 := &mockProvider{
+		name:           "oauth-1",
+		providerType:   ProviderTypeOAuth,
+		supportsModels: map[string]bool{"primary-model": true},
+		available:      true,
+		priority:       1,
+	}
+	p2 := &mockProvider{
+		name:           "oauth-2",
+		providerType:   ProviderTypeOAuth,
+		supportsModels: map[string]bool{"fallback-model": true},
+		available:      true,
+		priority:       2,
+	}
+	registry.Register(p1)
+	registry.Register(p2)
+	
+	// Create router with oauth aliases
+	cfg := &config.Config{
+		OAuthModelAlias: map[string][]config.OAuthModelAlias{
+			"test-channel": {
+				{Name: "primary-model", Alias: "fallback-model"},
+			},
+		},
+	}
+	router := NewRouter(registry, cfg)
+	
+	// Resolve
+	decision := router.Resolve("primary-model")
+	
+	assert.Equal(t, "primary-model", decision.ResolvedModel)
+	assert.Len(t, decision.Candidates, 2)
+	// Primary should come first (lower priority value)
+	assert.Equal(t, "primary-model", decision.Candidates[0].Model)
+	assert.Equal(t, "fallback-model", decision.Candidates[1].Model)
+}
+
+func TestRouter_Resolve_NoProviders(t *testing.T) {
+	registry := NewRegistry()
+	cfg := &config.Config{}
+	router := NewRouter(registry, cfg)
+	
+	decision := router.Resolve("unknown-model")
+	
+	assert.Equal(t, "unknown-model", decision.ResolvedModel)
+	assert.Empty(t, decision.Candidates)
+}
+
+// === Global Registry Fallback Tests (T-027) ===
+// These tests verify that when the internal registry is empty,
+// the router falls back to the global model registry.
+// This is the core fix for the thinking signature 400 error.
+
+func TestRouter_GlobalRegistryFallback_LocalProvider(t *testing.T) {
+	// This test requires registering a model in the global registry.
+	// We use a model that's already registered via api-key config in production.
+	// For isolated testing, we can skip if global registry is not populated.
+	
+	globalReg := globalRegistry.GetGlobalRegistry()
+	modelCount := globalReg.GetModelCount("claude-sonnet-4-20250514")
+	
+	if modelCount == 0 {
+		t.Skip("Global registry not populated - run with server context")
+	}
+	
+	// Empty internal registry
+	emptyRegistry := NewRegistry()
+	cfg := &config.Config{}
+	router := NewRouter(emptyRegistry, cfg)
+	
+	req := RoutingRequest{
+		RequestedModel:      "claude-sonnet-4-20250514",
+		PreferLocalProvider: true,
+	}
+	decision := router.ResolveV2(req)
+	
+	// Should find provider from global registry
+	assert.Equal(t, RouteTypeLocalProvider, decision.RouteType)
+	assert.Equal(t, "claude-sonnet-4-20250514", decision.ResolvedModel)
+	assert.False(t, decision.ShouldProxy)
+}
+
+func TestRouter_GlobalRegistryFallback_ModelMapping(t *testing.T) {
+	// This test verifies that model mapping works with global registry fallback.
+	
+	globalReg := globalRegistry.GetGlobalRegistry()
+	modelCount := globalReg.GetModelCount("claude-opus-4-5-thinking")
+	
+	if modelCount == 0 {
+		t.Skip("Global registry not populated - run with server context")
+	}
+	
+	// Empty internal registry
+	emptyRegistry := NewRegistry()
+	cfg := &config.Config{
+		AmpCode: config.AmpCode{
+			ModelMappings: []config.AmpModelMapping{
+				{From: "claude-opus-4-5-20251101", To: "claude-opus-4-5-thinking"},
+			},
+		},
+	}
+	router := NewRouter(emptyRegistry, cfg)
+	
+	req := RoutingRequest{
+		RequestedModel:      "claude-opus-4-5-20251101",
+		PreferLocalProvider: true,
+	}
+	decision := router.ResolveV2(req)
+	
+	// Should find mapped model from global registry
+	assert.Equal(t, RouteTypeModelMapping, decision.RouteType)
+	assert.Equal(t, "claude-opus-4-5-thinking", decision.ResolvedModel)
+	assert.False(t, decision.ShouldProxy)
+}
+
+func TestRouter_GlobalRegistryFallback_AmpCreditsWhenNotFound(t *testing.T) {
+	// Empty internal registry
+	emptyRegistry := NewRegistry()
+	cfg := &config.Config{}
+	router := NewRouter(emptyRegistry, cfg)
+	
+	// Use a model that definitely doesn't exist anywhere
+	req := RoutingRequest{
+		RequestedModel:      "nonexistent-model-12345",
+		PreferLocalProvider: true,
+	}
+	decision := router.ResolveV2(req)
+	
+	// Should fall back to AMP credits proxy
+	assert.Equal(t, RouteTypeAmpCredits, decision.RouteType)
+	assert.Equal(t, "nonexistent-model-12345", decision.ResolvedModel)
+	assert.True(t, decision.ShouldProxy)
+}
diff --git a/internal/routing/router_v2_test.go b/internal/routing/router_v2_test.go
new file mode 100644
index 0000000000..903b7aa855
--- /dev/null
+++ b/internal/routing/router_v2_test.go
@@ -0,0 +1,245 @@
+package routing
+
+import (
+	"context"
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestRouter_DefaultMode_PrefersLocal(t *testing.T) {
+	// Setup: Create a router with a mock provider that supports "gpt-4"
+	registry := NewRegistry()
+	mockProvider := &MockProvider{
+		name:            "openai",
+		supportedModels: []string{"gpt-4"},
+		available:       true,
+		priority:        1,
+	}
+	registry.Register(mockProvider)
+
+	cfg := &config.Config{
+		AmpCode: config.AmpCode{
+			ModelMappings: []config.AmpModelMapping{
+				{From: "gpt-4", To: "claude-local"},
+			},
+		},
+	}
+
+	router := NewRouter(registry, cfg)
+
+	// Test: Request gpt-4 when local provider exists
+	req := RoutingRequest{
+		RequestedModel:      "gpt-4",
+		PreferLocalProvider: true,
+		ForceModelMapping:   false,
+	}
+
+	decision := router.ResolveV2(req)
+
+	// Assert: Should return LOCAL_PROVIDER, not MODEL_MAPPING
+	assert.Equal(t, RouteTypeLocalProvider, decision.RouteType)
+	assert.Equal(t, "gpt-4", decision.ResolvedModel)
+	assert.Equal(t, "openai", decision.ProviderName)
+	assert.False(t, decision.ShouldProxy)
+}
+
+func TestRouter_DefaultMode_MapsWhenNoLocal(t *testing.T) {
+	// Setup: Create a router with NO provider for "gpt-4" but a mapping to "claude-local"
+	// which has a provider
+	registry := NewRegistry()
+	mockProvider := &MockProvider{
+		name:            "anthropic",
+		supportedModels: []string{"claude-local"},
+		available:       true,
+		priority:        1,
+	}
+	registry.Register(mockProvider)
+
+	cfg := &config.Config{
+		AmpCode: config.AmpCode{
+			ModelMappings: []config.AmpModelMapping{
+				{From: "gpt-4", To: "claude-local"},
+			},
+		},
+	}
+
+	router := NewRouter(registry, cfg)
+
+	// Test: Request gpt-4 when no local provider exists, but mapping exists
+	req := RoutingRequest{
+		RequestedModel:      "gpt-4",
+		PreferLocalProvider: true,
+		ForceModelMapping:   false,
+	}
+
+	decision := router.ResolveV2(req)
+
+	// Assert: Should return MODEL_MAPPING
+	assert.Equal(t, RouteTypeModelMapping, decision.RouteType)
+	assert.Equal(t, "claude-local", decision.ResolvedModel)
+	assert.Equal(t, "anthropic", decision.ProviderName)
+	assert.False(t, decision.ShouldProxy)
+}
+
+func TestRouter_DefaultMode_AmpCreditsWhenNoLocalOrMapping(t *testing.T) {
+	// Setup: Create a router with no providers and no mappings
+	registry := NewRegistry()
+
+	cfg := &config.Config{
+		AmpCode: config.AmpCode{
+			ModelMappings: []config.AmpModelMapping{},
+		},
+	}
+
+	router := NewRouter(registry, cfg)
+
+	// Test: Request a model with no local provider and no mapping
+	req := RoutingRequest{
+		RequestedModel:      "unknown-model",
+		PreferLocalProvider: true,
+		ForceModelMapping:   false,
+	}
+
+	decision := router.ResolveV2(req)
+
+	// Assert: Should return AMP_CREDITS with ShouldProxy=true
+	assert.Equal(t, RouteTypeAmpCredits, decision.RouteType)
+	assert.Equal(t, "unknown-model", decision.ResolvedModel)
+	assert.True(t, decision.ShouldProxy)
+	assert.Empty(t, decision.ProviderName)
+}
+
+func TestRouter_ForceMode_MapsEvenWithLocal(t *testing.T) {
+	// Setup: Create a router with BOTH a local provider for "gpt-4" AND a mapping from "gpt-4" to "claude-local"
+	// The mapping target "claude-local" also has a provider
+	registry := NewRegistry()
+
+	// Local provider for gpt-4
+	openaiProvider := &MockProvider{
+		name:            "openai",
+		supportedModels: []string{"gpt-4"},
+		available:       true,
+		priority:        1,
+	}
+	registry.Register(openaiProvider)
+
+	// Local provider for the mapped model
+	anthropicProvider := &MockProvider{
+		name:            "anthropic",
+		supportedModels: []string{"claude-local"},
+		available:       true,
+		priority:        2,
+	}
+	registry.Register(anthropicProvider)
+
+	cfg := &config.Config{
+		AmpCode: config.AmpCode{
+			ModelMappings: []config.AmpModelMapping{
+				{From: "gpt-4", To: "claude-local"},
+			},
+		},
+	}
+
+	router := NewRouter(registry, cfg)
+
+	// Test: Request gpt-4 with ForceModelMapping=true
+	// Even though gpt-4 has a local provider, mapping should take precedence
+	req := RoutingRequest{
+		RequestedModel:      "gpt-4",
+		PreferLocalProvider: false,
+		ForceModelMapping:   true,
+	}
+
+	decision := router.ResolveV2(req)
+
+	// Assert: Should return MODEL_MAPPING, not LOCAL_PROVIDER
+	assert.Equal(t, RouteTypeModelMapping, decision.RouteType)
+	assert.Equal(t, "claude-local", decision.ResolvedModel)
+	assert.Equal(t, "anthropic", decision.ProviderName)
+	assert.False(t, decision.ShouldProxy)
+}
+
+func TestRouter_ThinkingSuffix_Preserved(t *testing.T) {
+	// Setup: Create a router with mapping and provider for mapped model
+	registry := NewRegistry()
+
+	mockProvider := &MockProvider{
+		name:            "anthropic",
+		supportedModels: []string{"claude-local"},
+		available:       true,
+		priority:        1,
+	}
+	registry.Register(mockProvider)
+
+	cfg := &config.Config{
+		AmpCode: config.AmpCode{
+			ModelMappings: []config.AmpModelMapping{
+				{From: "claude-3-5-sonnet", To: "claude-local"},
+			},
+		},
+	}
+
+	router := NewRouter(registry, cfg)
+
+	// Test: Request claude-3-5-sonnet with thinking suffix
+	req := RoutingRequest{
+		RequestedModel:      "claude-3-5-sonnet(thinking:foo)",
+		PreferLocalProvider: true,
+		ForceModelMapping:   false,
+	}
+
+	decision := router.ResolveV2(req)
+
+	// Assert: Thinking suffix should be preserved in resolved model
+	assert.Equal(t, RouteTypeModelMapping, decision.RouteType)
+	assert.Equal(t, "claude-local(thinking:foo)", decision.ResolvedModel)
+	assert.Equal(t, "anthropic", decision.ProviderName)
+}
+
+// MockProvider is a mock implementation of Provider for testing
+type MockProvider struct {
+	name            string
+	providerType    ProviderType
+	supportedModels []string
+	available       bool
+	priority        int
+}
+
+func (m *MockProvider) Name() string {
+	return m.name
+}
+
+func (m *MockProvider) Type() ProviderType {
+	if m.providerType == "" {
+		return ProviderTypeOAuth
+	}
+	return m.providerType
+}
+
+func (m *MockProvider) SupportsModel(model string) bool {
+	for _, supported := range m.supportedModels {
+		if supported == model {
+			return true
+		}
+	}
+	return false
+}
+
+func (m *MockProvider) Available(model string) bool {
+	return m.available
+}
+
+func (m *MockProvider) Priority() int {
+	return m.priority
+}
+
+func (m *MockProvider) Execute(ctx context.Context, model string, req executor.Request) (executor.Response, error) {
+	return executor.Response{}, nil
+}
+
+func (m *MockProvider) ExecuteStream(ctx context.Context, model string, req executor.Request) (<-chan executor.StreamChunk, error) {
+	return nil, nil
+}
diff --git a/internal/routing/testutil/fake_handler.go b/internal/routing/testutil/fake_handler.go
new file mode 100644
index 0000000000..160aaad8b1
--- /dev/null
+++ b/internal/routing/testutil/fake_handler.go
@@ -0,0 +1,113 @@
+package testutil
+
+import (
+	"io"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+)
+
+// FakeHandlerRecorder records handler invocations for testing.
+type FakeHandlerRecorder struct {
+	Called        bool
+	CallCount     int
+	RequestBody   []byte
+	RequestHeader http.Header
+	ContextKeys   map[string]interface{}
+	ResponseStatus int
+	ResponseBody  []byte
+}
+
+// NewFakeHandlerRecorder creates a new fake handler recorder.
+func NewFakeHandlerRecorder() *FakeHandlerRecorder {
+	return &FakeHandlerRecorder{
+		ContextKeys:    make(map[string]interface{}),
+		ResponseStatus: http.StatusOK,
+		ResponseBody:   []byte(`{"status":"handled"}`),
+	}
+}
+
+// GinHandler returns a gin.HandlerFunc that records the invocation.
+func (f *FakeHandlerRecorder) GinHandler() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		f.record(c)
+		c.Data(f.ResponseStatus, "application/json", f.ResponseBody)
+	}
+}
+
+// GinHandlerWithModel returns a gin.HandlerFunc that records the invocation and returns the model from context.
+// Useful for testing response rewriting in model mapping scenarios.
+func (f *FakeHandlerRecorder) GinHandlerWithModel() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		f.record(c)
+		// Return a response with the model field that would be in the actual API response
+		// If ResponseBody was explicitly set (not default), use that; otherwise generate from context
+		var body []byte
+		if mappedModel, exists := c.Get("mapped_model"); exists {
+			body = []byte(`{"model":"` + mappedModel.(string) + `","status":"handled"}`)
+		} else {
+			body = f.ResponseBody
+		}
+		c.Data(f.ResponseStatus, "application/json", body)
+	}
+}
+
+// HTTPHandler returns an http.HandlerFunc that records the invocation.
+func (f *FakeHandlerRecorder) HTTPHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		body, _ := io.ReadAll(r.Body)
+		f.Called = true
+		f.CallCount++
+		f.RequestBody = body
+		f.RequestHeader = r.Header.Clone()
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(f.ResponseStatus)
+		w.Write(f.ResponseBody)
+	}
+}
+
+// record captures the request details from gin context.
+func (f *FakeHandlerRecorder) record(c *gin.Context) {
+	f.Called = true
+	f.CallCount++
+
+	body, _ := io.ReadAll(c.Request.Body)
+	f.RequestBody = body
+	f.RequestHeader = c.Request.Header.Clone()
+
+	// Capture common context keys used by routing
+	if val, exists := c.Get("mapped_model"); exists {
+		f.ContextKeys["mapped_model"] = val
+	}
+	if val, exists := c.Get("fallback_models"); exists {
+		f.ContextKeys["fallback_models"] = val
+	}
+	if val, exists := c.Get("route_type"); exists {
+		f.ContextKeys["route_type"] = val
+	}
+}
+
+// Reset clears the recorder state.
+func (f *FakeHandlerRecorder) Reset() {
+	f.Called = false
+	f.CallCount = 0
+	f.RequestBody = nil
+	f.RequestHeader = nil
+	f.ContextKeys = make(map[string]interface{})
+}
+
+// GetContextKey returns a captured context key value.
+func (f *FakeHandlerRecorder) GetContextKey(key string) (interface{}, bool) {
+	val, ok := f.ContextKeys[key]
+	return val, ok
+}
+
+// WasCalled returns true if the handler was called.
+func (f *FakeHandlerRecorder) WasCalled() bool {
+	return f.Called
+}
+
+// GetCallCount returns the number of times the handler was called.
+func (f *FakeHandlerRecorder) GetCallCount() int {
+	return f.CallCount
+}
diff --git a/internal/routing/testutil/fake_proxy.go b/internal/routing/testutil/fake_proxy.go
new file mode 100644
index 0000000000..3deea5a546
--- /dev/null
+++ b/internal/routing/testutil/fake_proxy.go
@@ -0,0 +1,83 @@
+package testutil
+
+import (
+	"io"
+	"net/http"
+	"net/http/httptest"
+)
+
+// CloseNotifierRecorder wraps httptest.ResponseRecorder with CloseNotify support.
+// This is needed because ReverseProxy requires http.CloseNotifier.
+type CloseNotifierRecorder struct {
+	*httptest.ResponseRecorder
+	closeChan chan bool
+}
+
+// NewCloseNotifierRecorder creates a ResponseRecorder that implements CloseNotifier.
+func NewCloseNotifierRecorder() *CloseNotifierRecorder {
+	return &CloseNotifierRecorder{
+		ResponseRecorder: httptest.NewRecorder(),
+		closeChan:        make(chan bool, 1),
+	}
+}
+
+// CloseNotify implements http.CloseNotifier.
+func (c *CloseNotifierRecorder) CloseNotify() <-chan bool {
+	return c.closeChan
+}
+
+// FakeProxyRecorder records proxy invocations for testing.
+type FakeProxyRecorder struct {
+	Called         bool
+	CallCount      int
+	RequestBody    []byte
+	RequestHeaders http.Header
+	ResponseStatus int
+	ResponseBody   []byte
+}
+
+// NewFakeProxyRecorder creates a new fake proxy recorder.
+func NewFakeProxyRecorder() *FakeProxyRecorder {
+	return &FakeProxyRecorder{
+		ResponseStatus: http.StatusOK,
+		ResponseBody:   []byte(`{"status":"proxied"}`),
+	}
+}
+
+// ServeHTTP implements http.Handler to act as a reverse proxy.
+func (f *FakeProxyRecorder) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	f.Called = true
+	f.CallCount++
+	f.RequestHeaders = r.Header.Clone()
+
+	body, err := io.ReadAll(r.Body)
+	if err == nil {
+		f.RequestBody = body
+	}
+
+	w.WriteHeader(f.ResponseStatus)
+	w.Write(f.ResponseBody)
+}
+
+// GetCallCount returns the number of times the proxy was called.
+func (f *FakeProxyRecorder) GetCallCount() int {
+	return f.CallCount
+}
+
+// Reset clears the recorder state.
+func (f *FakeProxyRecorder) Reset() {
+	f.Called = false
+	f.CallCount = 0
+	f.RequestBody = nil
+	f.RequestHeaders = nil
+}
+
+// ToHandler returns the recorder as an http.Handler for use with httptest.
+func (f *FakeProxyRecorder) ToHandler() http.Handler {
+	return http.HandlerFunc(f.ServeHTTP)
+}
+
+// CreateTestServer creates an httptest server with this fake proxy.
+func (f *FakeProxyRecorder) CreateTestServer() *httptest.Server {
+	return httptest.NewServer(f.ToHandler())
+}
diff --git a/internal/routing/types.go b/internal/routing/types.go
new file mode 100644
index 0000000000..30c5061005
--- /dev/null
+++ b/internal/routing/types.go
@@ -0,0 +1,62 @@
+package routing
+
+// RouteType represents the type of routing decision made for a request.
+type RouteType string
+
+const (
+	// RouteTypeLocalProvider indicates the request is handled by a local OAuth provider (free).
+	RouteTypeLocalProvider RouteType = "LOCAL_PROVIDER"
+	// RouteTypeModelMapping indicates the request was remapped to another available model (free).
+	RouteTypeModelMapping RouteType = "MODEL_MAPPING"
+	// RouteTypeAmpCredits indicates the request is forwarded to ampcode.com (uses Amp credits).
+	RouteTypeAmpCredits RouteType = "AMP_CREDITS"
+	// RouteTypeNoProvider indicates no provider or fallback available.
+	RouteTypeNoProvider RouteType = "NO_PROVIDER"
+)
+
+// RoutingRequest contains the information needed to make a routing decision.
+type RoutingRequest struct {
+	// RequestedModel is the model name from the incoming request.
+	RequestedModel string
+	// PreferLocalProvider indicates whether to prefer local providers over mappings.
+	// When true, check local providers first before applying model mappings.
+	PreferLocalProvider bool
+	// ForceModelMapping indicates whether to force model mapping even if local provider exists.
+	// When true, apply model mappings first and skip local provider checks.
+	ForceModelMapping bool
+}
+
+// RoutingDecision contains the result of a routing decision.
+type RoutingDecision struct {
+	// RouteType indicates the type of routing decision.
+	RouteType RouteType
+	// ResolvedModel is the final model name after any mappings.
+	ResolvedModel string
+	// ProviderName is the name of the selected provider (if any).
+	ProviderName string
+	// FallbackModels is a list of alternative models to try if the primary fails.
+	FallbackModels []string
+	// ShouldProxy indicates whether the request should be proxied to ampcode.com.
+	ShouldProxy bool
+}
+
+// NewRoutingDecision creates a new RoutingDecision with the given parameters.
+func NewRoutingDecision(routeType RouteType, resolvedModel, providerName string, fallbackModels []string, shouldProxy bool) *RoutingDecision {
+	return &RoutingDecision{
+		RouteType:      routeType,
+		ResolvedModel:  resolvedModel,
+		ProviderName:   providerName,
+		FallbackModels: fallbackModels,
+		ShouldProxy:    shouldProxy,
+	}
+}
+
+// IsLocal returns true if the decision routes to a local provider.
+func (d *RoutingDecision) IsLocal() bool {
+	return d.RouteType == RouteTypeLocalProvider || d.RouteType == RouteTypeModelMapping
+}
+
+// HasFallbacks returns true if there are fallback models available.
+func (d *RoutingDecision) HasFallbacks() bool {
+	return len(d.FallbackModels) > 0
+}
diff --git a/internal/routing/wrapper.go b/internal/routing/wrapper.go
new file mode 100644
index 0000000000..90d10eea08
--- /dev/null
+++ b/internal/routing/wrapper.go
@@ -0,0 +1,270 @@
+package routing
+
+import (
+	"bufio"
+	"bytes"
+	"io"
+	"net"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/routing/ctxkeys"
+	"github.com/sirupsen/logrus"
+)
+
+// ProxyFunc is the function type for proxying requests.
+type ProxyFunc func(c *gin.Context)
+
+// ModelRoutingWrapper wraps HTTP handlers with unified model routing logic.
+// It replaces the FallbackHandler logic with a Router-based approach.
+type ModelRoutingWrapper struct {
+	router    *Router
+	extractor ModelExtractor
+	rewriter  ModelRewriter
+	proxyFunc ProxyFunc
+	logger    *logrus.Logger
+}
+
+// NewModelRoutingWrapper creates a new ModelRoutingWrapper with the given dependencies.
+// If extractor is nil, a DefaultModelExtractor is used.
+// If rewriter is nil, a DefaultModelRewriter is used.
+// proxyFunc is called for AMP_CREDITS route type; if nil, the handler will be called instead.
+func NewModelRoutingWrapper(router *Router, extractor ModelExtractor, rewriter ModelRewriter, proxyFunc ProxyFunc) *ModelRoutingWrapper {
+	if extractor == nil {
+		extractor = NewModelExtractor()
+	}
+	if rewriter == nil {
+		rewriter = NewModelRewriter()
+	}
+	return &ModelRoutingWrapper{
+		router:    router,
+		extractor: extractor,
+		rewriter:  rewriter,
+		proxyFunc: proxyFunc,
+		logger:    logrus.New(),
+	}
+}
+
+// SetLogger sets the logger for the wrapper.
+func (w *ModelRoutingWrapper) SetLogger(logger *logrus.Logger) {
+	w.logger = logger
+}
+
+// Wrap wraps a gin.HandlerFunc with model routing logic.
+// The returned handler will:
+// 1. Extract the model from the request
+// 2. Get a routing decision from the Router
+// 3. Handle the request according to the decision type (LOCAL_PROVIDER, MODEL_MAPPING, AMP_CREDITS)
+func (w *ModelRoutingWrapper) Wrap(handler gin.HandlerFunc) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		// Read request body
+		bodyBytes, err := io.ReadAll(c.Request.Body)
+		if err != nil {
+			w.logger.Errorf("routing wrapper: failed to read request body: %v", err)
+			handler(c)
+			return
+		}
+
+		// Extract model from request
+		ginParams := map[string]string{
+			"action": c.Param("action"),
+			"path":   c.Param("path"),
+		}
+		modelName, err := w.extractor.Extract(bodyBytes, ginParams)
+		if err != nil {
+			w.logger.Warnf("routing wrapper: failed to extract model: %v", err)
+			c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+			handler(c)
+			return
+		}
+
+		if modelName == "" {
+			// No model found, proceed with original handler
+			c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+			handler(c)
+			return
+		}
+
+		// Get routing decision
+		req := RoutingRequest{
+			RequestedModel:      modelName,
+			PreferLocalProvider: true,
+			ForceModelMapping:   false, // TODO: Get from config
+		}
+		decision := w.router.ResolveV2(req)
+
+		// Store decision in context for downstream handlers
+		c.Set(string(ctxkeys.RoutingDecision), decision)
+
+		// Handle based on route type
+		switch decision.RouteType {
+		case RouteTypeLocalProvider:
+			w.handleLocalProvider(c, handler, bodyBytes, decision)
+		case RouteTypeModelMapping:
+			w.handleModelMapping(c, handler, bodyBytes, decision)
+		case RouteTypeAmpCredits:
+			w.handleAmpCredits(c, handler, bodyBytes)
+		default:
+			// No provider available
+			c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+			handler(c)
+		}
+	}
+}
+
+// handleLocalProvider handles the LOCAL_PROVIDER route type.
+func (w *ModelRoutingWrapper) handleLocalProvider(c *gin.Context, handler gin.HandlerFunc, bodyBytes []byte, decision *RoutingDecision) {
+	// Filter Anthropic-Beta header for local provider
+	filterAnthropicBetaHeader(c)
+
+	// Restore body with original content
+	c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+
+	// Call handler
+	handler(c)
+}
+
+// handleModelMapping handles the MODEL_MAPPING route type.
+func (w *ModelRoutingWrapper) handleModelMapping(c *gin.Context, handler gin.HandlerFunc, bodyBytes []byte, decision *RoutingDecision) {
+	// Rewrite request body with mapped model
+	rewrittenBody, err := w.rewriter.RewriteRequestBody(bodyBytes, decision.ResolvedModel)
+	if err != nil {
+		w.logger.Warnf("routing wrapper: failed to rewrite request body: %v", err)
+		rewrittenBody = bodyBytes
+	}
+	_ = rewrittenBody
+
+	// Store mapped model in context
+	c.Set(string(ctxkeys.MappedModel), decision.ResolvedModel)
+
+	// Store fallback models in context if present
+	if len(decision.FallbackModels) > 0 {
+		c.Set(string(ctxkeys.FallbackModels), decision.FallbackModels)
+	}
+
+	// Filter Anthropic-Beta header for local provider
+	filterAnthropicBetaHeader(c)
+
+	// Restore body with rewritten content
+	c.Request.Body = io.NopCloser(bytes.NewReader(rewrittenBody))
+
+	// Wrap response writer to rewrite model back
+	wrappedWriter, cleanup := w.rewriter.WrapResponseWriter(c.Writer, decision.ResolvedModel, decision.ResolvedModel)
+	c.Writer = &ginResponseWriterAdapter{ResponseWriter: wrappedWriter, original: c.Writer}
+
+	// Call handler
+	handler(c)
+
+	// Cleanup (flush response rewriting)
+	cleanup()
+}
+
+// handleAmpCredits handles the AMP_CREDITS route type.
+// It calls the proxy function directly if available, otherwise passes to handler.
+// Does NOT filter headers or rewrite body - proxy handles everything.
+func (w *ModelRoutingWrapper) handleAmpCredits(c *gin.Context, handler gin.HandlerFunc, bodyBytes []byte) {
+	// Restore body with original content (no rewriting for proxy)
+	c.Request.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+
+	// Call proxy function if available, otherwise fall back to handler
+	if w.proxyFunc != nil {
+		w.proxyFunc(c)
+	} else {
+		handler(c)
+	}
+}
+
+// filterAnthropicBetaHeader filters Anthropic-Beta header for local providers.
+func filterAnthropicBetaHeader(c *gin.Context) {
+	if betaHeader := c.Request.Header.Get("Anthropic-Beta"); betaHeader != "" {
+		filtered := filterBetaFeatures(betaHeader, "context-1m-2025-08-07")
+		if filtered != "" {
+			c.Request.Header.Set("Anthropic-Beta", filtered)
+		} else {
+			c.Request.Header.Del("Anthropic-Beta")
+		}
+	}
+}
+
+// filterBetaFeatures removes specified beta features from the header.
+func filterBetaFeatures(betaHeader, featureToRemove string) string {
+	// Simple implementation - can be enhanced
+	if betaHeader == featureToRemove {
+		return ""
+	}
+	return betaHeader
+}
+
+// ginResponseWriterAdapter adapts http.ResponseWriter to gin.ResponseWriter.
+type ginResponseWriterAdapter struct {
+	http.ResponseWriter
+	original gin.ResponseWriter
+}
+
+func (a *ginResponseWriterAdapter) WriteHeader(code int) {
+	a.ResponseWriter.WriteHeader(code)
+}
+
+func (a *ginResponseWriterAdapter) Write(data []byte) (int, error) {
+	return a.ResponseWriter.Write(data)
+}
+
+func (a *ginResponseWriterAdapter) Header() http.Header {
+	return a.ResponseWriter.Header()
+}
+
+// CloseNotify implements http.CloseNotifier.
+func (a *ginResponseWriterAdapter) CloseNotify() <-chan bool {
+	if notifier, ok := a.ResponseWriter.(http.CloseNotifier); ok {
+		return notifier.CloseNotify()
+	}
+	return a.original.CloseNotify()
+}
+
+// Flush implements http.Flusher.
+func (a *ginResponseWriterAdapter) Flush() {
+	if flusher, ok := a.ResponseWriter.(http.Flusher); ok {
+		flusher.Flush()
+	}
+}
+
+// Hijack implements http.Hijacker.
+func (a *ginResponseWriterAdapter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
+	if hijacker, ok := a.ResponseWriter.(http.Hijacker); ok {
+		return hijacker.Hijack()
+	}
+	return a.original.Hijack()
+}
+
+// Status returns the HTTP status code.
+func (a *ginResponseWriterAdapter) Status() int {
+	return a.original.Status()
+}
+
+// Size returns the number of bytes already written into the response http body.
+func (a *ginResponseWriterAdapter) Size() int {
+	return a.original.Size()
+}
+
+// Written returns whether or not the response for this context has been written.
+func (a *ginResponseWriterAdapter) Written() bool {
+	return a.original.Written()
+}
+
+// WriteHeaderNow forces WriteHeader to be called.
+func (a *ginResponseWriterAdapter) WriteHeaderNow() {
+	a.original.WriteHeaderNow()
+}
+
+// WriteString writes the given string into the response body.
+func (a *ginResponseWriterAdapter) WriteString(s string) (int, error) {
+	return a.Write([]byte(s))
+}
+
+// Pusher returns the http.Pusher for server push.
+func (a *ginResponseWriterAdapter) Pusher() http.Pusher {
+	if pusher, ok := a.ResponseWriter.(http.Pusher); ok {
+		return pusher
+	}
+	return nil
+}
diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go
index b1e23860cf..317090d058 100644
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -141,7 +141,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
-		Body:      body.payload,
+		Body:      bytes.Clone(body.payload),
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
@@ -156,20 +156,20 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
 	}
 	recordAPIResponseMetadata(ctx, e.cfg, wsResp.Status, wsResp.Headers.Clone())
 	if len(wsResp.Body) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, wsResp.Body)
+		appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(wsResp.Body))
 	}
 	if wsResp.Status < 200 || wsResp.Status >= 300 {
 		return resp, statusErr{code: wsResp.Status, msg: string(wsResp.Body)}
 	}
 	reporter.publish(ctx, parseGeminiUsage(wsResp.Body))
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, wsResp.Body, &param)
-	resp = cliproxyexecutor.Response{Payload: ensureColonSpacedJSON([]byte(out)), Headers: wsResp.Headers.Clone()}
+	out := sdktranslator.TranslateNonStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), bytes.Clone(translatedReq), bytes.Clone(wsResp.Body), &param)
+	resp = cliproxyexecutor.Response{Payload: ensureColonSpacedJSON([]byte(out))}
 	return resp, nil
 }
 
 // ExecuteStream performs a streaming request to the AI Studio API.
-func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	if opts.Alt == "responses/compact" {
 		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
@@ -199,7 +199,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
-		Body:      body.payload,
+		Body:      bytes.Clone(body.payload),
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
@@ -225,7 +225,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		}
 		var body bytes.Buffer
 		if len(firstEvent.Payload) > 0 {
-			appendAPIResponseChunk(ctx, e.cfg, firstEvent.Payload)
+			appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(firstEvent.Payload))
 			body.Write(firstEvent.Payload)
 		}
 		if firstEvent.Type == wsrelay.MessageTypeStreamEnd {
@@ -244,7 +244,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 				metadataLogged = true
 			}
 			if len(event.Payload) > 0 {
-				appendAPIResponseChunk(ctx, e.cfg, event.Payload)
+				appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
 				body.Write(event.Payload)
 			}
 			if event.Type == wsrelay.MessageTypeStreamEnd {
@@ -254,6 +254,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 		return nil, statusErr{code: firstEvent.Status, msg: body.String()}
 	}
 	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
 	go func(first wsrelay.StreamEvent) {
 		defer close(out)
 		var param any
@@ -273,12 +274,12 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 				}
 			case wsrelay.MessageTypeStreamChunk:
 				if len(event.Payload) > 0 {
-					appendAPIResponseChunk(ctx, e.cfg, event.Payload)
+					appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
 					filtered := FilterSSEUsageMetadata(event.Payload)
 					if detail, ok := parseGeminiStreamUsage(filtered); ok {
 						reporter.publish(ctx, detail)
 					}
-					lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, filtered, &param)
+					lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), translatedReq, bytes.Clone(filtered), &param)
 					for i := range lines {
 						out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))}
 					}
@@ -292,9 +293,9 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 					metadataLogged = true
 				}
 				if len(event.Payload) > 0 {
-					appendAPIResponseChunk(ctx, e.cfg, event.Payload)
+					appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(event.Payload))
 				}
-				lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, opts.OriginalRequest, translatedReq, event.Payload, &param)
+				lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), translatedReq, bytes.Clone(event.Payload), &param)
 				for i := range lines {
 					out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))}
 				}
@@ -317,7 +318,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
 			}
 		}
 	}(firstEvent)
-	return &cliproxyexecutor.StreamResult{Headers: firstEvent.Headers.Clone(), Chunks: out}, nil
+	return stream, nil
 }
 
 // CountTokens counts tokens for the given request using the AI Studio API.
@@ -349,7 +350,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 		URL:       endpoint,
 		Method:    http.MethodPost,
 		Headers:   wsReq.Headers.Clone(),
-		Body:      body.payload,
+		Body:      bytes.Clone(body.payload),
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
@@ -363,7 +364,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	}
 	recordAPIResponseMetadata(ctx, e.cfg, resp.Status, resp.Headers.Clone())
 	if len(resp.Body) > 0 {
-		appendAPIResponseChunk(ctx, e.cfg, resp.Body)
+		appendAPIResponseChunk(ctx, e.cfg, bytes.Clone(resp.Body))
 	}
 	if resp.Status < 200 || resp.Status >= 300 {
 		return cliproxyexecutor.Response{}, statusErr{code: resp.Status, msg: string(resp.Body)}
@@ -372,7 +373,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A
 	if totalTokens <= 0 {
 		return cliproxyexecutor.Response{}, fmt.Errorf("wsrelay: totalTokens missing in response")
 	}
-	translated := sdktranslator.TranslateTokenCount(ctx, body.toFormat, opts.SourceFormat, totalTokens, resp.Body)
+	translated := sdktranslator.TranslateTokenCount(ctx, body.toFormat, opts.SourceFormat, totalTokens, bytes.Clone(resp.Body))
 	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }
 
@@ -392,13 +393,12 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
-	payload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
 	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
 		return nil, translatedPayload{}, err
diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index 652cb472a0..b4ca327545 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -133,13 +133,12 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
 
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -231,8 +230,8 @@ attemptLoop:
 
 			reporter.publish(ctx, parseAntigravityUsage(bodyBytes))
 			var param any
-			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bodyBytes, &param)
-			resp = cliproxyexecutor.Response{Payload: []byte(converted), Headers: httpResp.Header.Clone()}
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bodyBytes, &param)
+			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
 			reporter.ensurePublished(ctx)
 			return resp, nil
 		}
@@ -275,13 +274,12 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
 
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -435,8 +433,8 @@ attemptLoop:
 
 			reporter.publish(ctx, parseAntigravityUsage(resp.Payload))
 			var param any
-			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, resp.Payload, &param)
-			resp = cliproxyexecutor.Response{Payload: []byte(converted), Headers: httpResp.Header.Clone()}
+			converted := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, resp.Payload, &param)
+			resp = cliproxyexecutor.Response{Payload: []byte(converted)}
 			reporter.ensurePublished(ctx)
 
 			return resp, nil
@@ -645,7 +643,7 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
 }
 
 // ExecuteStream performs a streaming request to the Antigravity API.
-func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	if opts.Alt == "responses/compact" {
 		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
@@ -667,13 +665,12 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("antigravity")
 
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
 	translated, err = thinking.ApplyThinking(translated, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -775,6 +772,7 @@ attemptLoop:
 			}
 
 			out := make(chan cliproxyexecutor.StreamChunk)
+			stream = out
 			go func(resp *http.Response) {
 				defer close(out)
 				defer func() {
@@ -802,12 +800,12 @@ attemptLoop:
 						reporter.publish(ctx, detail)
 					}
 
-					chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bytes.Clone(payload), &param)
+					chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(payload), &param)
 					for i := range chunks {
 						out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 					}
 				}
-				tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, []byte("[DONE]"), &param)
+				tail := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, []byte("[DONE]"), &param)
 				for i := range tail {
 					out <- cliproxyexecutor.StreamChunk{Payload: []byte(tail[i])}
 				}
@@ -819,7 +817,7 @@ attemptLoop:
 					reporter.ensurePublished(ctx)
 				}
 			}(httpResp)
-			return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+			return stream, nil
 		}
 
 		switch {
@@ -874,7 +872,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 
 	// Prepare payload once (doesn't depend on baseURL)
-	payload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	payload, err := thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -967,7 +965,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 		if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices {
 			count := gjson.GetBytes(bodyBytes, "totalTokens").Int()
 			translated := sdktranslator.TranslateTokenCount(respCtx, to, from, count, bodyBytes)
-			return cliproxyexecutor.Response{Payload: []byte(translated), Headers: httpResp.Header.Clone()}, nil
+			return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 		}
 
 		lastStatus = httpResp.StatusCode
@@ -1006,12 +1004,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *config.Config) []*registry.ModelInfo {
 	exec := &AntigravityExecutor{cfg: cfg}
 	token, updatedAuth, errToken := exec.ensureAccessToken(ctx, auth)
-	if errToken != nil {
-		log.Warnf("antigravity executor: fetch models failed for %s: token error: %v", auth.ID, errToken)
-		return nil
-	}
-	if token == "" {
-		log.Warnf("antigravity executor: fetch models failed for %s: got empty token", auth.ID)
+	if errToken != nil || token == "" {
 		return nil
 	}
 	if updatedAuth != nil {
@@ -1025,7 +1018,6 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		modelsURL := baseURL + antigravityModelsPath
 		httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, modelsURL, bytes.NewReader([]byte(`{}`)))
 		if errReq != nil {
-			log.Warnf("antigravity executor: fetch models failed for %s: create request error: %v", auth.ID, errReq)
 			return nil
 		}
 		httpReq.Header.Set("Content-Type", "application/json")
@@ -1038,14 +1030,12 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		httpResp, errDo := httpClient.Do(httpReq)
 		if errDo != nil {
 			if errors.Is(errDo, context.Canceled) || errors.Is(errDo, context.DeadlineExceeded) {
-				log.Warnf("antigravity executor: fetch models failed for %s: context canceled: %v", auth.ID, errDo)
 				return nil
 			}
 			if idx+1 < len(baseURLs) {
 				log.Debugf("antigravity executor: models request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 				continue
 			}
-			log.Warnf("antigravity executor: fetch models failed for %s: request error: %v", auth.ID, errDo)
 			return nil
 		}
 
@@ -1058,7 +1048,6 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 				log.Debugf("antigravity executor: models read error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 				continue
 			}
-			log.Warnf("antigravity executor: fetch models failed for %s: read body error: %v", auth.ID, errRead)
 			return nil
 		}
 		if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
@@ -1066,13 +1055,11 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 				log.Debugf("antigravity executor: models request rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 				continue
 			}
-			log.Warnf("antigravity executor: fetch models failed for %s: unexpected status %d, body: %s", auth.ID, httpResp.StatusCode, string(bodyBytes))
 			return nil
 		}
 
 		result := gjson.GetBytes(bodyBytes, "models")
 		if !result.Exists() {
-			log.Warnf("antigravity executor: fetch models failed for %s: no models field in response, body: %s", auth.ID, string(bodyBytes))
 			return nil
 		}
 
@@ -1293,40 +1280,51 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 	payload = geminiToAntigravity(modelName, payload, projectID)
 	payload, _ = sjson.SetBytes(payload, "model", modelName)
 
-	useAntigravitySchema := strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high")
-	payloadStr := string(payload)
-	paths := make([]string, 0)
-	util.Walk(gjson.Parse(payloadStr), "", "parametersJsonSchema", &paths)
-	for _, p := range paths {
-		payloadStr, _ = util.RenameKey(payloadStr, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
-	}
+	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
+		strJSON := string(payload)
+		paths := make([]string, 0)
+		util.Walk(gjson.ParseBytes(payload), "", "parametersJsonSchema", &paths)
+		for _, p := range paths {
+			strJSON, _ = util.RenameKey(strJSON, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
+		}
 
-	if useAntigravitySchema {
-		payloadStr = util.CleanJSONSchemaForAntigravity(payloadStr)
+		// Use the centralized schema cleaner to handle unsupported keywords,
+		// const->enum conversion, and flattening of types/anyOf.
+		strJSON = util.CleanJSONSchemaForAntigravity(strJSON)
+		payload = []byte(strJSON)
 	} else {
-		payloadStr = util.CleanJSONSchemaForGemini(payloadStr)
+		strJSON := string(payload)
+		paths := make([]string, 0)
+		util.Walk(gjson.Parse(strJSON), "", "parametersJsonSchema", &paths)
+		for _, p := range paths {
+			strJSON, _ = util.RenameKey(strJSON, p, p[:len(p)-len("parametersJsonSchema")]+"parameters")
+		}
+		// Clean tool schemas for Gemini to remove unsupported JSON Schema keywords
+		// without adding empty-schema placeholders.
+		strJSON = util.CleanJSONSchemaForGemini(strJSON)
+		payload = []byte(strJSON)
 	}
 
-	if useAntigravitySchema {
-		systemInstructionPartsResult := gjson.Get(payloadStr, "request.systemInstruction.parts")
-		payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.role", "user")
-		payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.parts.0.text", systemInstruction)
-		payloadStr, _ = sjson.Set(payloadStr, "request.systemInstruction.parts.1.text", fmt.Sprintf("Please ignore following [ignore]%s[/ignore]", systemInstruction))
+	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
+		systemInstructionPartsResult := gjson.GetBytes(payload, "request.systemInstruction.parts")
+		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.role", "user")
+		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.parts.0.text", systemInstruction)
+		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.parts.1.text", fmt.Sprintf("Please ignore following [ignore]%s[/ignore]", systemInstruction))
 
 		if systemInstructionPartsResult.Exists() && systemInstructionPartsResult.IsArray() {
 			for _, partResult := range systemInstructionPartsResult.Array() {
-				payloadStr, _ = sjson.SetRaw(payloadStr, "request.systemInstruction.parts.-1", partResult.Raw)
+				payload, _ = sjson.SetRawBytes(payload, "request.systemInstruction.parts.-1", []byte(partResult.Raw))
 			}
 		}
 	}
 
 	if strings.Contains(modelName, "claude") {
-		payloadStr, _ = sjson.Set(payloadStr, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
+		payload, _ = sjson.SetBytes(payload, "request.toolConfig.functionCallingConfig.mode", "VALIDATED")
 	} else {
-		payloadStr, _ = sjson.Delete(payloadStr, "request.generationConfig.maxOutputTokens")
+		payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.maxOutputTokens")
 	}
 
-	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), strings.NewReader(payloadStr))
+	httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), bytes.NewReader(payload))
 	if errReq != nil {
 		return nil, errReq
 	}
@@ -1348,15 +1346,11 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		authLabel = auth.Label
 		authType, authValue = auth.AccountInfo()
 	}
-	var payloadLog []byte
-	if e.cfg != nil && e.cfg.RequestLog {
-		payloadLog = []byte(payloadStr)
-	}
 	recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
 		URL:       requestURL.String(),
 		Method:    http.MethodPost,
 		Headers:   httpReq.Header.Clone(),
-		Body:      payloadLog,
+		Body:      payload,
 		Provider:  e.Identifier(),
 		AuthID:    authID,
 		AuthLabel: authLabel,
diff --git a/internal/runtime/executor/cache_helpers.go b/internal/runtime/executor/cache_helpers.go
index 1e32f43a06..b6de886d12 100644
--- a/internal/runtime/executor/cache_helpers.go
+++ b/internal/runtime/executor/cache_helpers.go
@@ -29,7 +29,6 @@ func startCodexCacheCleanup() {
 	go func() {
 		ticker := time.NewTicker(codexCacheCleanupInterval)
 		defer ticker.Stop()
-
 		for range ticker.C {
 			purgeExpiredCodexCache()
 		}
@@ -39,10 +38,8 @@ func startCodexCacheCleanup() {
 // purgeExpiredCodexCache removes entries that have expired.
 func purgeExpiredCodexCache() {
 	now := time.Now()
-
 	codexCacheMu.Lock()
 	defer codexCacheMu.Unlock()
-
 	for key, cache := range codexCacheMap {
 		if cache.Expire.Before(now) {
 			delete(codexCacheMap, key)
@@ -69,10 +66,3 @@ func setCodexCache(key string, cache codexCache) {
 	codexCacheMap[key] = cache
 	codexCacheMu.Unlock()
 }
-
-// deleteCodexCache deletes a cache entry.
-func deleteCodexCache(key string) {
-	codexCacheMu.Lock()
-	delete(codexCacheMap, key)
-	codexCacheMu.Unlock()
-}
diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index 681e7b8d22..5b76d02ae2 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -9,7 +9,6 @@ import (
 	"fmt"
 	"io"
 	"net/http"
-	"runtime"
 	"strings"
 	"time"
 
@@ -101,13 +100,12 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -117,7 +115,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 
 	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
 	// based on client type and configuration.
-	body = applyCloaking(ctx, e.cfg, auth, body, baseModel, apiKey)
+	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)
 
 	requestedModel := payloadRequestedModel(opts, req.Model)
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
@@ -135,7 +133,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	extraBetas, body = extractAndRemoveBetas(body)
 	bodyForTranslation := body
 	bodyForUpstream := body
-	if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() {
+	if isClaudeOAuthToken(apiKey) {
 		bodyForUpstream = applyClaudeToolPrefix(body, claudeToolPrefix)
 	}
 
@@ -144,7 +142,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	if err != nil {
 		return resp, err
 	}
-	applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas, e.cfg)
+	applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -209,7 +207,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	} else {
 		reporter.publish(ctx, parseClaudeUsage(data))
 	}
-	if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() {
+	if isClaudeOAuthToken(apiKey) {
 		data = stripClaudeToolPrefixFromResponse(data, claudeToolPrefix)
 	}
 	var param any
@@ -218,16 +216,16 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		to,
 		from,
 		req.Model,
-		opts.OriginalRequest,
+		bytes.Clone(opts.OriginalRequest),
 		bodyForTranslation,
 		data,
 		&param,
 	)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
 
-func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	if opts.Alt == "responses/compact" {
 		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
@@ -242,13 +240,12 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	defer reporter.trackFailure(ctx, &err)
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -258,7 +255,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 
 	// Apply cloaking (system prompt injection, fake user ID, sensitive word obfuscation)
 	// based on client type and configuration.
-	body = applyCloaking(ctx, e.cfg, auth, body, baseModel, apiKey)
+	body = applyCloaking(ctx, e.cfg, auth, body, baseModel)
 
 	requestedModel := payloadRequestedModel(opts, req.Model)
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
@@ -276,7 +273,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	extraBetas, body = extractAndRemoveBetas(body)
 	bodyForTranslation := body
 	bodyForUpstream := body
-	if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() {
+	if isClaudeOAuthToken(apiKey) {
 		bodyForUpstream = applyClaudeToolPrefix(body, claudeToolPrefix)
 	}
 
@@ -285,7 +282,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if err != nil {
 		return nil, err
 	}
-	applyClaudeHeaders(httpReq, auth, apiKey, true, extraBetas, e.cfg)
+	applyClaudeHeaders(httpReq, auth, apiKey, true, extraBetas)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -330,6 +327,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		return nil, err
 	}
 	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
 	go func() {
 		defer close(out)
 		defer func() {
@@ -348,7 +346,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 				if detail, ok := parseClaudeStreamUsage(line); ok {
 					reporter.publish(ctx, detail)
 				}
-				if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() {
+				if isClaudeOAuthToken(apiKey) {
 					line = stripClaudeToolPrefixFromStreamLine(line, claudeToolPrefix)
 				}
 				// Forward the line as-is to preserve SSE format
@@ -375,7 +373,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 			if detail, ok := parseClaudeStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() {
+			if isClaudeOAuthToken(apiKey) {
 				line = stripClaudeToolPrefixFromStreamLine(line, claudeToolPrefix)
 			}
 			chunks := sdktranslator.TranslateStream(
@@ -383,7 +381,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 				to,
 				from,
 				req.Model,
-				opts.OriginalRequest,
+				bytes.Clone(opts.OriginalRequest),
 				bodyForTranslation,
 				bytes.Clone(line),
 				&param,
@@ -398,7 +396,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
-	return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+	return stream, nil
 }
 
 func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
@@ -413,7 +411,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("claude")
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, stream)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
 	if !strings.HasPrefix(baseModel, "claude-3-5-haiku") {
@@ -423,7 +421,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	// Extract betas from body and convert to header (for count_tokens too)
 	var extraBetas []string
 	extraBetas, body = extractAndRemoveBetas(body)
-	if isClaudeOAuthToken(apiKey) && !auth.ToolPrefixDisabled() {
+	if isClaudeOAuthToken(apiKey) {
 		body = applyClaudeToolPrefix(body, claudeToolPrefix)
 	}
 
@@ -432,7 +430,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
-	applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas, e.cfg)
+	applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -487,7 +485,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	count := gjson.GetBytes(data, "input_tokens").Int()
 	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
-	return cliproxyexecutor.Response{Payload: []byte(out), Headers: resp.Header.Clone()}, nil
+	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
 }
 
 func (e *ClaudeExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*cliproxyauth.Auth, error) {
@@ -638,49 +636,7 @@ func decodeResponseBody(body io.ReadCloser, contentEncoding string) (io.ReadClos
 	return body, nil
 }
 
-// mapStainlessOS maps runtime.GOOS to Stainless SDK OS names.
-func mapStainlessOS() string {
-	switch runtime.GOOS {
-	case "darwin":
-		return "MacOS"
-	case "windows":
-		return "Windows"
-	case "linux":
-		return "Linux"
-	case "freebsd":
-		return "FreeBSD"
-	default:
-		return "Other::" + runtime.GOOS
-	}
-}
-
-// mapStainlessArch maps runtime.GOARCH to Stainless SDK architecture names.
-func mapStainlessArch() string {
-	switch runtime.GOARCH {
-	case "amd64":
-		return "x64"
-	case "arm64":
-		return "arm64"
-	case "386":
-		return "x86"
-	default:
-		return "other::" + runtime.GOARCH
-	}
-}
-
-func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, stream bool, extraBetas []string, cfg *config.Config) {
-	hdrDefault := func(cfgVal, fallback string) string {
-		if cfgVal != "" {
-			return cfgVal
-		}
-		return fallback
-	}
-
-	var hd config.ClaudeHeaderDefaults
-	if cfg != nil {
-		hd = cfg.ClaudeHeaderDefaults
-	}
-
+func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, stream bool, extraBetas []string) {
 	useAPIKey := auth != nil && auth.Attributes != nil && strings.TrimSpace(auth.Attributes["api_key"]) != ""
 	isAnthropicBase := r.URL != nil && strings.EqualFold(r.URL.Scheme, "https") && strings.EqualFold(r.URL.Host, "api.anthropic.com")
 	if isAnthropicBase && useAPIKey {
@@ -727,17 +683,16 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 	misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Version", "2023-06-01")
 	misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Dangerous-Direct-Browser-Access", "true")
 	misc.EnsureHeader(r.Header, ginHeaders, "X-App", "cli")
-	// Values below match Claude Code 2.1.44 / @anthropic-ai/sdk 0.74.0 (captured 2026-02-17).
 	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Helper-Method", "stream")
 	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Retry-Count", "0")
-	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Runtime-Version", hdrDefault(hd.RuntimeVersion, "v24.3.0"))
-	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Package-Version", hdrDefault(hd.PackageVersion, "0.74.0"))
+	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Runtime-Version", "v24.3.0")
+	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Package-Version", "0.55.1")
 	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Runtime", "node")
 	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Lang", "js")
-	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Arch", mapStainlessArch())
-	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Os", mapStainlessOS())
-	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Timeout", hdrDefault(hd.Timeout, "600"))
-	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", hdrDefault(hd.UserAgent, "claude-cli/2.1.44 (external, sdk-cli)"))
+	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Arch", "arm64")
+	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Os", "MacOS")
+	misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Timeout", "60")
+	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "claude-cli/1.0.83 (external, cli)")
 	r.Header.Set("Connection", "keep-alive")
 	r.Header.Set("Accept-Encoding", "gzip, deflate, br, zstd")
 	if stream {
@@ -745,8 +700,6 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 	} else {
 		r.Header.Set("Accept", "application/json")
 	}
-	// Keep OS/Arch mapping dynamic (not configurable).
-	// They intentionally continue to derive from runtime.GOOS/runtime.GOARCH.
 	var attrs map[string]string
 	if auth != nil {
 		attrs = auth.Attributes
@@ -798,21 +751,11 @@ func applyClaudeToolPrefix(body []byte, prefix string) []byte {
 		return body
 	}
 
-	// Collect built-in tool names (those with a non-empty "type" field) so we can
-	// skip them consistently in both tools and message history.
-	builtinTools := map[string]bool{}
-	for _, name := range []string{"web_search", "code_execution", "text_editor", "computer"} {
-		builtinTools[name] = true
-	}
-
 	if tools := gjson.GetBytes(body, "tools"); tools.Exists() && tools.IsArray() {
 		tools.ForEach(func(index, tool gjson.Result) bool {
 			// Skip built-in tools (web_search, code_execution, etc.) which have
 			// a "type" field and require their name to remain unchanged.
 			if tool.Get("type").Exists() && tool.Get("type").String() != "" {
-				if n := tool.Get("name").String(); n != "" {
-					builtinTools[n] = true
-				}
 				return true
 			}
 			name := tool.Get("name").String()
@@ -827,7 +770,7 @@ func applyClaudeToolPrefix(body []byte, prefix string) []byte {
 
 	if gjson.GetBytes(body, "tool_choice.type").String() == "tool" {
 		name := gjson.GetBytes(body, "tool_choice.name").String()
-		if name != "" && !strings.HasPrefix(name, prefix) && !builtinTools[name] {
+		if name != "" && !strings.HasPrefix(name, prefix) {
 			body, _ = sjson.SetBytes(body, "tool_choice.name", prefix+name)
 		}
 	}
@@ -839,38 +782,15 @@ func applyClaudeToolPrefix(body []byte, prefix string) []byte {
 				return true
 			}
 			content.ForEach(func(contentIndex, part gjson.Result) bool {
-				partType := part.Get("type").String()
-				switch partType {
-				case "tool_use":
-					name := part.Get("name").String()
-					if name == "" || strings.HasPrefix(name, prefix) || builtinTools[name] {
-						return true
-					}
-					path := fmt.Sprintf("messages.%d.content.%d.name", msgIndex.Int(), contentIndex.Int())
-					body, _ = sjson.SetBytes(body, path, prefix+name)
-				case "tool_reference":
-					toolName := part.Get("tool_name").String()
-					if toolName == "" || strings.HasPrefix(toolName, prefix) || builtinTools[toolName] {
-						return true
-					}
-					path := fmt.Sprintf("messages.%d.content.%d.tool_name", msgIndex.Int(), contentIndex.Int())
-					body, _ = sjson.SetBytes(body, path, prefix+toolName)
-				case "tool_result":
-					// Handle nested tool_reference blocks inside tool_result.content[]
-					nestedContent := part.Get("content")
-					if nestedContent.Exists() && nestedContent.IsArray() {
-						nestedContent.ForEach(func(nestedIndex, nestedPart gjson.Result) bool {
-							if nestedPart.Get("type").String() == "tool_reference" {
-								nestedToolName := nestedPart.Get("tool_name").String()
-								if nestedToolName != "" && !strings.HasPrefix(nestedToolName, prefix) && !builtinTools[nestedToolName] {
-									nestedPath := fmt.Sprintf("messages.%d.content.%d.content.%d.tool_name", msgIndex.Int(), contentIndex.Int(), nestedIndex.Int())
-									body, _ = sjson.SetBytes(body, nestedPath, prefix+nestedToolName)
-								}
-							}
-							return true
-						})
-					}
+				if part.Get("type").String() != "tool_use" {
+					return true
+				}
+				name := part.Get("name").String()
+				if name == "" || strings.HasPrefix(name, prefix) {
+					return true
 				}
+				path := fmt.Sprintf("messages.%d.content.%d.name", msgIndex.Int(), contentIndex.Int())
+				body, _ = sjson.SetBytes(body, path, prefix+name)
 				return true
 			})
 			return true
@@ -889,38 +809,15 @@ func stripClaudeToolPrefixFromResponse(body []byte, prefix string) []byte {
 		return body
 	}
 	content.ForEach(func(index, part gjson.Result) bool {
-		partType := part.Get("type").String()
-		switch partType {
-		case "tool_use":
-			name := part.Get("name").String()
-			if !strings.HasPrefix(name, prefix) {
-				return true
-			}
-			path := fmt.Sprintf("content.%d.name", index.Int())
-			body, _ = sjson.SetBytes(body, path, strings.TrimPrefix(name, prefix))
-		case "tool_reference":
-			toolName := part.Get("tool_name").String()
-			if !strings.HasPrefix(toolName, prefix) {
-				return true
-			}
-			path := fmt.Sprintf("content.%d.tool_name", index.Int())
-			body, _ = sjson.SetBytes(body, path, strings.TrimPrefix(toolName, prefix))
-		case "tool_result":
-			// Handle nested tool_reference blocks inside tool_result.content[]
-			nestedContent := part.Get("content")
-			if nestedContent.Exists() && nestedContent.IsArray() {
-				nestedContent.ForEach(func(nestedIndex, nestedPart gjson.Result) bool {
-					if nestedPart.Get("type").String() == "tool_reference" {
-						nestedToolName := nestedPart.Get("tool_name").String()
-						if strings.HasPrefix(nestedToolName, prefix) {
-							nestedPath := fmt.Sprintf("content.%d.content.%d.tool_name", index.Int(), nestedIndex.Int())
-							body, _ = sjson.SetBytes(body, nestedPath, strings.TrimPrefix(nestedToolName, prefix))
-						}
-					}
-					return true
-				})
-			}
+		if part.Get("type").String() != "tool_use" {
+			return true
+		}
+		name := part.Get("name").String()
+		if !strings.HasPrefix(name, prefix) {
+			return true
 		}
+		path := fmt.Sprintf("content.%d.name", index.Int())
+		body, _ = sjson.SetBytes(body, path, strings.TrimPrefix(name, prefix))
 		return true
 	})
 	return body
@@ -935,34 +832,15 @@ func stripClaudeToolPrefixFromStreamLine(line []byte, prefix string) []byte {
 		return line
 	}
 	contentBlock := gjson.GetBytes(payload, "content_block")
-	if !contentBlock.Exists() {
+	if !contentBlock.Exists() || contentBlock.Get("type").String() != "tool_use" {
 		return line
 	}
-
-	blockType := contentBlock.Get("type").String()
-	var updated []byte
-	var err error
-
-	switch blockType {
-	case "tool_use":
-		name := contentBlock.Get("name").String()
-		if !strings.HasPrefix(name, prefix) {
-			return line
-		}
-		updated, err = sjson.SetBytes(payload, "content_block.name", strings.TrimPrefix(name, prefix))
-		if err != nil {
-			return line
-		}
-	case "tool_reference":
-		toolName := contentBlock.Get("tool_name").String()
-		if !strings.HasPrefix(toolName, prefix) {
-			return line
-		}
-		updated, err = sjson.SetBytes(payload, "content_block.tool_name", strings.TrimPrefix(toolName, prefix))
-		if err != nil {
-			return line
-		}
-	default:
+	name := contentBlock.Get("name").String()
+	if !strings.HasPrefix(name, prefix) {
+		return line
+	}
+	updated, err := sjson.SetBytes(payload, "content_block.name", strings.TrimPrefix(name, prefix))
+	if err != nil {
 		return line
 	}
 
@@ -982,10 +860,10 @@ func getClientUserAgent(ctx context.Context) string {
 }
 
 // getCloakConfigFromAuth extracts cloak configuration from auth attributes.
-// Returns (cloakMode, strictMode, sensitiveWords, cacheUserID).
-func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string, bool) {
+// Returns (cloakMode, strictMode, sensitiveWords).
+func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string) {
 	if auth == nil || auth.Attributes == nil {
-		return "auto", false, nil, false
+		return "auto", false, nil
 	}
 
 	cloakMode := auth.Attributes["cloak_mode"]
@@ -1003,9 +881,7 @@ func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string, bo
 		}
 	}
 
-	cacheUserID := strings.EqualFold(strings.TrimSpace(auth.Attributes["cloak_cache_user_id"]), "true")
-
-	return cloakMode, strictMode, sensitiveWords, cacheUserID
+	return cloakMode, strictMode, sensitiveWords
 }
 
 // resolveClaudeKeyCloakConfig finds the matching ClaudeKey config and returns its CloakConfig.
@@ -1038,24 +914,16 @@ func resolveClaudeKeyCloakConfig(cfg *config.Config, auth *cliproxyauth.Auth) *c
 }
 
 // injectFakeUserID generates and injects a fake user ID into the request metadata.
-// When useCache is false, a new user ID is generated for every call.
-func injectFakeUserID(payload []byte, apiKey string, useCache bool) []byte {
-	generateID := func() string {
-		if useCache {
-			return cachedUserID(apiKey)
-		}
-		return generateFakeUserID()
-	}
-
+func injectFakeUserID(payload []byte) []byte {
 	metadata := gjson.GetBytes(payload, "metadata")
 	if !metadata.Exists() {
-		payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateID())
+		payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateFakeUserID())
 		return payload
 	}
 
 	existingUserID := gjson.GetBytes(payload, "metadata.user_id").String()
 	if existingUserID == "" || !isValidUserID(existingUserID) {
-		payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateID())
+		payload, _ = sjson.SetBytes(payload, "metadata.user_id", generateFakeUserID())
 	}
 	return payload
 }
@@ -1092,7 +960,7 @@ func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
 
 // applyCloaking applies cloaking transformations to the payload based on config and client.
 // Cloaking includes: system prompt injection, fake user ID, and sensitive word obfuscation.
-func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, payload []byte, model string, apiKey string) []byte {
+func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, payload []byte, model string) []byte {
 	clientUserAgent := getClientUserAgent(ctx)
 
 	// Get cloak config from ClaudeKey configuration
@@ -1102,20 +970,16 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A
 	var cloakMode string
 	var strictMode bool
 	var sensitiveWords []string
-	var cacheUserID bool
 
 	if cloakCfg != nil {
 		cloakMode = cloakCfg.Mode
 		strictMode = cloakCfg.StrictMode
 		sensitiveWords = cloakCfg.SensitiveWords
-		if cloakCfg.CacheUserID != nil {
-			cacheUserID = *cloakCfg.CacheUserID
-		}
 	}
 
 	// Fallback to auth attributes if no config found
 	if cloakMode == "" {
-		attrMode, attrStrict, attrWords, attrCache := getCloakConfigFromAuth(auth)
+		attrMode, attrStrict, attrWords := getCloakConfigFromAuth(auth)
 		cloakMode = attrMode
 		if !strictMode {
 			strictMode = attrStrict
@@ -1123,12 +987,6 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A
 		if len(sensitiveWords) == 0 {
 			sensitiveWords = attrWords
 		}
-		if cloakCfg == nil || cloakCfg.CacheUserID == nil {
-			cacheUserID = attrCache
-		}
-	} else if cloakCfg == nil || cloakCfg.CacheUserID == nil {
-		_, _, _, attrCache := getCloakConfigFromAuth(auth)
-		cacheUserID = attrCache
 	}
 
 	// Determine if cloaking should be applied
@@ -1142,7 +1000,7 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A
 	}
 
 	// Inject fake user ID
-	payload = injectFakeUserID(payload, apiKey, cacheUserID)
+	payload = injectFakeUserID(payload)
 
 	// Apply sensitive word obfuscation
 	if len(sensitiveWords) > 0 {
diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go
index dd29ed8ad7..36fb7ad4e2 100644
--- a/internal/runtime/executor/claude_executor_test.go
+++ b/internal/runtime/executor/claude_executor_test.go
@@ -2,18 +2,9 @@ package executor
 
 import (
 	"bytes"
-	"context"
-	"io"
-	"net/http"
-	"net/http/httptest"
 	"testing"
 
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
-	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
-	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
 	"github.com/tidwall/gjson"
-	"github.com/tidwall/sjson"
 )
 
 func TestApplyClaudeToolPrefix(t *testing.T) {
@@ -34,18 +25,6 @@ func TestApplyClaudeToolPrefix(t *testing.T) {
 	}
 }
 
-func TestApplyClaudeToolPrefix_WithToolReference(t *testing.T) {
-	input := []byte(`{"tools":[{"name":"alpha"}],"messages":[{"role":"user","content":[{"type":"tool_reference","tool_name":"beta"},{"type":"tool_reference","tool_name":"proxy_gamma"}]}]}`)
-	out := applyClaudeToolPrefix(input, "proxy_")
-
-	if got := gjson.GetBytes(out, "messages.0.content.0.tool_name").String(); got != "proxy_beta" {
-		t.Fatalf("messages.0.content.0.tool_name = %q, want %q", got, "proxy_beta")
-	}
-	if got := gjson.GetBytes(out, "messages.0.content.1.tool_name").String(); got != "proxy_gamma" {
-		t.Fatalf("messages.0.content.1.tool_name = %q, want %q", got, "proxy_gamma")
-	}
-}
-
 func TestApplyClaudeToolPrefix_SkipsBuiltinTools(t *testing.T) {
 	input := []byte(`{"tools":[{"type":"web_search_20250305","name":"web_search"},{"name":"my_custom_tool","input_schema":{"type":"object"}}]}`)
 	out := applyClaudeToolPrefix(input, "proxy_")
@@ -58,97 +37,6 @@ func TestApplyClaudeToolPrefix_SkipsBuiltinTools(t *testing.T) {
 	}
 }
 
-func TestApplyClaudeToolPrefix_BuiltinToolSkipped(t *testing.T) {
-	body := []byte(`{
-		"tools": [
-			{"type": "web_search_20250305", "name": "web_search", "max_uses": 5},
-			{"name": "Read"}
-		],
-		"messages": [
-			{"role": "user", "content": [
-				{"type": "tool_use", "name": "web_search", "id": "ws1", "input": {}},
-				{"type": "tool_use", "name": "Read", "id": "r1", "input": {}}
-			]}
-		]
-	}`)
-	out := applyClaudeToolPrefix(body, "proxy_")
-
-	if got := gjson.GetBytes(out, "tools.0.name").String(); got != "web_search" {
-		t.Fatalf("tools.0.name = %q, want %q", got, "web_search")
-	}
-	if got := gjson.GetBytes(out, "messages.0.content.0.name").String(); got != "web_search" {
-		t.Fatalf("messages.0.content.0.name = %q, want %q", got, "web_search")
-	}
-	if got := gjson.GetBytes(out, "tools.1.name").String(); got != "proxy_Read" {
-		t.Fatalf("tools.1.name = %q, want %q", got, "proxy_Read")
-	}
-	if got := gjson.GetBytes(out, "messages.0.content.1.name").String(); got != "proxy_Read" {
-		t.Fatalf("messages.0.content.1.name = %q, want %q", got, "proxy_Read")
-	}
-}
-
-func TestApplyClaudeToolPrefix_KnownBuiltinInHistoryOnly(t *testing.T) {
-	body := []byte(`{
-		"tools": [
-			{"name": "Read"}
-		],
-		"messages": [
-			{"role": "user", "content": [
-				{"type": "tool_use", "name": "web_search", "id": "ws1", "input": {}}
-			]}
-		]
-	}`)
-	out := applyClaudeToolPrefix(body, "proxy_")
-
-	if got := gjson.GetBytes(out, "messages.0.content.0.name").String(); got != "web_search" {
-		t.Fatalf("messages.0.content.0.name = %q, want %q", got, "web_search")
-	}
-	if got := gjson.GetBytes(out, "tools.0.name").String(); got != "proxy_Read" {
-		t.Fatalf("tools.0.name = %q, want %q", got, "proxy_Read")
-	}
-}
-
-func TestApplyClaudeToolPrefix_CustomToolsPrefixed(t *testing.T) {
-	body := []byte(`{
-		"tools": [{"name": "Read"}, {"name": "Write"}],
-		"messages": [
-			{"role": "user", "content": [
-				{"type": "tool_use", "name": "Read", "id": "r1", "input": {}},
-				{"type": "tool_use", "name": "Write", "id": "w1", "input": {}}
-			]}
-		]
-	}`)
-	out := applyClaudeToolPrefix(body, "proxy_")
-
-	if got := gjson.GetBytes(out, "tools.0.name").String(); got != "proxy_Read" {
-		t.Fatalf("tools.0.name = %q, want %q", got, "proxy_Read")
-	}
-	if got := gjson.GetBytes(out, "tools.1.name").String(); got != "proxy_Write" {
-		t.Fatalf("tools.1.name = %q, want %q", got, "proxy_Write")
-	}
-	if got := gjson.GetBytes(out, "messages.0.content.0.name").String(); got != "proxy_Read" {
-		t.Fatalf("messages.0.content.0.name = %q, want %q", got, "proxy_Read")
-	}
-	if got := gjson.GetBytes(out, "messages.0.content.1.name").String(); got != "proxy_Write" {
-		t.Fatalf("messages.0.content.1.name = %q, want %q", got, "proxy_Write")
-	}
-}
-
-func TestApplyClaudeToolPrefix_ToolChoiceBuiltin(t *testing.T) {
-	body := []byte(`{
-		"tools": [
-			{"type": "web_search_20250305", "name": "web_search"},
-			{"name": "Read"}
-		],
-		"tool_choice": {"type": "tool", "name": "web_search"}
-	}`)
-	out := applyClaudeToolPrefix(body, "proxy_")
-
-	if got := gjson.GetBytes(out, "tool_choice.name").String(); got != "web_search" {
-		t.Fatalf("tool_choice.name = %q, want %q", got, "web_search")
-	}
-}
-
 func TestStripClaudeToolPrefixFromResponse(t *testing.T) {
 	input := []byte(`{"content":[{"type":"tool_use","name":"proxy_alpha","id":"t1","input":{}},{"type":"tool_use","name":"bravo","id":"t2","input":{}}]}`)
 	out := stripClaudeToolPrefixFromResponse(input, "proxy_")
@@ -161,18 +49,6 @@ func TestStripClaudeToolPrefixFromResponse(t *testing.T) {
 	}
 }
 
-func TestStripClaudeToolPrefixFromResponse_WithToolReference(t *testing.T) {
-	input := []byte(`{"content":[{"type":"tool_reference","tool_name":"proxy_alpha"},{"type":"tool_reference","tool_name":"bravo"}]}`)
-	out := stripClaudeToolPrefixFromResponse(input, "proxy_")
-
-	if got := gjson.GetBytes(out, "content.0.tool_name").String(); got != "alpha" {
-		t.Fatalf("content.0.tool_name = %q, want %q", got, "alpha")
-	}
-	if got := gjson.GetBytes(out, "content.1.tool_name").String(); got != "bravo" {
-		t.Fatalf("content.1.tool_name = %q, want %q", got, "bravo")
-	}
-}
-
 func TestStripClaudeToolPrefixFromStreamLine(t *testing.T) {
 	line := []byte(`data: {"type":"content_block_start","content_block":{"type":"tool_use","name":"proxy_alpha","id":"t1"},"index":0}`)
 	out := stripClaudeToolPrefixFromStreamLine(line, "proxy_")
@@ -185,166 +61,3 @@ func TestStripClaudeToolPrefixFromStreamLine(t *testing.T) {
 		t.Fatalf("content_block.name = %q, want %q", got, "alpha")
 	}
 }
-
-func TestStripClaudeToolPrefixFromStreamLine_WithToolReference(t *testing.T) {
-	line := []byte(`data: {"type":"content_block_start","content_block":{"type":"tool_reference","tool_name":"proxy_beta"},"index":0}`)
-	out := stripClaudeToolPrefixFromStreamLine(line, "proxy_")
-
-	payload := bytes.TrimSpace(out)
-	if bytes.HasPrefix(payload, []byte("data:")) {
-		payload = bytes.TrimSpace(payload[len("data:"):])
-	}
-	if got := gjson.GetBytes(payload, "content_block.tool_name").String(); got != "beta" {
-		t.Fatalf("content_block.tool_name = %q, want %q", got, "beta")
-	}
-}
-
-func TestApplyClaudeToolPrefix_NestedToolReference(t *testing.T) {
-	input := []byte(`{"messages":[{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_123","content":[{"type":"tool_reference","tool_name":"mcp__nia__manage_resource"}]}]}]}`)
-	out := applyClaudeToolPrefix(input, "proxy_")
-	got := gjson.GetBytes(out, "messages.0.content.0.content.0.tool_name").String()
-	if got != "proxy_mcp__nia__manage_resource" {
-		t.Fatalf("nested tool_reference tool_name = %q, want %q", got, "proxy_mcp__nia__manage_resource")
-	}
-}
-
-func TestClaudeExecutor_ReusesUserIDAcrossModelsWhenCacheEnabled(t *testing.T) {
-	resetUserIDCache()
-
-	var userIDs []string
-	var requestModels []string
-	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, _ := io.ReadAll(r.Body)
-		userID := gjson.GetBytes(body, "metadata.user_id").String()
-		model := gjson.GetBytes(body, "model").String()
-		userIDs = append(userIDs, userID)
-		requestModels = append(requestModels, model)
-		t.Logf("HTTP Server received request: model=%s, user_id=%s, url=%s", model, userID, r.URL.String())
-		w.Header().Set("Content-Type", "application/json")
-		_, _ = w.Write([]byte(`{"id":"msg_1","type":"message","model":"claude-3-5-sonnet","role":"assistant","content":[{"type":"text","text":"ok"}],"usage":{"input_tokens":1,"output_tokens":1}}`))
-	}))
-	defer server.Close()
-
-	t.Logf("End-to-end test: Fake HTTP server started at %s", server.URL)
-
-	cacheEnabled := true
-	executor := NewClaudeExecutor(&config.Config{
-		ClaudeKey: []config.ClaudeKey{
-			{
-				APIKey:  "key-123",
-				BaseURL: server.URL,
-				Cloak: &config.CloakConfig{
-					CacheUserID: &cacheEnabled,
-				},
-			},
-		},
-	})
-	auth := &cliproxyauth.Auth{Attributes: map[string]string{
-		"api_key":  "key-123",
-		"base_url": server.URL,
-	}}
-
-	payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`)
-	models := []string{"claude-3-5-sonnet", "claude-3-5-haiku"}
-	for _, model := range models {
-		t.Logf("Sending request for model: %s", model)
-		modelPayload, _ := sjson.SetBytes(payload, "model", model)
-		if _, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
-			Model:   model,
-			Payload: modelPayload,
-		}, cliproxyexecutor.Options{
-			SourceFormat: sdktranslator.FromString("claude"),
-		}); err != nil {
-			t.Fatalf("Execute(%s) error: %v", model, err)
-		}
-	}
-
-	if len(userIDs) != 2 {
-		t.Fatalf("expected 2 requests, got %d", len(userIDs))
-	}
-	if userIDs[0] == "" || userIDs[1] == "" {
-		t.Fatal("expected user_id to be populated")
-	}
-	t.Logf("user_id[0] (model=%s): %s", requestModels[0], userIDs[0])
-	t.Logf("user_id[1] (model=%s): %s", requestModels[1], userIDs[1])
-	if userIDs[0] != userIDs[1] {
-		t.Fatalf("expected user_id to be reused across models, got %q and %q", userIDs[0], userIDs[1])
-	}
-	if !isValidUserID(userIDs[0]) {
-		t.Fatalf("user_id %q is not valid", userIDs[0])
-	}
-	t.Logf("✓ End-to-end test passed: Same user_id (%s) was used for both models", userIDs[0])
-}
-
-func TestClaudeExecutor_GeneratesNewUserIDByDefault(t *testing.T) {
-	resetUserIDCache()
-
-	var userIDs []string
-	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		body, _ := io.ReadAll(r.Body)
-		userIDs = append(userIDs, gjson.GetBytes(body, "metadata.user_id").String())
-		w.Header().Set("Content-Type", "application/json")
-		_, _ = w.Write([]byte(`{"id":"msg_1","type":"message","model":"claude-3-5-sonnet","role":"assistant","content":[{"type":"text","text":"ok"}],"usage":{"input_tokens":1,"output_tokens":1}}`))
-	}))
-	defer server.Close()
-
-	executor := NewClaudeExecutor(&config.Config{})
-	auth := &cliproxyauth.Auth{Attributes: map[string]string{
-		"api_key":  "key-123",
-		"base_url": server.URL,
-	}}
-
-	payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`)
-
-	for i := 0; i < 2; i++ {
-		if _, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
-			Model:   "claude-3-5-sonnet",
-			Payload: payload,
-		}, cliproxyexecutor.Options{
-			SourceFormat: sdktranslator.FromString("claude"),
-		}); err != nil {
-			t.Fatalf("Execute call %d error: %v", i, err)
-		}
-	}
-
-	if len(userIDs) != 2 {
-		t.Fatalf("expected 2 requests, got %d", len(userIDs))
-	}
-	if userIDs[0] == "" || userIDs[1] == "" {
-		t.Fatal("expected user_id to be populated")
-	}
-	if userIDs[0] == userIDs[1] {
-		t.Fatalf("expected user_id to change when caching is not enabled, got identical values %q", userIDs[0])
-	}
-	if !isValidUserID(userIDs[0]) || !isValidUserID(userIDs[1]) {
-		t.Fatalf("user_ids should be valid, got %q and %q", userIDs[0], userIDs[1])
-	}
-}
-
-func TestStripClaudeToolPrefixFromResponse_NestedToolReference(t *testing.T) {
-	input := []byte(`{"content":[{"type":"tool_result","tool_use_id":"toolu_123","content":[{"type":"tool_reference","tool_name":"proxy_mcp__nia__manage_resource"}]}]}`)
-	out := stripClaudeToolPrefixFromResponse(input, "proxy_")
-	got := gjson.GetBytes(out, "content.0.content.0.tool_name").String()
-	if got != "mcp__nia__manage_resource" {
-		t.Fatalf("nested tool_reference tool_name = %q, want %q", got, "mcp__nia__manage_resource")
-	}
-}
-
-func TestApplyClaudeToolPrefix_NestedToolReferenceWithStringContent(t *testing.T) {
-	// tool_result.content can be a string - should not be processed
-	input := []byte(`{"messages":[{"role":"user","content":[{"type":"tool_result","tool_use_id":"toolu_123","content":"plain string result"}]}]}`)
-	out := applyClaudeToolPrefix(input, "proxy_")
-	got := gjson.GetBytes(out, "messages.0.content.0.content").String()
-	if got != "plain string result" {
-		t.Fatalf("string content should remain unchanged = %q", got)
-	}
-}
-
-func TestApplyClaudeToolPrefix_SkipsBuiltinToolReference(t *testing.T) {
-	input := []byte(`{"tools":[{"type":"web_search_20250305","name":"web_search"}],"messages":[{"role":"user","content":[{"type":"tool_result","tool_use_id":"t1","content":[{"type":"tool_reference","tool_name":"web_search"}]}]}]}`)
-	out := applyClaudeToolPrefix(input, "proxy_")
-	got := gjson.GetBytes(out, "messages.0.content.0.content.0.tool_name").String()
-	if got != "web_search" {
-		t.Fatalf("built-in tool_reference should not be prefixed, got %q", got)
-	}
-}
diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go
index 01de8f9707..09ce644e35 100644
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -27,11 +27,6 @@ import (
 	"github.com/google/uuid"
 )
 
-const (
-	codexClientVersion = "0.101.0"
-	codexUserAgent     = "codex_cli_rs/0.101.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464"
-)
-
 var dataTag = []byte("data:")
 
 // CodexExecutor is a stateless executor for Codex (OpenAI Responses API entrypoint).
@@ -93,13 +88,12 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -182,8 +176,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 		}
 
 		var param any
-		out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, line, &param)
-		resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+		out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, line, &param)
+		resp = cliproxyexecutor.Response{Payload: []byte(out)}
 		return resp, nil
 	}
 	err = statusErr{code: 408, msg: "stream error: stream disconnected before completion: stream closed before response.completed"}
@@ -203,13 +197,12 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai-response")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -272,12 +265,12 @@ func (e *CodexExecutor) executeCompact(ctx context.Context, auth *cliproxyauth.A
 	reporter.publish(ctx, parseOpenAIUsage(data))
 	reporter.ensurePublished(ctx)
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, originalPayload, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, data, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
 
-func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	if opts.Alt == "responses/compact" {
 		return nil, statusErr{code: http.StatusBadRequest, msg: "streaming not supported for /responses/compact"}
 	}
@@ -293,13 +286,12 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -362,6 +354,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		return nil, err
 	}
 	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
 	go func() {
 		defer close(out)
 		defer func() {
@@ -385,7 +378,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 				}
 			}
 
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, originalPayload, body, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(originalPayload), body, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
@@ -396,7 +389,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
-	return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+	return stream, nil
 }
 
 func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
@@ -404,7 +397,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	body, err := thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -641,9 +634,10 @@ func applyCodexHeaders(r *http.Request, auth *cliproxyauth.Auth, token string, s
 		ginHeaders = ginCtx.Request.Header
 	}
 
-	misc.EnsureHeader(r.Header, ginHeaders, "Version", codexClientVersion)
+	misc.EnsureHeader(r.Header, ginHeaders, "Version", "0.21.0")
+	misc.EnsureHeader(r.Header, ginHeaders, "Openai-Beta", "responses=experimental")
 	misc.EnsureHeader(r.Header, ginHeaders, "Session_id", uuid.NewString())
-	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", codexUserAgent)
+	misc.EnsureHeader(r.Header, ginHeaders, "User-Agent", "codex_cli_rs/0.50.0 (Mac OS 26.0.1; arm64) Apple_Terminal/464")
 
 	if stream {
 		r.Header.Set("Accept", "text/event-stream")
diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index cb3ffb5969..16ff015872 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -119,13 +119,12 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
 
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -224,8 +223,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 		if httpResp.StatusCode >= 200 && httpResp.StatusCode < 300 {
 			reporter.publish(ctx, parseGeminiCLIUsage(data))
 			var param any
-			out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, opts.OriginalRequest, payload, data, &param)
-			resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+			out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), payload, data, &param)
+			resp = cliproxyexecutor.Response{Payload: []byte(out)}
 			return resp, nil
 		}
 
@@ -256,7 +255,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 }
 
 // ExecuteStream performs a streaming request to the Gemini CLI API.
-func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	if opts.Alt == "responses/compact" {
 		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
@@ -273,13 +272,12 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
 
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
 	basePayload, err = thinking.ApplyThinking(basePayload, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -382,6 +380,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 		}
 
 		out := make(chan cliproxyexecutor.StreamChunk)
+		stream = out
 		go func(resp *http.Response, reqBody []byte, attemptModel string) {
 			defer close(out)
 			defer func() {
@@ -400,14 +399,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 						reporter.publish(ctx, detail)
 					}
 					if bytes.HasPrefix(line, dataTag) {
-						segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, bytes.Clone(line), &param)
+						segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone(line), &param)
 						for i := range segments {
 							out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 						}
 					}
 				}
 
-				segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, []byte("[DONE]"), &param)
+				segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param)
 				for i := range segments {
 					out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 				}
@@ -429,18 +428,18 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			appendAPIResponseChunk(ctx, e.cfg, data)
 			reporter.publish(ctx, parseGeminiCLIUsage(data))
 			var param any
-			segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, data, &param)
+			segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, data, &param)
 			for i := range segments {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 			}
 
-			segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, opts.OriginalRequest, reqBody, []byte("[DONE]"), &param)
+			segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param)
 			for i := range segments {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])}
 			}
 		}(httpResp, append([]byte(nil), payload...), attemptModel)
 
-		return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+		return stream, nil
 	}
 
 	if len(lastBody) > 0 {
@@ -486,7 +485,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	// The loop variable attemptModel is only used as the concrete model id sent to the upstream
 	// Gemini CLI endpoint when iterating fallback variants.
 	for range models {
-		payload := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+		payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 		payload, err = thinking.ApplyThinking(payload, req.Model, from.String(), to.String(), e.Identifier())
 		if err != nil {
@@ -545,7 +544,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 		if resp.StatusCode >= 200 && resp.StatusCode < 300 {
 			count := gjson.GetBytes(data, "totalTokens").Int()
 			translated := sdktranslator.TranslateTokenCount(respCtx, to, from, count, data)
-			return cliproxyexecutor.Response{Payload: []byte(translated), Headers: resp.Header.Clone()}, nil
+			return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 		}
 		lastStatus = resp.StatusCode
 		lastBody = append([]byte(nil), data...)
@@ -898,7 +897,8 @@ func parseRetryDelay(errorBody []byte) (*time.Duration, error) {
 		if matches := re.FindStringSubmatch(message); len(matches) > 1 {
 			seconds, err := strconv.Atoi(matches[1])
 			if err == nil {
-				return new(time.Duration(seconds) * time.Second), nil
+				duration := time.Duration(seconds) * time.Second
+				return &duration, nil
 			}
 		}
 	}
diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go
index 7c25b8935f..8f729f5bb9 100644
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -116,13 +116,12 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -204,13 +203,13 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
 
 // ExecuteStream performs a streaming request to the Gemini API.
-func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	if opts.Alt == "responses/compact" {
 		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
@@ -223,13 +222,12 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -298,6 +296,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		return nil, err
 	}
 	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
 	go func() {
 		defer close(out)
 		defer func() {
@@ -319,12 +318,12 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 			if detail, ok := parseGeminiStreamUsage(payload); ok {
 				reporter.publish(ctx, detail)
 			}
-			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(payload), &param)
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(payload), &param)
 			for i := range lines {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 			}
 		}
-		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone([]byte("[DONE]")), &param)
 		for i := range lines {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 		}
@@ -334,7 +333,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
-	return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+	return stream, nil
 }
 
 // CountTokens counts tokens for the given request using the Gemini API.
@@ -345,7 +344,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
-	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -415,7 +414,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 
 	count := gjson.GetBytes(data, "totalTokens").Int()
 	translated := sdktranslator.TranslateTokenCount(respCtx, to, from, count, data)
-	return cliproxyexecutor.Response{Payload: []byte(translated), Headers: resp.Header.Clone()}, nil
+	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }
 
 // Refresh refreshes the authentication credentials (no-op for Gemini API key).
diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go
index 7ad1c6186b..83456a86b4 100644
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -253,7 +253,7 @@ func (e *GeminiVertexExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 }
 
 // ExecuteStream performs a streaming request to the Vertex AI API.
-func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) {
+func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	if opts.Alt == "responses/compact" {
 		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
@@ -318,13 +318,12 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		from := opts.SourceFormat
 		to := sdktranslator.FromString("gemini")
 
-		originalPayloadSource := req.Payload
+		originalPayload := bytes.Clone(req.Payload)
 		if len(opts.OriginalRequest) > 0 {
-			originalPayloadSource = opts.OriginalRequest
+			originalPayload = bytes.Clone(opts.OriginalRequest)
 		}
-		originalPayload := originalPayloadSource
 		originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-		body = sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+		body = sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 		body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 		if err != nil {
@@ -418,8 +417,8 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
 
@@ -433,13 +432,12 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -523,13 +521,13 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	reporter.publish(ctx, parseGeminiUsage(data))
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
 
 // executeStreamWithServiceAccount handles streaming authentication using service account credentials.
-func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
@@ -538,13 +536,12 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -618,6 +615,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	}
 
 	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
 	go func() {
 		defer close(out)
 		defer func() {
@@ -634,12 +632,12 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 			if detail, ok := parseGeminiStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
 			for i := range lines {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 			}
 		}
-		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, []byte("[DONE]"), &param)
 		for i := range lines {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 		}
@@ -649,11 +647,11 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
-	return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+	return stream, nil
 }
 
 // executeStreamWithAPIKey handles streaming authentication using API key credentials.
-func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
@@ -662,13 +660,12 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -742,6 +739,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	}
 
 	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
 	go func() {
 		defer close(out)
 		defer func() {
@@ -758,12 +756,12 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 			if detail, ok := parseGeminiStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
+			lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
 			for i := range lines {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 			}
 		}
-		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
+		lines := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, []byte("[DONE]"), &param)
 		for i := range lines {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])}
 		}
@@ -773,7 +771,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
-	return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+	return stream, nil
 }
 
 // countTokensWithServiceAccount counts tokens using service account credentials.
@@ -783,7 +781,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 
-	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -857,7 +855,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	count := gjson.GetBytes(data, "totalTokens").Int()
 	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
-	return cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}, nil
+	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
 }
 
 // countTokensWithAPIKey handles token counting using API key credentials.
@@ -867,7 +865,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 
-	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	translatedReq, err := thinking.ApplyThinking(translatedReq, req.Model, from.String(), to.String(), e.Identifier())
 	if err != nil {
@@ -941,7 +939,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	appendAPIResponseChunk(ctx, e.cfg, data)
 	count := gjson.GetBytes(data, "totalTokens").Int()
 	out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data)
-	return cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}, nil
+	return cliproxyexecutor.Response{Payload: []byte(out)}, nil
 }
 
 // vertexCreds extracts project, location and raw service account JSON from auth metadata.
@@ -1005,8 +1003,6 @@ func vertexBaseURL(location string) string {
 	loc := strings.TrimSpace(location)
 	if loc == "" {
 		loc = "us-central1"
-	} else if loc == "global" {
-		return "https://aiplatform.googleapis.com"
 	}
 	return fmt.Sprintf("https://%s-aiplatform.googleapis.com", loc)
 }
diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go
index 65a0b8f81e..08a0a5af43 100644
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -4,16 +4,12 @@ import (
 	"bufio"
 	"bytes"
 	"context"
-	"crypto/hmac"
-	"crypto/sha256"
-	"encoding/hex"
 	"fmt"
 	"io"
 	"net/http"
 	"strings"
 	"time"
 
-	"github.com/google/uuid"
 	iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
@@ -91,13 +87,12 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
@@ -168,13 +163,13 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	var param any
 	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
 	// the original model name in the response for client compatibility.
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
 
 // ExecuteStream performs a streaming chat completion request.
-func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	if opts.Alt == "responses/compact" {
 		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
@@ -194,13 +189,12 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), "iflow", e.Identifier())
@@ -262,6 +256,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	}
 
 	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
 	go func() {
 		defer close(out)
 		defer func() {
@@ -279,7 +274,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			if detail, ok := parseOpenAIStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
@@ -293,7 +288,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		reporter.ensurePublished(ctx)
 	}()
 
-	return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+	return stream, nil
 }
 
 func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
@@ -301,7 +296,7 @@ func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	enc, err := tokenizerForModel(baseModel)
 	if err != nil {
@@ -456,20 +451,6 @@ func applyIFlowHeaders(r *http.Request, apiKey string, stream bool) {
 	r.Header.Set("Content-Type", "application/json")
 	r.Header.Set("Authorization", "Bearer "+apiKey)
 	r.Header.Set("User-Agent", iflowUserAgent)
-
-	// Generate session-id
-	sessionID := "session-" + generateUUID()
-	r.Header.Set("session-id", sessionID)
-
-	// Generate timestamp and signature
-	timestamp := time.Now().UnixMilli()
-	r.Header.Set("x-iflow-timestamp", fmt.Sprintf("%d", timestamp))
-
-	signature := createIFlowSignature(iflowUserAgent, sessionID, timestamp, apiKey)
-	if signature != "" {
-		r.Header.Set("x-iflow-signature", signature)
-	}
-
 	if stream {
 		r.Header.Set("Accept", "text/event-stream")
 	} else {
@@ -477,23 +458,6 @@ func applyIFlowHeaders(r *http.Request, apiKey string, stream bool) {
 	}
 }
 
-// createIFlowSignature generates HMAC-SHA256 signature for iFlow API requests.
-// The signature payload format is: userAgent:sessionId:timestamp
-func createIFlowSignature(userAgent, sessionID string, timestamp int64, apiKey string) string {
-	if apiKey == "" {
-		return ""
-	}
-	payload := fmt.Sprintf("%s:%s:%d", userAgent, sessionID, timestamp)
-	h := hmac.New(sha256.New, []byte(apiKey))
-	h.Write([]byte(payload))
-	return hex.EncodeToString(h.Sum(nil))
-}
-
-// generateUUID generates a random UUID v4 string.
-func generateUUID() string {
-	return uuid.New().String()
-}
-
 func iflowCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
 	if a == nil {
 		return "", ""
diff --git a/internal/runtime/executor/logging_helpers.go b/internal/runtime/executor/logging_helpers.go
index ae2aee3ffd..e987624335 100644
--- a/internal/runtime/executor/logging_helpers.go
+++ b/internal/runtime/executor/logging_helpers.go
@@ -80,7 +80,7 @@ func recordAPIRequest(ctx context.Context, cfg *config.Config, info upstreamRequ
 	writeHeaders(builder, info.Headers)
 	builder.WriteString("\nBody:\n")
 	if len(info.Body) > 0 {
-		builder.WriteString(string(info.Body))
+		builder.WriteString(string(bytes.Clone(info.Body)))
 	} else {
 		builder.WriteString("<empty>")
 	}
@@ -152,7 +152,7 @@ func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
 	if cfg == nil || !cfg.RequestLog {
 		return
 	}
-	data := bytes.TrimSpace(chunk)
+	data := bytes.TrimSpace(bytes.Clone(chunk))
 	if len(data) == 0 {
 		return
 	}
diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go
index d28b36251a..ee61556e5a 100644
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -88,13 +88,12 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		to = sdktranslator.FromString("openai-response")
 		endpoint = "/responses/compact"
 	}
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, opts.Stream)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream)
 	requestedModel := payloadRequestedModel(opts, req.Model)
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
 	if opts.Alt == "responses/compact" {
@@ -171,12 +170,12 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	reporter.ensurePublished(ctx)
 	// Translate response back to source format when needed
 	var param any
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, body, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, body, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
 
-func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	baseModel := thinking.ParseSuffix(req.Model).ModelName
 
 	reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth)
@@ -190,13 +189,12 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	requestedModel := payloadRequestedModel(opts, req.Model)
 	translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated, requestedModel)
 
@@ -258,6 +256,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		return nil, err
 	}
 	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
 	go func() {
 		defer close(out)
 		defer func() {
@@ -284,7 +283,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 
 			// OpenAI-compatible streams are SSE: lines typically prefixed with "data: ".
 			// Pass through translator; it yields one or more chunks for the target schema.
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, translated, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), translated, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
@@ -297,7 +296,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		// Ensure we record the request if no usage chunk was ever seen
 		reporter.ensurePublished(ctx)
 	}()
-	return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+	return stream, nil
 }
 
 func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
@@ -305,7 +304,7 @@ func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyau
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	modelForCounting := baseModel
 
diff --git a/internal/runtime/executor/proxy_helpers.go b/internal/runtime/executor/proxy_helpers.go
index 8998eb236b..ab0f626acc 100644
--- a/internal/runtime/executor/proxy_helpers.go
+++ b/internal/runtime/executor/proxy_helpers.go
@@ -6,7 +6,6 @@ import (
 	"net/http"
 	"net/url"
 	"strings"
-	"sync"
 	"time"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -15,19 +14,11 @@ import (
 	"golang.org/x/net/proxy"
 )
 
-// httpClientCache caches HTTP clients by proxy URL to enable connection reuse
-var (
-	httpClientCache      = make(map[string]*http.Client)
-	httpClientCacheMutex sync.RWMutex
-)
-
 // newProxyAwareHTTPClient creates an HTTP client with proper proxy configuration priority:
 // 1. Use auth.ProxyURL if configured (highest priority)
 // 2. Use cfg.ProxyURL if auth proxy is not configured
 // 3. Use RoundTripper from context if neither are configured
 //
-// This function caches HTTP clients by proxy URL to enable TCP/TLS connection reuse.
-//
 // Parameters:
 //   - ctx: The context containing optional RoundTripper
 //   - cfg: The application configuration
@@ -37,6 +28,11 @@ var (
 // Returns:
 //   - *http.Client: An HTTP client with configured proxy or transport
 func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client {
+	httpClient := &http.Client{}
+	if timeout > 0 {
+		httpClient.Timeout = timeout
+	}
+
 	// Priority 1: Use auth.ProxyURL if configured
 	var proxyURL string
 	if auth != nil {
@@ -48,39 +44,11 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip
 		proxyURL = strings.TrimSpace(cfg.ProxyURL)
 	}
 
-	// Build cache key from proxy URL (empty string for no proxy)
-	cacheKey := proxyURL
-
-	// Check cache first
-	httpClientCacheMutex.RLock()
-	if cachedClient, ok := httpClientCache[cacheKey]; ok {
-		httpClientCacheMutex.RUnlock()
-		// Return a wrapper with the requested timeout but shared transport
-		if timeout > 0 {
-			return &http.Client{
-				Transport: cachedClient.Transport,
-				Timeout:   timeout,
-			}
-		}
-		return cachedClient
-	}
-	httpClientCacheMutex.RUnlock()
-
-	// Create new client
-	httpClient := &http.Client{}
-	if timeout > 0 {
-		httpClient.Timeout = timeout
-	}
-
 	// If we have a proxy URL configured, set up the transport
 	if proxyURL != "" {
 		transport := buildProxyTransport(proxyURL)
 		if transport != nil {
 			httpClient.Transport = transport
-			// Cache the client
-			httpClientCacheMutex.Lock()
-			httpClientCache[cacheKey] = httpClient
-			httpClientCacheMutex.Unlock()
 			return httpClient
 		}
 		// If proxy setup failed, log and fall through to context RoundTripper
@@ -92,13 +60,6 @@ func newProxyAwareHTTPClient(ctx context.Context, cfg *config.Config, auth *clip
 		httpClient.Transport = rt
 	}
 
-	// Cache the client for no-proxy case
-	if proxyURL == "" {
-		httpClientCacheMutex.Lock()
-		httpClientCache[cacheKey] = httpClient
-		httpClientCacheMutex.Unlock()
-	}
-
 	return httpClient
 }
 
diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index bcc4a057ae..8df359e94e 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -22,7 +22,9 @@ import (
 )
 
 const (
-	qwenUserAgent = "QwenCode/0.10.3 (darwin; arm64)"
+	qwenUserAgent           = "google-api-nodejs-client/9.15.1"
+	qwenXGoogAPIClient      = "gl-node/22.17.0"
+	qwenClientMetadataValue = "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI"
 )
 
 // QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
@@ -79,13 +81,12 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -149,12 +150,12 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	var param any
 	// Note: TranslateNonStream uses req.Model (original with suffix) to preserve
 	// the original model name in the response for client compatibility.
-	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, opts.OriginalRequest, body, data, &param)
-	resp = cliproxyexecutor.Response{Payload: []byte(out), Headers: httpResp.Header.Clone()}
+	out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
+	resp = cliproxyexecutor.Response{Payload: []byte(out)}
 	return resp, nil
 }
 
-func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
+func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) {
 	if opts.Alt == "responses/compact" {
 		return nil, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
 	}
@@ -170,13 +171,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	originalPayloadSource := req.Payload
+	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
-		originalPayloadSource = opts.OriginalRequest
+		originalPayload = bytes.Clone(opts.OriginalRequest)
 	}
-	originalPayload := originalPayloadSource
 	originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true)
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, true)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true)
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 
 	body, err = thinking.ApplyThinking(body, req.Model, from.String(), to.String(), e.Identifier())
@@ -236,6 +236,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		return nil, err
 	}
 	out := make(chan cliproxyexecutor.StreamChunk)
+	stream = out
 	go func() {
 		defer close(out)
 		defer func() {
@@ -252,12 +253,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 			if detail, ok := parseOpenAIStreamUsage(line); ok {
 				reporter.publish(ctx, detail)
 			}
-			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, bytes.Clone(line), &param)
+			chunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
 			for i := range chunks {
 				out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
 			}
 		}
-		doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, opts.OriginalRequest, body, []byte("[DONE]"), &param)
+		doneChunks := sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone([]byte("[DONE]")), &param)
 		for i := range doneChunks {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(doneChunks[i])}
 		}
@@ -267,7 +268,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 			out <- cliproxyexecutor.StreamChunk{Err: errScan}
 		}
 	}()
-	return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
+	return stream, nil
 }
 
 func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
@@ -275,7 +276,7 @@ func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth,
 
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
-	body := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
+	body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false)
 
 	modelName := gjson.GetBytes(body, "model").String()
 	if strings.TrimSpace(modelName) == "" {
@@ -341,18 +342,8 @@ func applyQwenHeaders(r *http.Request, token string, stream bool) {
 	r.Header.Set("Content-Type", "application/json")
 	r.Header.Set("Authorization", "Bearer "+token)
 	r.Header.Set("User-Agent", qwenUserAgent)
-	r.Header.Set("X-Dashscope-Useragent", qwenUserAgent)
-	r.Header.Set("X-Stainless-Runtime-Version", "v22.17.0")
-	r.Header.Set("Sec-Fetch-Mode", "cors")
-	r.Header.Set("X-Stainless-Lang", "js")
-	r.Header.Set("X-Stainless-Arch", "arm64")
-	r.Header.Set("X-Stainless-Package-Version", "5.11.0")
-	r.Header.Set("X-Dashscope-Cachecontrol", "enable")
-	r.Header.Set("X-Stainless-Retry-Count", "0")
-	r.Header.Set("X-Stainless-Os", "MacOS")
-	r.Header.Set("X-Dashscope-Authtype", "qwen-oauth")
-	r.Header.Set("X-Stainless-Runtime", "node")
-
+	r.Header.Set("X-Goog-Api-Client", qwenXGoogAPIClient)
+	r.Header.Set("Client-Metadata", qwenClientMetadataValue)
 	if stream {
 		r.Header.Set("Accept", "text/event-stream")
 		return
diff --git a/internal/runtime/executor/thinking_providers.go b/internal/runtime/executor/thinking_providers.go
index b961db9035..5a143670e4 100644
--- a/internal/runtime/executor/thinking_providers.go
+++ b/internal/runtime/executor/thinking_providers.go
@@ -7,6 +7,5 @@ import (
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
-	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/kimi"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
 )
diff --git a/internal/runtime/executor/token_helpers.go b/internal/runtime/executor/token_helpers.go
index 5418859959..f4236f9be2 100644
--- a/internal/runtime/executor/token_helpers.go
+++ b/internal/runtime/executor/token_helpers.go
@@ -2,109 +2,43 @@ package executor
 
 import (
 	"fmt"
-	"regexp"
-	"strconv"
 	"strings"
-	"sync"
 
 	"github.com/tidwall/gjson"
 	"github.com/tiktoken-go/tokenizer"
 )
 
-// tokenizerCache stores tokenizer instances to avoid repeated creation
-var tokenizerCache sync.Map
-
-// TokenizerWrapper wraps a tokenizer codec with an adjustment factor for models
-// where tiktoken may not accurately estimate token counts (e.g., Claude models)
-type TokenizerWrapper struct {
-	Codec            tokenizer.Codec
-	AdjustmentFactor float64 // 1.0 means no adjustment, >1.0 means tiktoken underestimates
-}
-
-// Count returns the token count with adjustment factor applied
-func (tw *TokenizerWrapper) Count(text string) (int, error) {
-	count, err := tw.Codec.Count(text)
-	if err != nil {
-		return 0, err
-	}
-	if tw.AdjustmentFactor != 1.0 && tw.AdjustmentFactor > 0 {
-		return int(float64(count) * tw.AdjustmentFactor), nil
-	}
-	return count, nil
-}
-
-// getTokenizer returns a cached tokenizer for the given model.
-// This improves performance by avoiding repeated tokenizer creation.
-func getTokenizer(model string) (*TokenizerWrapper, error) {
-	// Check cache first
-	if cached, ok := tokenizerCache.Load(model); ok {
-		return cached.(*TokenizerWrapper), nil
-	}
-
-	// Cache miss, create new tokenizer
-	wrapper, err := tokenizerForModel(model)
-	if err != nil {
-		return nil, err
-	}
-
-	// Store in cache (use LoadOrStore to handle race conditions)
-	actual, _ := tokenizerCache.LoadOrStore(model, wrapper)
-	return actual.(*TokenizerWrapper), nil
-}
-
 // tokenizerForModel returns a tokenizer codec suitable for an OpenAI-style model id.
-// For Claude models, applies a 1.1 adjustment factor since tiktoken may underestimate.
-func tokenizerForModel(model string) (*TokenizerWrapper, error) {
+func tokenizerForModel(model string) (tokenizer.Codec, error) {
 	sanitized := strings.ToLower(strings.TrimSpace(model))
-
-	// Claude models use cl100k_base with 1.1 adjustment factor
-	// because tiktoken may underestimate Claude's actual token count
-	if strings.Contains(sanitized, "claude") || strings.HasPrefix(sanitized, "kiro-") || strings.HasPrefix(sanitized, "amazonq-") {
-		enc, err := tokenizer.Get(tokenizer.Cl100kBase)
-		if err != nil {
-			return nil, err
-		}
-		return &TokenizerWrapper{Codec: enc, AdjustmentFactor: 1.1}, nil
-	}
-
-	var enc tokenizer.Codec
-	var err error
-
 	switch {
 	case sanitized == "":
-		enc, err = tokenizer.Get(tokenizer.Cl100kBase)
-	case strings.HasPrefix(sanitized, "gpt-5.2"):
-		enc, err = tokenizer.ForModel(tokenizer.GPT5)
-	case strings.HasPrefix(sanitized, "gpt-5.1"):
-		enc, err = tokenizer.ForModel(tokenizer.GPT5)
+		return tokenizer.Get(tokenizer.Cl100kBase)
 	case strings.HasPrefix(sanitized, "gpt-5"):
-		enc, err = tokenizer.ForModel(tokenizer.GPT5)
+		return tokenizer.ForModel(tokenizer.GPT5)
+	case strings.HasPrefix(sanitized, "gpt-5.1"):
+		return tokenizer.ForModel(tokenizer.GPT5)
 	case strings.HasPrefix(sanitized, "gpt-4.1"):
-		enc, err = tokenizer.ForModel(tokenizer.GPT41)
+		return tokenizer.ForModel(tokenizer.GPT41)
 	case strings.HasPrefix(sanitized, "gpt-4o"):
-		enc, err = tokenizer.ForModel(tokenizer.GPT4o)
+		return tokenizer.ForModel(tokenizer.GPT4o)
 	case strings.HasPrefix(sanitized, "gpt-4"):
-		enc, err = tokenizer.ForModel(tokenizer.GPT4)
+		return tokenizer.ForModel(tokenizer.GPT4)
 	case strings.HasPrefix(sanitized, "gpt-3.5"), strings.HasPrefix(sanitized, "gpt-3"):
-		enc, err = tokenizer.ForModel(tokenizer.GPT35Turbo)
+		return tokenizer.ForModel(tokenizer.GPT35Turbo)
 	case strings.HasPrefix(sanitized, "o1"):
-		enc, err = tokenizer.ForModel(tokenizer.O1)
+		return tokenizer.ForModel(tokenizer.O1)
 	case strings.HasPrefix(sanitized, "o3"):
-		enc, err = tokenizer.ForModel(tokenizer.O3)
+		return tokenizer.ForModel(tokenizer.O3)
 	case strings.HasPrefix(sanitized, "o4"):
-		enc, err = tokenizer.ForModel(tokenizer.O4Mini)
+		return tokenizer.ForModel(tokenizer.O4Mini)
 	default:
-		enc, err = tokenizer.Get(tokenizer.O200kBase)
+		return tokenizer.Get(tokenizer.O200kBase)
 	}
-
-	if err != nil {
-		return nil, err
-	}
-	return &TokenizerWrapper{Codec: enc, AdjustmentFactor: 1.0}, nil
 }
 
 // countOpenAIChatTokens approximates prompt tokens for OpenAI chat completions payloads.
-func countOpenAIChatTokens(enc *TokenizerWrapper, payload []byte) (int64, error) {
+func countOpenAIChatTokens(enc tokenizer.Codec, payload []byte) (int64, error) {
 	if enc == nil {
 		return 0, fmt.Errorf("encoder is nil")
 	}
@@ -128,206 +62,11 @@ func countOpenAIChatTokens(enc *TokenizerWrapper, payload []byte) (int64, error)
 		return 0, nil
 	}
 
-	// Count text tokens
 	count, err := enc.Count(joined)
 	if err != nil {
 		return 0, err
 	}
-
-	// Extract and add image tokens from placeholders
-	imageTokens := extractImageTokens(joined)
-
-	return int64(count) + int64(imageTokens), nil
-}
-
-// countClaudeChatTokens approximates prompt tokens for Claude API chat completions payloads.
-// This handles Claude's message format with system, messages, and tools.
-// Image tokens are estimated based on image dimensions when available.
-func countClaudeChatTokens(enc *TokenizerWrapper, payload []byte) (int64, error) {
-	if enc == nil {
-		return 0, fmt.Errorf("encoder is nil")
-	}
-	if len(payload) == 0 {
-		return 0, nil
-	}
-
-	root := gjson.ParseBytes(payload)
-	segments := make([]string, 0, 32)
-
-	// Collect system prompt (can be string or array of content blocks)
-	collectClaudeSystem(root.Get("system"), &segments)
-
-	// Collect messages
-	collectClaudeMessages(root.Get("messages"), &segments)
-
-	// Collect tools
-	collectClaudeTools(root.Get("tools"), &segments)
-
-	joined := strings.TrimSpace(strings.Join(segments, "\n"))
-	if joined == "" {
-		return 0, nil
-	}
-
-	// Count text tokens
-	count, err := enc.Count(joined)
-	if err != nil {
-		return 0, err
-	}
-
-	// Extract and add image tokens from placeholders
-	imageTokens := extractImageTokens(joined)
-
-	return int64(count) + int64(imageTokens), nil
-}
-
-// imageTokenPattern matches [IMAGE:xxx tokens] format for extracting estimated image tokens
-var imageTokenPattern = regexp.MustCompile(`\[IMAGE:(\d+) tokens\]`)
-
-// extractImageTokens extracts image token estimates from placeholder text.
-// Placeholders are in the format [IMAGE:xxx tokens] where xxx is the estimated token count.
-func extractImageTokens(text string) int {
-	matches := imageTokenPattern.FindAllStringSubmatch(text, -1)
-	total := 0
-	for _, match := range matches {
-		if len(match) > 1 {
-			if tokens, err := strconv.Atoi(match[1]); err == nil {
-				total += tokens
-			}
-		}
-	}
-	return total
-}
-
-// estimateImageTokens calculates estimated tokens for an image based on dimensions.
-// Based on Claude's image token calculation: tokens ≈ (width * height) / 750
-// Minimum 85 tokens, maximum 1590 tokens (for 1568x1568 images).
-func estimateImageTokens(width, height float64) int {
-	if width <= 0 || height <= 0 {
-		// No valid dimensions, use default estimate (medium-sized image)
-		return 1000
-	}
-
-	tokens := int(width * height / 750)
-
-	// Apply bounds
-	if tokens < 85 {
-		tokens = 85
-	}
-	if tokens > 1590 {
-		tokens = 1590
-	}
-
-	return tokens
-}
-
-// collectClaudeSystem extracts text from Claude's system field.
-// System can be a string or an array of content blocks.
-func collectClaudeSystem(system gjson.Result, segments *[]string) {
-	if !system.Exists() {
-		return
-	}
-	if system.Type == gjson.String {
-		addIfNotEmpty(segments, system.String())
-		return
-	}
-	if system.IsArray() {
-		system.ForEach(func(_, block gjson.Result) bool {
-			blockType := block.Get("type").String()
-			if blockType == "text" || blockType == "" {
-				addIfNotEmpty(segments, block.Get("text").String())
-			}
-			// Also handle plain string blocks
-			if block.Type == gjson.String {
-				addIfNotEmpty(segments, block.String())
-			}
-			return true
-		})
-	}
-}
-
-// collectClaudeMessages extracts text from Claude's messages array.
-func collectClaudeMessages(messages gjson.Result, segments *[]string) {
-	if !messages.Exists() || !messages.IsArray() {
-		return
-	}
-	messages.ForEach(func(_, message gjson.Result) bool {
-		addIfNotEmpty(segments, message.Get("role").String())
-		collectClaudeContent(message.Get("content"), segments)
-		return true
-	})
-}
-
-// collectClaudeContent extracts text from Claude's content field.
-// Content can be a string or an array of content blocks.
-// For images, estimates token count based on dimensions when available.
-func collectClaudeContent(content gjson.Result, segments *[]string) {
-	if !content.Exists() {
-		return
-	}
-	if content.Type == gjson.String {
-		addIfNotEmpty(segments, content.String())
-		return
-	}
-	if content.IsArray() {
-		content.ForEach(func(_, part gjson.Result) bool {
-			partType := part.Get("type").String()
-			switch partType {
-			case "text":
-				addIfNotEmpty(segments, part.Get("text").String())
-			case "image":
-				// Estimate image tokens based on dimensions if available
-				source := part.Get("source")
-				if source.Exists() {
-					width := source.Get("width").Float()
-					height := source.Get("height").Float()
-					if width > 0 && height > 0 {
-						tokens := estimateImageTokens(width, height)
-						addIfNotEmpty(segments, fmt.Sprintf("[IMAGE:%d tokens]", tokens))
-					} else {
-						// No dimensions available, use default estimate
-						addIfNotEmpty(segments, "[IMAGE:1000 tokens]")
-					}
-				} else {
-					// No source info, use default estimate
-					addIfNotEmpty(segments, "[IMAGE:1000 tokens]")
-				}
-			case "tool_use":
-				addIfNotEmpty(segments, part.Get("id").String())
-				addIfNotEmpty(segments, part.Get("name").String())
-				if input := part.Get("input"); input.Exists() {
-					addIfNotEmpty(segments, input.Raw)
-				}
-			case "tool_result":
-				addIfNotEmpty(segments, part.Get("tool_use_id").String())
-				collectClaudeContent(part.Get("content"), segments)
-			case "thinking":
-				addIfNotEmpty(segments, part.Get("thinking").String())
-			default:
-				// For unknown types, try to extract any text content
-				if part.Type == gjson.String {
-					addIfNotEmpty(segments, part.String())
-				} else if part.Type == gjson.JSON {
-					addIfNotEmpty(segments, part.Raw)
-				}
-			}
-			return true
-		})
-	}
-}
-
-// collectClaudeTools extracts text from Claude's tools array.
-func collectClaudeTools(tools gjson.Result, segments *[]string) {
-	if !tools.Exists() || !tools.IsArray() {
-		return
-	}
-	tools.ForEach(func(_, tool gjson.Result) bool {
-		addIfNotEmpty(segments, tool.Get("name").String())
-		addIfNotEmpty(segments, tool.Get("description").String())
-		if inputSchema := tool.Get("input_schema"); inputSchema.Exists() {
-			addIfNotEmpty(segments, inputSchema.Raw)
-		}
-		return true
-	})
+	return int64(count), nil
 }
 
 // buildOpenAIUsageJSON returns a minimal usage structure understood by downstream translators.
diff --git a/internal/runtime/executor/usage_helpers.go b/internal/runtime/executor/usage_helpers.go
index a642fac2b9..00f547df22 100644
--- a/internal/runtime/executor/usage_helpers.go
+++ b/internal/runtime/executor/usage_helpers.go
@@ -252,44 +252,6 @@ func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) {
 	return detail, true
 }
 
-func parseOpenAIResponsesUsageDetail(usageNode gjson.Result) usage.Detail {
-	detail := usage.Detail{
-		InputTokens:  usageNode.Get("input_tokens").Int(),
-		OutputTokens: usageNode.Get("output_tokens").Int(),
-		TotalTokens:  usageNode.Get("total_tokens").Int(),
-	}
-	if detail.TotalTokens == 0 {
-		detail.TotalTokens = detail.InputTokens + detail.OutputTokens
-	}
-	if cached := usageNode.Get("input_tokens_details.cached_tokens"); cached.Exists() {
-		detail.CachedTokens = cached.Int()
-	}
-	if reasoning := usageNode.Get("output_tokens_details.reasoning_tokens"); reasoning.Exists() {
-		detail.ReasoningTokens = reasoning.Int()
-	}
-	return detail
-}
-
-func parseOpenAIResponsesUsage(data []byte) usage.Detail {
-	usageNode := gjson.ParseBytes(data).Get("usage")
-	if !usageNode.Exists() {
-		return usage.Detail{}
-	}
-	return parseOpenAIResponsesUsageDetail(usageNode)
-}
-
-func parseOpenAIResponsesStreamUsage(line []byte) (usage.Detail, bool) {
-	payload := jsonPayload(line)
-	if len(payload) == 0 || !gjson.ValidBytes(payload) {
-		return usage.Detail{}, false
-	}
-	usageNode := gjson.GetBytes(payload, "usage")
-	if !usageNode.Exists() {
-		return usage.Detail{}, false
-	}
-	return parseOpenAIResponsesUsageDetail(usageNode), true
-}
-
 func parseClaudeUsage(data []byte) usage.Detail {
 	usageNode := gjson.ParseBytes(data).Get("usage")
 	if !usageNode.Exists() {
diff --git a/internal/store/gitstore.go b/internal/store/gitstore.go
index c8db660cb3..3b68e4b0af 100644
--- a/internal/store/gitstore.go
+++ b/internal/store/gitstore.go
@@ -21,9 +21,6 @@ import (
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 )
 
-// gcInterval defines minimum time between garbage collection runs.
-const gcInterval = 5 * time.Minute
-
 // GitTokenStore persists token records and auth metadata using git as the backing storage.
 type GitTokenStore struct {
 	mu        sync.Mutex
@@ -34,7 +31,6 @@ type GitTokenStore struct {
 	remote    string
 	username  string
 	password  string
-	lastGC    time.Time
 }
 
 // NewGitTokenStore creates a token store that saves credentials to disk through the
@@ -617,7 +613,6 @@ func (s *GitTokenStore) commitAndPushLocked(message string, relPaths ...string)
 	} else if errRewrite := s.rewriteHeadAsSingleCommit(repo, headRef.Name(), commitHash, message, signature); errRewrite != nil {
 		return errRewrite
 	}
-	s.maybeRunGC(repo)
 	if err = repo.Push(&git.PushOptions{Auth: s.gitAuth(), Force: true}); err != nil {
 		if errors.Is(err, git.NoErrAlreadyUpToDate) {
 			return nil
@@ -657,23 +652,6 @@ func (s *GitTokenStore) rewriteHeadAsSingleCommit(repo *git.Repository, branch p
 	return nil
 }
 
-func (s *GitTokenStore) maybeRunGC(repo *git.Repository) {
-	now := time.Now()
-	if now.Sub(s.lastGC) < gcInterval {
-		return
-	}
-	s.lastGC = now
-
-	pruneOpts := git.PruneOptions{
-		OnlyObjectsOlderThan: now,
-		Handler:              repo.DeleteObject,
-	}
-	if err := repo.Prune(pruneOpts); err != nil && !errors.Is(err, git.ErrLooseObjectsNotSupported) {
-		return
-	}
-	_ = repo.RepackObjects(&git.RepackConfig{})
-}
-
 // PersistConfig commits and pushes configuration changes to git.
 func (s *GitTokenStore) PersistConfig(_ context.Context) error {
 	if err := s.EnsureRepository(); err != nil {
diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go
index 8a5a1d7d27..58c262868c 100644
--- a/internal/thinking/apply.go
+++ b/internal/thinking/apply.go
@@ -18,7 +18,6 @@ var providerAppliers = map[string]ProviderApplier{
 	"codex":       nil,
 	"iflow":       nil,
 	"antigravity": nil,
-	"kimi":        nil,
 }
 
 // GetProviderApplier returns the ProviderApplier for the given provider name.
@@ -327,9 +326,6 @@ func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
 			return config
 		}
 		return extractOpenAIConfig(body)
-	case "kimi":
-		// Kimi uses OpenAI-compatible reasoning_effort format
-		return extractOpenAIConfig(body)
 	default:
 		return ThinkingConfig{}
 	}
@@ -392,12 +388,7 @@ func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
 	}
 
 	// Check thinkingLevel first (Gemini 3 format takes precedence)
-	level := gjson.GetBytes(body, prefix+".thinkingLevel")
-	if !level.Exists() {
-		// Google official Gemini Python SDK sends snake_case field names
-		level = gjson.GetBytes(body, prefix+".thinking_level")
-	}
-	if level.Exists() {
+	if level := gjson.GetBytes(body, prefix+".thinkingLevel"); level.Exists() {
 		value := level.String()
 		switch value {
 		case "none":
@@ -410,12 +401,7 @@ func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
 	}
 
 	// Check thinkingBudget (Gemini 2.5 format)
-	budget := gjson.GetBytes(body, prefix+".thinkingBudget")
-	if !budget.Exists() {
-		// Google official Gemini Python SDK sends snake_case field names
-		budget = gjson.GetBytes(body, prefix+".thinking_budget")
-	}
-	if budget.Exists() {
+	if budget := gjson.GetBytes(body, prefix+".thinkingBudget"); budget.Exists() {
 		value := int(budget.Int())
 		switch value {
 		case 0:
diff --git a/internal/thinking/provider/antigravity/apply.go b/internal/thinking/provider/antigravity/apply.go
index d202035fc6..9c1c79f6da 100644
--- a/internal/thinking/provider/antigravity/apply.go
+++ b/internal/thinking/provider/antigravity/apply.go
@@ -94,10 +94,8 @@ func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig, m
 }
 
 func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
-	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
-	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_budget")
-	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_level")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
 
@@ -116,30 +114,28 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig)
 
 	level := string(config.Level)
 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
-
-	// Respect user's explicit includeThoughts setting from original body; default to true if not set
-	// Support both camelCase and snake_case variants
-	includeThoughts := true
-	if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-	} else if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-	}
-	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
 	return result, nil
 }
 
 func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo, isClaude bool) ([]byte, error) {
-	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
-	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_level")
-	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_budget")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
 
 	budget := config.Budget
+	includeThoughts := false
+	switch config.Mode {
+	case thinking.ModeNone:
+		includeThoughts = false
+	case thinking.ModeAuto:
+		includeThoughts = true
+	default:
+		includeThoughts = budget > 0
+	}
 
-	// Apply Claude-specific constraints first to get the final budget value
+	// Apply Claude-specific constraints
 	if isClaude && modelInfo != nil {
 		budget, result = a.normalizeClaudeBudget(budget, result, modelInfo)
 		// Check if budget was removed entirely
@@ -148,37 +144,6 @@ func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig,
 		}
 	}
 
-	// For ModeNone, always set includeThoughts to false regardless of user setting.
-	// This ensures that when user requests budget=0 (disable thinking output),
-	// the includeThoughts is correctly set to false even if budget is clamped to min.
-	if config.Mode == thinking.ModeNone {
-		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
-		return result, nil
-	}
-
-	// Determine includeThoughts: respect user's explicit setting from original body if provided
-	// Support both camelCase and snake_case variants
-	var includeThoughts bool
-	var userSetIncludeThoughts bool
-	if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-		userSetIncludeThoughts = true
-	} else if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-		userSetIncludeThoughts = true
-	}
-
-	if !userSetIncludeThoughts {
-		// No explicit setting, use default logic based on mode
-		switch config.Mode {
-		case thinking.ModeAuto:
-			includeThoughts = true
-		default:
-			includeThoughts = budget > 0
-		}
-	}
-
 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
 	return result, nil
diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go
index 3c74d5146d..3faf4786fb 100644
--- a/internal/thinking/provider/claude/apply.go
+++ b/internal/thinking/provider/claude/apply.go
@@ -83,6 +83,10 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
 
 	// Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint)
 	result = a.normalizeClaudeBudget(result, config.Budget, modelInfo)
+
+	// When thinking is enabled, Claude API requires assistant messages with tool_use
+	// to have a thinking block. Inject empty thinking block if missing.
+	result = injectThinkingBlockForToolUse(result)
 	return result, nil
 }
 
@@ -149,18 +153,85 @@ func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte,
 		body = []byte(`{}`)
 	}
 
+	var result []byte
 	switch config.Mode {
 	case thinking.ModeNone:
-		result, _ := sjson.SetBytes(body, "thinking.type", "disabled")
+		result, _ = sjson.SetBytes(body, "thinking.type", "disabled")
 		result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
 		return result, nil
 	case thinking.ModeAuto:
-		result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
+		result, _ = sjson.SetBytes(body, "thinking.type", "enabled")
 		result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
-		return result, nil
 	default:
-		result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
+		result, _ = sjson.SetBytes(body, "thinking.type", "enabled")
 		result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
-		return result, nil
 	}
+
+	// When thinking is enabled, Claude API requires assistant messages with tool_use
+	// to have a thinking block. Inject empty thinking block if missing.
+	result = injectThinkingBlockForToolUse(result)
+	return result, nil
+}
+
+// injectThinkingBlockForToolUse adds empty thinking block to assistant messages
+// that have tool_use but no thinking block. This is required by Claude API when
+// thinking is enabled.
+func injectThinkingBlockForToolUse(body []byte) []byte {
+	messages := gjson.GetBytes(body, "messages")
+	if !messages.IsArray() {
+		return body
+	}
+
+	messageArray := messages.Array()
+	modified := false
+	newMessages := "[]"
+
+	for _, msg := range messageArray {
+		role := msg.Get("role").String()
+		if role != "assistant" {
+			newMessages, _ = sjson.SetRaw(newMessages, "-1", msg.Raw)
+			continue
+		}
+
+		content := msg.Get("content")
+		if !content.IsArray() {
+			newMessages, _ = sjson.SetRaw(newMessages, "-1", msg.Raw)
+			continue
+		}
+
+		contentArray := content.Array()
+		hasToolUse := false
+		hasThinking := false
+
+		for _, part := range contentArray {
+			partType := part.Get("type").String()
+			if partType == "tool_use" {
+				hasToolUse = true
+			}
+			if partType == "thinking" {
+				hasThinking = true
+			}
+		}
+
+		if hasToolUse && !hasThinking {
+			// Inject empty thinking block at the beginning of content
+			newContent := "[]"
+			newContent, _ = sjson.SetRaw(newContent, "-1", `{"type":"thinking","thinking":""}`)
+			for _, part := range contentArray {
+				newContent, _ = sjson.SetRaw(newContent, "-1", part.Raw)
+			}
+			msgJSON := msg.Raw
+			msgJSON, _ = sjson.SetRaw(msgJSON, "content", newContent)
+			newMessages, _ = sjson.SetRaw(newMessages, "-1", msgJSON)
+			modified = true
+			continue
+		}
+
+		newMessages, _ = sjson.SetRaw(newMessages, "-1", msg.Raw)
+	}
+
+	if modified {
+		body, _ = sjson.SetRawBytes(body, "messages", []byte(newMessages))
+	}
+	return body
 }
diff --git a/internal/thinking/provider/claude/apply_test.go b/internal/thinking/provider/claude/apply_test.go
new file mode 100644
index 0000000000..dc7916e84f
--- /dev/null
+++ b/internal/thinking/provider/claude/apply_test.go
@@ -0,0 +1,187 @@
+package claude
+
+import (
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+)
+
+func TestInjectThinkingBlockForToolUse(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name: "assistant with tool_use but no thinking - should inject thinking",
+			input: `{
+				"model": "kimi-k2.5",
+				"messages": [
+					{
+						"role": "assistant",
+						"content": [
+							{"type": "text", "text": "Let me use a tool"},
+							{"type": "tool_use", "id": "tool_1", "name": "test_tool", "input": {}}
+						]
+					}
+				]
+			}`,
+			expected: "thinking",
+		},
+		{
+			name: "assistant with tool_use and thinking - should not modify",
+			input: `{
+				"model": "kimi-k2.5",
+				"messages": [
+					{
+						"role": "assistant",
+						"content": [
+							{"type": "thinking", "thinking": "I need to use a tool"},
+							{"type": "tool_use", "id": "tool_1", "name": "test_tool", "input": {}}
+						]
+					}
+				]
+			}`,
+			expected: "thinking",
+		},
+		{
+			name: "user message with tool_use - should not modify",
+			input: `{
+				"model": "kimi-k2.5",
+				"messages": [
+					{
+						"role": "user",
+						"content": [
+							{"type": "tool_result", "tool_use_id": "tool_1", "content": "result"}
+						]
+					}
+				]
+			}`,
+			expected: "",
+		},
+		{
+			name: "assistant without tool_use - should not modify",
+			input: `{
+				"model": "kimi-k2.5",
+				"messages": [
+					{
+						"role": "assistant",
+						"content": [
+							{"type": "text", "text": "Hello!"}
+						]
+					}
+				]
+			}`,
+			expected: "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := injectThinkingBlockForToolUse([]byte(tt.input))
+			
+			// Check if thinking block exists in assistant messages with tool_use
+			messages := gjson.GetBytes(result, "messages")
+			if !messages.IsArray() {
+				t.Fatal("messages is not an array")
+			}
+
+			for _, msg := range messages.Array() {
+				if msg.Get("role").String() == "assistant" {
+					content := msg.Get("content")
+					if !content.IsArray() {
+						continue
+					}
+
+					hasToolUse := false
+					hasThinking := false
+					for _, part := range content.Array() {
+						partType := part.Get("type").String()
+						if partType == "tool_use" {
+							hasToolUse = true
+						}
+						if partType == "thinking" {
+							hasThinking = true
+						}
+					}
+
+					if hasToolUse && tt.expected == "thinking" && !hasThinking {
+						t.Errorf("Expected thinking block in assistant message with tool_use, but not found")
+					}
+				}
+			}
+		})
+	}
+}
+
+func TestApplyCompatibleClaude(t *testing.T) {
+	tests := []struct {
+		name           string
+		input          string
+		config         thinking.ThinkingConfig
+		expectThinking bool
+	}{
+		{
+			name: "thinking enabled with tool_use - should inject thinking block",
+			input: `{
+				"model": "kimi-k2.5",
+				"messages": [
+					{
+						"role": "assistant",
+						"content": [
+							{"type": "tool_use", "id": "tool_1", "name": "test_tool", "input": {}}
+						]
+					}
+				]
+			}`,
+			config: thinking.ThinkingConfig{
+				Mode:   thinking.ModeBudget,
+				Budget: 4000,
+			},
+			expectThinking: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := applyCompatibleClaude([]byte(tt.input), tt.config)
+			if err != nil {
+				t.Fatalf("applyCompatibleClaude failed: %v", err)
+			}
+
+			// Check if thinking.type is enabled
+			thinkingType := gjson.GetBytes(result, "thinking.type").String()
+			if thinkingType != "enabled" {
+				t.Errorf("Expected thinking.type=enabled, got %s", thinkingType)
+			}
+
+			// Check if thinking block is injected
+			messages := gjson.GetBytes(result, "messages")
+			if !messages.IsArray() {
+				t.Fatal("messages is not an array")
+			}
+
+			for _, msg := range messages.Array() {
+				if msg.Get("role").String() == "assistant" {
+					content := msg.Get("content")
+					if !content.IsArray() {
+						continue
+					}
+
+					hasThinking := false
+					for _, part := range content.Array() {
+						if part.Get("type").String() == "thinking" {
+							hasThinking = true
+							break
+						}
+					}
+
+					if tt.expectThinking && !hasThinking {
+						t.Errorf("Expected thinking block in assistant message, but not found. Result: %s", string(result))
+					}
+				}
+			}
+		})
+	}
+}
diff --git a/internal/thinking/provider/gemini/apply.go b/internal/thinking/provider/gemini/apply.go
index 39bb4231d0..c8560f194e 100644
--- a/internal/thinking/provider/gemini/apply.go
+++ b/internal/thinking/provider/gemini/apply.go
@@ -118,10 +118,8 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig)
 	//   - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
 	// ValidateConfig sets config.Level to the lowest level when ModeNone + Budget > 0.
 
-	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget")
-	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.thinking_budget")
-	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.thinking_level")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts")
 
@@ -140,58 +138,29 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig)
 
 	level := string(config.Level)
 	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", level)
-
-	// Respect user's explicit includeThoughts setting from original body; default to true if not set
-	// Support both camelCase and snake_case variants
-	includeThoughts := true
-	if inc := gjson.GetBytes(body, "generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-	} else if inc := gjson.GetBytes(body, "generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-	}
-	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", includeThoughts)
+	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", true)
 	return result, nil
 }
 
 func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
-	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel")
-	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.thinking_level")
-	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.thinking_budget")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "generationConfig.thinkingConfig.include_thoughts")
 
 	budget := config.Budget
-
-	// For ModeNone, always set includeThoughts to false regardless of user setting.
-	// This ensures that when user requests budget=0 (disable thinking output),
-	// the includeThoughts is correctly set to false even if budget is clamped to min.
-	if config.Mode == thinking.ModeNone {
-		result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget)
-		result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false)
-		return result, nil
-	}
-
-	// Determine includeThoughts: respect user's explicit setting from original body if provided
-	// Support both camelCase and snake_case variants
-	var includeThoughts bool
-	var userSetIncludeThoughts bool
-	if inc := gjson.GetBytes(body, "generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-		userSetIncludeThoughts = true
-	} else if inc := gjson.GetBytes(body, "generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-		userSetIncludeThoughts = true
-	}
-
-	if !userSetIncludeThoughts {
-		// No explicit setting, use default logic based on mode
-		switch config.Mode {
-		case thinking.ModeAuto:
-			includeThoughts = true
-		default:
-			includeThoughts = budget > 0
-		}
+	// ModeNone semantics:
+	//   - ModeNone + Budget=0: completely disable thinking
+	//   - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
+	// When ZeroAllowed=false, ValidateConfig clamps Budget to Min while preserving ModeNone.
+	includeThoughts := false
+	switch config.Mode {
+	case thinking.ModeNone:
+		includeThoughts = false
+	case thinking.ModeAuto:
+		includeThoughts = true
+	default:
+		includeThoughts = budget > 0
 	}
 
 	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget)
diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go
index 5908b6bce5..75d9242a3b 100644
--- a/internal/thinking/provider/geminicli/apply.go
+++ b/internal/thinking/provider/geminicli/apply.go
@@ -79,10 +79,8 @@ func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) (
 }
 
 func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
-	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
-	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_budget")
-	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_level")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
 
@@ -101,58 +99,25 @@ func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig)
 
 	level := string(config.Level)
 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
-
-	// Respect user's explicit includeThoughts setting from original body; default to true if not set
-	// Support both camelCase and snake_case variants
-	includeThoughts := true
-	if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-	} else if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-	}
-	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
+	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
 	return result, nil
 }
 
 func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
-	// Remove conflicting fields to avoid both thinkingLevel and thinkingBudget in output
+	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
 	result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
-	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_level")
-	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.thinking_budget")
 	// Normalize includeThoughts field name to avoid oneof conflicts in upstream JSON parsing.
 	result, _ = sjson.DeleteBytes(result, "request.generationConfig.thinkingConfig.include_thoughts")
 
 	budget := config.Budget
-
-	// For ModeNone, always set includeThoughts to false regardless of user setting.
-	// This ensures that when user requests budget=0 (disable thinking output),
-	// the includeThoughts is correctly set to false even if budget is clamped to min.
-	if config.Mode == thinking.ModeNone {
-		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
-		result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
-		return result, nil
-	}
-
-	// Determine includeThoughts: respect user's explicit setting from original body if provided
-	// Support both camelCase and snake_case variants
-	var includeThoughts bool
-	var userSetIncludeThoughts bool
-	if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-		userSetIncludeThoughts = true
-	} else if inc := gjson.GetBytes(body, "request.generationConfig.thinkingConfig.include_thoughts"); inc.Exists() {
-		includeThoughts = inc.Bool()
-		userSetIncludeThoughts = true
-	}
-
-	if !userSetIncludeThoughts {
-		// No explicit setting, use default logic based on mode
-		switch config.Mode {
-		case thinking.ModeAuto:
-			includeThoughts = true
-		default:
-			includeThoughts = budget > 0
-		}
+	includeThoughts := false
+	switch config.Mode {
+	case thinking.ModeNone:
+		includeThoughts = false
+	case thinking.ModeAuto:
+		includeThoughts = true
+	default:
+		includeThoughts = budget > 0
 	}
 
 	result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
diff --git a/internal/thinking/provider/openai/apply.go b/internal/thinking/provider/openai/apply.go
index e8a2562f11..eaad30ee84 100644
--- a/internal/thinking/provider/openai/apply.go
+++ b/internal/thinking/provider/openai/apply.go
@@ -10,53 +10,10 @@ import (
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
-	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
 
-// validReasoningEffortLevels contains the standard values accepted by the
-// OpenAI reasoning_effort field. Provider-specific extensions (xhigh, minimal,
-// auto) are NOT in this set and must be clamped before use.
-var validReasoningEffortLevels = map[string]struct{}{
-	"none":   {},
-	"low":    {},
-	"medium": {},
-	"high":   {},
-}
-
-// clampReasoningEffort maps any thinking level string to a value that is safe
-// to send as OpenAI reasoning_effort. Non-standard CPA-internal values are
-// mapped to the nearest standard equivalent.
-//
-// Mapping rules:
-//   - none / low / medium / high  → returned as-is (already valid)
-//   - xhigh                       → "high" (nearest lower standard level)
-//   - minimal                     → "low" (nearest higher standard level)
-//   - auto                        → "medium" (reasonable default)
-//   - anything else               → "medium" (safe default)
-func clampReasoningEffort(level string) string {
-	if _, ok := validReasoningEffortLevels[level]; ok {
-		return level
-	}
-	var clamped string
-	switch level {
-	case string(thinking.LevelXHigh):
-		clamped = string(thinking.LevelHigh)
-	case string(thinking.LevelMinimal):
-		clamped = string(thinking.LevelLow)
-	case string(thinking.LevelAuto):
-		clamped = string(thinking.LevelMedium)
-	default:
-		clamped = string(thinking.LevelMedium)
-	}
-	log.WithFields(log.Fields{
-		"original": level,
-		"clamped":  clamped,
-	}).Debug("openai: reasoning_effort clamped to nearest valid standard value")
-	return clamped
-}
-
 // Applier implements thinking.ProviderApplier for OpenAI models.
 //
 // OpenAI-specific behavior:
@@ -101,7 +58,7 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
 	}
 
 	if config.Mode == thinking.ModeLevel {
-		result, _ := sjson.SetBytes(body, "reasoning_effort", clampReasoningEffort(string(config.Level)))
+		result, _ := sjson.SetBytes(body, "reasoning_effort", string(config.Level))
 		return result, nil
 	}
 
@@ -122,7 +79,7 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *
 		return body, nil
 	}
 
-	result, _ := sjson.SetBytes(body, "reasoning_effort", clampReasoningEffort(effort))
+	result, _ := sjson.SetBytes(body, "reasoning_effort", effort)
 	return result, nil
 }
 
@@ -157,7 +114,7 @@ func applyCompatibleOpenAI(body []byte, config thinking.ThinkingConfig) ([]byte,
 		return body, nil
 	}
 
-	result, _ := sjson.SetBytes(body, "reasoning_effort", clampReasoningEffort(effort))
+	result, _ := sjson.SetBytes(body, "reasoning_effort", effort)
 	return result, nil
 }
 
diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go
index 448aa9762f..3a0c8d7b0d 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -6,6 +6,7 @@
 package claude
 
 import (
+	"bytes"
 	"strings"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/cache"
@@ -36,7 +37,7 @@ import (
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
 	enableThoughtTranslate := true
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 
 	// system instruction
 	systemInstructionJSON := ""
@@ -114,6 +115,8 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 							if signatureResult.Exists() && signatureResult.String() != "" {
 								arrayClientSignatures := strings.SplitN(signatureResult.String(), "#", 2)
 								if len(arrayClientSignatures) == 2 {
+									// Compare using model group to handle model mapping
+									// e.g., claude-opus-4-5-thinking -> "claude" group should match "claude#signature"
 									if cache.GetModelGroup(modelName) == arrayClientSignatures[0] {
 										clientSignature = arrayClientSignatures[1]
 									}
@@ -231,12 +234,8 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 
 							} else if functionResponseResult.IsObject() {
 								functionResponseJSON, _ = sjson.SetRaw(functionResponseJSON, "response.result", functionResponseResult.Raw)
-							} else if functionResponseResult.Raw != "" {
-								functionResponseJSON, _ = sjson.SetRaw(functionResponseJSON, "response.result", functionResponseResult.Raw)
 							} else {
-								// Content field is missing entirely — .Raw is empty which
-								// causes sjson.SetRaw to produce invalid JSON (e.g. "result":}).
-								functionResponseJSON, _ = sjson.Set(functionResponseJSON, "response.result", "")
+								functionResponseJSON, _ = sjson.SetRaw(functionResponseJSON, "response.result", functionResponseResult.Raw)
 							}
 
 							partJSON := `{}`
@@ -348,8 +347,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	// Inject interleaved thinking hint when both tools and thinking are active
 	hasTools := toolDeclCount > 0
 	thinkingResult := gjson.GetBytes(rawJSON, "thinking")
-	thinkingType := thinkingResult.Get("type").String()
-	hasThinking := thinkingResult.Exists() && thinkingResult.IsObject() && (thinkingType == "enabled" || thinkingType == "adaptive")
+	hasThinking := thinkingResult.Exists() && thinkingResult.IsObject() && thinkingResult.Get("type").String() == "enabled"
 	isClaudeThinking := util.IsClaudeThinkingModel(modelName)
 
 	if hasTools && hasThinking && isClaudeThinking {
@@ -382,18 +380,12 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 
 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
 	if t := gjson.GetBytes(rawJSON, "thinking"); enableThoughtTranslate && t.Exists() && t.IsObject() {
-		switch t.Get("type").String() {
-		case "enabled":
+		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
-		case "adaptive":
-			// Keep adaptive as a high level sentinel; ApplyThinking resolves it
-			// to model-specific max capability.
-			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high")
-			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 		}
 	}
 	if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
diff --git a/internal/translator/antigravity/claude/antigravity_claude_request_test.go b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
index c28a14ec9e..9f40b9faee 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request_test.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request_test.go
@@ -661,85 +661,6 @@ func TestConvertClaudeRequestToAntigravity_ThinkingOnly_NoHint(t *testing.T) {
 	}
 }
 
-func TestConvertClaudeRequestToAntigravity_ToolResultNoContent(t *testing.T) {
-	// Bug repro: tool_result with no content field produces invalid JSON
-	inputJSON := []byte(`{
-		"model": "claude-opus-4-6-thinking",
-		"messages": [
-			{
-				"role": "assistant",
-				"content": [
-					{
-						"type": "tool_use",
-						"id": "MyTool-123-456",
-						"name": "MyTool",
-						"input": {"key": "value"}
-					}
-				]
-			},
-			{
-				"role": "user",
-				"content": [
-					{
-						"type": "tool_result",
-						"tool_use_id": "MyTool-123-456"
-					}
-				]
-			}
-		]
-	}`)
-
-	output := ConvertClaudeRequestToAntigravity("claude-opus-4-6-thinking", inputJSON, true)
-	outputStr := string(output)
-
-	if !gjson.Valid(outputStr) {
-		t.Errorf("Result is not valid JSON:\n%s", outputStr)
-	}
-
-	// Verify the functionResponse has a valid result value
-	fr := gjson.Get(outputStr, "request.contents.1.parts.0.functionResponse.response.result")
-	if !fr.Exists() {
-		t.Error("functionResponse.response.result should exist")
-	}
-}
-
-func TestConvertClaudeRequestToAntigravity_ToolResultNullContent(t *testing.T) {
-	// Bug repro: tool_result with null content produces invalid JSON
-	inputJSON := []byte(`{
-		"model": "claude-opus-4-6-thinking",
-		"messages": [
-			{
-				"role": "assistant",
-				"content": [
-					{
-						"type": "tool_use",
-						"id": "MyTool-123-456",
-						"name": "MyTool",
-						"input": {"key": "value"}
-					}
-				]
-			},
-			{
-				"role": "user",
-				"content": [
-					{
-						"type": "tool_result",
-						"tool_use_id": "MyTool-123-456",
-						"content": null
-					}
-				]
-			}
-		]
-	}`)
-
-	output := ConvertClaudeRequestToAntigravity("claude-opus-4-6-thinking", inputJSON, true)
-	outputStr := string(output)
-
-	if !gjson.Valid(outputStr) {
-		t.Errorf("Result is not valid JSON:\n%s", outputStr)
-	}
-}
-
 func TestConvertClaudeRequestToAntigravity_ToolAndThinking_NoExistingSystem(t *testing.T) {
 	// When tools + thinking but no system instruction, should create one with hint
 	inputJSON := []byte(`{
diff --git a/internal/translator/antigravity/gemini/antigravity_gemini_request.go b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
index 1d04474069..2ad9bd8075 100644
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
@@ -6,6 +6,7 @@
 package gemini
 
 import (
+	"bytes"
 	"fmt"
 	"strings"
 
@@ -33,7 +34,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
 func ConvertGeminiRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	template := ""
 	template = `{"project":"","request":{},"model":""}`
 	template, _ = sjson.SetRaw(template, "request", string(rawJSON))
diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
index a8105c4ec3..9cc809eeb6 100644
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -3,6 +3,7 @@
 package chat_completions
 
 import (
+	"bytes"
 	"fmt"
 	"strings"
 
@@ -27,7 +28,7 @@ const geminiCLIFunctionThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	// Base envelope (no default thinkingConfig)
 	out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)
 
diff --git a/internal/translator/antigravity/openai/responses/antigravity_openai-responses_request.go b/internal/translator/antigravity/openai/responses/antigravity_openai-responses_request.go
index 90bfa14c05..65d4dcd8b4 100644
--- a/internal/translator/antigravity/openai/responses/antigravity_openai-responses_request.go
+++ b/internal/translator/antigravity/openai/responses/antigravity_openai-responses_request.go
@@ -1,12 +1,14 @@
 package responses
 
 import (
+	"bytes"
+
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/gemini"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/responses"
 )
 
 func ConvertOpenAIResponsesRequestToAntigravity(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	rawJSON = ConvertOpenAIResponsesRequestToGemini(modelName, rawJSON, stream)
 	return ConvertGeminiRequestToAntigravity(modelName, rawJSON, stream)
 }
diff --git a/internal/translator/claude/gemini-cli/claude_gemini-cli_request.go b/internal/translator/claude/gemini-cli/claude_gemini-cli_request.go
index 831d784db3..c10b35ff5a 100644
--- a/internal/translator/claude/gemini-cli/claude_gemini-cli_request.go
+++ b/internal/translator/claude/gemini-cli/claude_gemini-cli_request.go
@@ -6,6 +6,8 @@
 package geminiCLI
 
 import (
+	"bytes"
+
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/claude/gemini"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -28,7 +30,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Claude Code API format
 func ConvertGeminiCLIRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 
 	modelResult := gjson.GetBytes(rawJSON, "model")
 	// Extract the inner request object and promote it to the top level
diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go
index ea53da0540..a26ac51a45 100644
--- a/internal/translator/claude/gemini/claude_gemini_request.go
+++ b/internal/translator/claude/gemini/claude_gemini_request.go
@@ -6,6 +6,7 @@
 package gemini
 
 import (
+	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
@@ -45,7 +46,7 @@ var (
 // Returns:
 //   - []byte: The transformed request data in Claude Code API format
 func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 
 	if account == "" {
 		u, _ := uuid.NewRandom()
@@ -115,11 +116,7 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 		// Include thoughts configuration for reasoning process visibility
 		// Translator only does format conversion, ApplyThinking handles model capability validation.
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			thinkingLevel := thinkingConfig.Get("thinkingLevel")
-			if !thinkingLevel.Exists() {
-				thinkingLevel = thinkingConfig.Get("thinking_level")
-			}
-			if thinkingLevel.Exists() {
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
 				level := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
 				switch level {
 				case "":
@@ -135,29 +132,23 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream
 						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
 					}
 				}
-			} else {
-				thinkingBudget := thinkingConfig.Get("thinkingBudget")
-				if !thinkingBudget.Exists() {
-					thinkingBudget = thinkingConfig.Get("thinking_budget")
-				}
-				if thinkingBudget.Exists() {
-					budget := int(thinkingBudget.Int())
-					switch budget {
-					case 0:
-						out, _ = sjson.Set(out, "thinking.type", "disabled")
-						out, _ = sjson.Delete(out, "thinking.budget_tokens")
-					case -1:
-						out, _ = sjson.Set(out, "thinking.type", "enabled")
-						out, _ = sjson.Delete(out, "thinking.budget_tokens")
-					default:
-						out, _ = sjson.Set(out, "thinking.type", "enabled")
-						out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
-					}
-				} else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				budget := int(thinkingBudget.Int())
+				switch budget {
+				case 0:
+					out, _ = sjson.Set(out, "thinking.type", "disabled")
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				case -1:
 					out, _ = sjson.Set(out, "thinking.type", "enabled")
-				} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+					out, _ = sjson.Delete(out, "thinking.budget_tokens")
+				default:
 					out, _ = sjson.Set(out, "thinking.type", "enabled")
+					out, _ = sjson.Set(out, "thinking.budget_tokens", budget)
 				}
+			} else if includeThoughts := thinkingConfig.Get("includeThoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+				out, _ = sjson.Set(out, "thinking.type", "enabled")
+			} else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True {
+				out, _ = sjson.Set(out, "thinking.type", "enabled")
 			}
 		}
 	}
diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
index 3cad18825e..41274628a1 100644
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -6,6 +6,7 @@
 package chat_completions
 
 import (
+	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
@@ -43,7 +44,7 @@ var (
 // Returns:
 //   - []byte: The transformed request data in Claude Code API format
 func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 
 	if account == "" {
 		u, _ := uuid.NewRandom()
diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_response.go b/internal/translator/claude/openai/chat-completions/claude_openai_response.go
index 346db69a11..0ddfeaecba 100644
--- a/internal/translator/claude/openai/chat-completions/claude_openai_response.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_response.go
@@ -50,10 +50,6 @@ type ToolCallAccumulator struct {
 // Returns:
 //   - []string: A slice of strings, each containing an OpenAI-compatible JSON response
 func ConvertClaudeResponseToOpenAI(_ context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) []string {
-	var localParam any
-	if param == nil {
-		param = &localParam
-	}
 	if *param == nil {
 		*param = &ConvertAnthropicResponseToOpenAIParams{
 			CreatedAt:    0,
diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
index 337f9be93b..5cbe23bf1b 100644
--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -1,6 +1,7 @@
 package responses
 
 import (
+	"bytes"
 	"crypto/rand"
 	"crypto/sha256"
 	"encoding/hex"
@@ -31,7 +32,7 @@ var (
 // - max_output_tokens -> max_tokens
 // - stream passthrough via parameter
 func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 
 	if account == "" {
 		u, _ := uuid.NewRandom()
diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go
index 223a2559f7..aa91b17549 100644
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -6,6 +6,7 @@
 package claude
 
 import (
+	"bytes"
 	"fmt"
 	"strconv"
 	"strings"
@@ -34,7 +35,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in internal client format
 func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 
 	template := `{"model":"","instructions":"","input":[]}`
 
@@ -222,10 +223,6 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 					reasoningEffort = effort
 				}
 			}
-		case "adaptive":
-			// Claude adaptive means "enable with max capacity"; keep it as highest level
-			// and let ApplyThinking normalize per target model capability.
-			reasoningEffort = string(thinking.LevelXHigh)
 		case "disabled":
 			if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" {
 				reasoningEffort = effort
diff --git a/internal/translator/codex/claude/codex_claude_response.go b/internal/translator/codex/claude/codex_claude_response.go
index cdcf2e4f55..238d3e24df 100644
--- a/internal/translator/codex/claude/codex_claude_response.go
+++ b/internal/translator/codex/claude/codex_claude_response.go
@@ -22,9 +22,8 @@ var (
 
 // ConvertCodexResponseToClaudeParams holds parameters for response conversion.
 type ConvertCodexResponseToClaudeParams struct {
-	HasToolCall              bool
-	BlockIndex               int
-	HasReceivedArgumentsDelta bool
+	HasToolCall bool
+	BlockIndex  int
 }
 
 // ConvertCodexResponseToClaude performs sophisticated streaming response format conversion.
@@ -114,10 +113,10 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 		template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
 		p := (*param).(*ConvertCodexResponseToClaudeParams).HasToolCall
 		stopReason := rootResult.Get("response.stop_reason").String()
-		if p {
-			template, _ = sjson.Set(template, "delta.stop_reason", "tool_use")
-		} else if stopReason == "max_tokens" || stopReason == "stop" {
+		if stopReason != "" {
 			template, _ = sjson.Set(template, "delta.stop_reason", stopReason)
+		} else if p {
+			template, _ = sjson.Set(template, "delta.stop_reason", "tool_use")
 		} else {
 			template, _ = sjson.Set(template, "delta.stop_reason", "end_turn")
 		}
@@ -138,7 +137,6 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 		itemType := itemResult.Get("type").String()
 		if itemType == "function_call" {
 			(*param).(*ConvertCodexResponseToClaudeParams).HasToolCall = true
-			(*param).(*ConvertCodexResponseToClaudeParams).HasReceivedArgumentsDelta = false
 			template = `{"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`
 			template, _ = sjson.Set(template, "index", (*param).(*ConvertCodexResponseToClaudeParams).BlockIndex)
 			template, _ = sjson.Set(template, "content_block.id", itemResult.Get("call_id").String())
@@ -173,29 +171,12 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 			output += fmt.Sprintf("data: %s\n\n", template)
 		}
 	} else if typeStr == "response.function_call_arguments.delta" {
-		(*param).(*ConvertCodexResponseToClaudeParams).HasReceivedArgumentsDelta = true
 		template = `{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""}}`
 		template, _ = sjson.Set(template, "index", (*param).(*ConvertCodexResponseToClaudeParams).BlockIndex)
 		template, _ = sjson.Set(template, "delta.partial_json", rootResult.Get("delta").String())
 
 		output += "event: content_block_delta\n"
 		output += fmt.Sprintf("data: %s\n\n", template)
-	} else if typeStr == "response.function_call_arguments.done" {
-		// Some models (e.g. gpt-5.3-codex-spark) send function call arguments
-		// in a single "done" event without preceding "delta" events.
-		// Emit the full arguments as a single input_json_delta so the
-		// downstream Claude client receives the complete tool input.
-		// When delta events were already received, skip to avoid duplicating arguments.
-		if !(*param).(*ConvertCodexResponseToClaudeParams).HasReceivedArgumentsDelta {
-			if args := rootResult.Get("arguments").String(); args != "" {
-				template = `{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""}}`
-				template, _ = sjson.Set(template, "index", (*param).(*ConvertCodexResponseToClaudeParams).BlockIndex)
-				template, _ = sjson.Set(template, "delta.partial_json", args)
-
-				output += "event: content_block_delta\n"
-				output += fmt.Sprintf("data: %s\n\n", template)
-			}
-		}
 	}
 
 	return []string{output}
diff --git a/internal/translator/codex/gemini-cli/codex_gemini-cli_request.go b/internal/translator/codex/gemini-cli/codex_gemini-cli_request.go
index 8b32453d26..db056a24d7 100644
--- a/internal/translator/codex/gemini-cli/codex_gemini-cli_request.go
+++ b/internal/translator/codex/gemini-cli/codex_gemini-cli_request.go
@@ -6,6 +6,8 @@
 package geminiCLI
 
 import (
+	"bytes"
+
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/codex/gemini"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -28,7 +30,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Codex API format
 func ConvertGeminiCLIRequestToCodex(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 
 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelName)
diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go
index 9f5d7b311c..bfea4c6de5 100644
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -6,6 +6,7 @@
 package gemini
 
 import (
+	"bytes"
 	"crypto/rand"
 	"fmt"
 	"math/big"
@@ -36,7 +37,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Codex API format
 func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	// Base template
 	out := `{"model":"","instructions":"","input":[]}`
 
@@ -242,30 +243,19 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	out, _ = sjson.Set(out, "parallel_tool_calls", true)
 
 	// Convert Gemini thinkingConfig to Codex reasoning.effort.
-	// Note: Google official Python SDK sends snake_case fields (thinking_level/thinking_budget).
 	effortSet := false
 	if genConfig := root.Get("generationConfig"); genConfig.Exists() {
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			thinkingLevel := thinkingConfig.Get("thinkingLevel")
-			if !thinkingLevel.Exists() {
-				thinkingLevel = thinkingConfig.Get("thinking_level")
-			}
-			if thinkingLevel.Exists() {
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
 				effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
 				if effort != "" {
 					out, _ = sjson.Set(out, "reasoning.effort", effort)
 					effortSet = true
 				}
-			} else {
-				thinkingBudget := thinkingConfig.Get("thinkingBudget")
-				if !thinkingBudget.Exists() {
-					thinkingBudget = thinkingConfig.Get("thinking_budget")
-				}
-				if thinkingBudget.Exists() {
-					if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
-						out, _ = sjson.Set(out, "reasoning.effort", effort)
-						effortSet = true
-					}
+			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
+					out, _ = sjson.Set(out, "reasoning.effort", effort)
+					effortSet = true
 				}
 			}
 		}
diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_request.go b/internal/translator/codex/openai/chat-completions/codex_openai_request.go
index e79f97cd3b..4cd234355d 100644
--- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go
+++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go
@@ -7,6 +7,8 @@
 package chat_completions
 
 import (
+	"bytes"
+
 	"strconv"
 	"strings"
 
@@ -27,7 +29,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in OpenAI Responses API format
 func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	// Start with empty JSON object
 	out := `{"instructions":""}`
 
diff --git a/internal/translator/codex/openai/chat-completions/codex_openai_response.go b/internal/translator/codex/openai/chat-completions/codex_openai_response.go
index f0e264c8ce..6d86c247a8 100644
--- a/internal/translator/codex/openai/chat-completions/codex_openai_response.go
+++ b/internal/translator/codex/openai/chat-completions/codex_openai_response.go
@@ -20,12 +20,10 @@ var (
 
 // ConvertCliToOpenAIParams holds parameters for response conversion.
 type ConvertCliToOpenAIParams struct {
-	ResponseID                string
-	CreatedAt                 int64
-	Model                     string
-	FunctionCallIndex         int
-	HasReceivedArgumentsDelta bool
-	HasToolCallAnnounced      bool
+	ResponseID        string
+	CreatedAt         int64
+	Model             string
+	FunctionCallIndex int
 }
 
 // ConvertCodexResponseToOpenAI translates a single chunk of a streaming response from the
@@ -45,12 +43,10 @@ type ConvertCliToOpenAIParams struct {
 func ConvertCodexResponseToOpenAI(_ context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) []string {
 	if *param == nil {
 		*param = &ConvertCliToOpenAIParams{
-			Model:                     modelName,
-			CreatedAt:                 0,
-			ResponseID:                "",
-			FunctionCallIndex:         -1,
-			HasReceivedArgumentsDelta: false,
-			HasToolCallAnnounced:      false,
+			Model:             modelName,
+			CreatedAt:         0,
+			ResponseID:        "",
+			FunctionCallIndex: -1,
 		}
 	}
 
@@ -94,9 +90,6 @@ func ConvertCodexResponseToOpenAI(_ context.Context, modelName string, originalR
 		if inputTokensResult := usageResult.Get("input_tokens"); inputTokensResult.Exists() {
 			template, _ = sjson.Set(template, "usage.prompt_tokens", inputTokensResult.Int())
 		}
-		if cachedTokensResult := usageResult.Get("input_tokens_details.cached_tokens"); cachedTokensResult.Exists() {
-			template, _ = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokensResult.Int())
-		}
 		if reasoningTokensResult := usageResult.Get("output_tokens_details.reasoning_tokens"); reasoningTokensResult.Exists() {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", reasoningTokensResult.Int())
 		}
@@ -122,92 +115,34 @@ func ConvertCodexResponseToOpenAI(_ context.Context, modelName string, originalR
 		}
 		template, _ = sjson.Set(template, "choices.0.finish_reason", finishReason)
 		template, _ = sjson.Set(template, "choices.0.native_finish_reason", finishReason)
-	} else if dataType == "response.output_item.added" {
-		itemResult := rootResult.Get("item")
-		if !itemResult.Exists() || itemResult.Get("type").String() != "function_call" {
-			return []string{}
-		}
-
-		// Increment index for this new function call item.
-		(*param).(*ConvertCliToOpenAIParams).FunctionCallIndex++
-		(*param).(*ConvertCliToOpenAIParams).HasReceivedArgumentsDelta = false
-		(*param).(*ConvertCliToOpenAIParams).HasToolCallAnnounced = true
-
-		functionCallItemTemplate := `{"index":0,"id":"","type":"function","function":{"name":"","arguments":""}}`
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "index", (*param).(*ConvertCliToOpenAIParams).FunctionCallIndex)
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", itemResult.Get("call_id").String())
-
-		// Restore original tool name if it was shortened.
-		name := itemResult.Get("name").String()
-		rev := buildReverseMapFromOriginalOpenAI(originalRequestRawJSON)
-		if orig, ok := rev[name]; ok {
-			name = orig
-		}
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", name)
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", "")
-
-		template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
-		template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
-		template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallItemTemplate)
-
-	} else if dataType == "response.function_call_arguments.delta" {
-		(*param).(*ConvertCliToOpenAIParams).HasReceivedArgumentsDelta = true
-
-		deltaValue := rootResult.Get("delta").String()
-		functionCallItemTemplate := `{"index":0,"function":{"arguments":""}}`
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "index", (*param).(*ConvertCliToOpenAIParams).FunctionCallIndex)
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", deltaValue)
-
-		template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
-		template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallItemTemplate)
-
-	} else if dataType == "response.function_call_arguments.done" {
-		if (*param).(*ConvertCliToOpenAIParams).HasReceivedArgumentsDelta {
-			// Arguments were already streamed via delta events; nothing to emit.
-			return []string{}
-		}
-
-		// Fallback: no delta events were received, emit the full arguments as a single chunk.
-		fullArgs := rootResult.Get("arguments").String()
-		functionCallItemTemplate := `{"index":0,"function":{"arguments":""}}`
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "index", (*param).(*ConvertCliToOpenAIParams).FunctionCallIndex)
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fullArgs)
-
-		template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
-		template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallItemTemplate)
-
 	} else if dataType == "response.output_item.done" {
+		functionCallItemTemplate := `{"index":0,"id":"","type":"function","function":{"name":"","arguments":""}}`
 		itemResult := rootResult.Get("item")
-		if !itemResult.Exists() || itemResult.Get("type").String() != "function_call" {
-			return []string{}
-		}
-
-		if (*param).(*ConvertCliToOpenAIParams).HasToolCallAnnounced {
-			// Tool call was already announced via output_item.added; skip emission.
-			(*param).(*ConvertCliToOpenAIParams).HasToolCallAnnounced = false
-			return []string{}
-		}
+		if itemResult.Exists() {
+			if itemResult.Get("type").String() != "function_call" {
+				return []string{}
+			}
 
-		// Fallback path: model skipped output_item.added, so emit complete tool call now.
-		(*param).(*ConvertCliToOpenAIParams).FunctionCallIndex++
+			// set the index
+			(*param).(*ConvertCliToOpenAIParams).FunctionCallIndex++
+			functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "index", (*param).(*ConvertCliToOpenAIParams).FunctionCallIndex)
 
-		functionCallItemTemplate := `{"index":0,"id":"","type":"function","function":{"name":"","arguments":""}}`
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "index", (*param).(*ConvertCliToOpenAIParams).FunctionCallIndex)
+			template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
+			functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", itemResult.Get("call_id").String())
 
-		template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls", `[]`)
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", itemResult.Get("call_id").String())
+			// Restore original tool name if it was shortened
+			name := itemResult.Get("name").String()
+			// Build reverse map on demand from original request tools
+			rev := buildReverseMapFromOriginalOpenAI(originalRequestRawJSON)
+			if orig, ok := rev[name]; ok {
+				name = orig
+			}
+			functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", name)
 
-		// Restore original tool name if it was shortened.
-		name := itemResult.Get("name").String()
-		rev := buildReverseMapFromOriginalOpenAI(originalRequestRawJSON)
-		if orig, ok := rev[name]; ok {
-			name = orig
+			functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", itemResult.Get("arguments").String())
+			template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
+			template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallItemTemplate)
 		}
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", name)
-
-		functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", itemResult.Get("arguments").String())
-		template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
-		template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallItemTemplate)
 
 	} else {
 		return []string{}
@@ -270,9 +205,6 @@ func ConvertCodexResponseToOpenAINonStream(_ context.Context, _ string, original
 		if inputTokensResult := usageResult.Get("input_tokens"); inputTokensResult.Exists() {
 			template, _ = sjson.Set(template, "usage.prompt_tokens", inputTokensResult.Int())
 		}
-		if cachedTokensResult := usageResult.Get("input_tokens_details.cached_tokens"); cachedTokensResult.Exists() {
-			template, _ = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokensResult.Int())
-		}
 		if reasoningTokensResult := usageResult.Get("output_tokens_details.reasoning_tokens"); reasoningTokensResult.Exists() {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", reasoningTokensResult.Int())
 		}
diff --git a/internal/translator/codex/openai/responses/codex_openai-responses_request.go b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
index f0407149e0..389c6d3131 100644
--- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
@@ -1,6 +1,7 @@
 package responses
 
 import (
+	"bytes"
 	"fmt"
 
 	"github.com/tidwall/gjson"
@@ -8,13 +9,7 @@ import (
 )
 
 func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
-
-	inputResult := gjson.GetBytes(rawJSON, "input")
-	if inputResult.Type == gjson.String {
-		input, _ := sjson.Set(`[{"type":"message","role":"user","content":[{"type":"input_text","text":""}]}]`, "0.content.0.text", inputResult.String())
-		rawJSON, _ = sjson.SetRawBytes(rawJSON, "input", []byte(input))
-	}
+	rawJSON := bytes.Clone(inputRawJSON)
 
 	rawJSON, _ = sjson.SetBytes(rawJSON, "stream", true)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "store", false)
@@ -27,9 +22,6 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "top_p")
 	rawJSON, _ = sjson.DeleteBytes(rawJSON, "service_tier")
 
-	// Delete the user field as it is not supported by the Codex upstream.
-	rawJSON, _ = sjson.DeleteBytes(rawJSON, "user")
-
 	// Convert role "system" to "developer" in input array to comply with Codex API requirements.
 	rawJSON = convertSystemRoleToDeveloper(rawJSON)
 
diff --git a/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go b/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go
index 4f5624869f..ea41323860 100644
--- a/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request_test.go
@@ -263,20 +263,3 @@ func TestConvertSystemRoleToDeveloper_AssistantRole(t *testing.T) {
 		t.Errorf("Expected third role 'assistant', got '%s'", thirdRole.String())
 	}
 }
-
-func TestUserFieldDeletion(t *testing.T) {  
-	inputJSON := []byte(`{  
-		"model": "gpt-5.2",  
-		"user": "test-user",  
-		"input": [{"role": "user", "content": "Hello"}]  
-	}`)  
-	  
-	output := ConvertOpenAIResponsesRequestToCodex("gpt-5.2", inputJSON, false)  
-	outputStr := string(output)  
-	  
-	// Verify user field is deleted  
-	userField := gjson.Get(outputStr, "user")  
-	if userField.Exists() {
-		t.Errorf("user field should be deleted, but it was found with value: %s", userField.Raw)
-	}
-}
diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
index ee66138140..f4a51e8b67 100644
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -35,7 +35,7 @@ const geminiCLIClaudeThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	rawJSON = bytes.Replace(rawJSON, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)
 
 	// Build output Gemini CLI request JSON
@@ -116,19 +116,6 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 						part, _ = sjson.Set(part, "functionResponse.name", funcName)
 						part, _ = sjson.Set(part, "functionResponse.response.result", responseData)
 						contentJSON, _ = sjson.SetRaw(contentJSON, "parts.-1", part)
-
-					case "image":
-						source := contentResult.Get("source")
-						if source.Get("type").String() == "base64" {
-							mimeType := source.Get("media_type").String()
-							data := source.Get("data").String()
-							if mimeType != "" && data != "" {
-								part := `{"inlineData":{"mime_type":"","data":""}}`
-								part, _ = sjson.Set(part, "inlineData.mime_type", mimeType)
-								part, _ = sjson.Set(part, "inlineData.data", data)
-								contentJSON, _ = sjson.SetRaw(contentJSON, "parts.-1", part)
-							}
-						}
 					}
 					return true
 				})
@@ -173,18 +160,12 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 
 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
 	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
-		switch t.Get("type").String() {
-		case "enabled":
+		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 			}
-		case "adaptive":
-			// Keep adaptive as a high level sentinel; ApplyThinking resolves it
-			// to model-specific max capability.
-			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high")
-			out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
 		}
 	}
 	if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
diff --git a/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go b/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
index 15ff8b983a..ac6227fe62 100644
--- a/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
+++ b/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
@@ -6,6 +6,7 @@
 package gemini
 
 import (
+	"bytes"
 	"fmt"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -32,7 +33,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
 func ConvertGeminiRequestToGeminiCLI(_ string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	template := ""
 	template = `{"project":"","request":{},"model":""}`
 	template, _ = sjson.SetRaw(template, "request", string(rawJSON))
diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
index 53da71f4e5..2351130f0a 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -3,6 +3,7 @@
 package chat_completions
 
 import (
+	"bytes"
 	"fmt"
 	"strings"
 
@@ -27,7 +28,7 @@ const geminiCLIFunctionThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	// Base envelope (no default thinkingConfig)
 	out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)
 
diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
index 0415e01493..5a1faf510d 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
@@ -14,7 +14,6 @@ import (
 	"time"
 
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
-	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -78,20 +77,14 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 		template, _ = sjson.Set(template, "id", responseIDResult.String())
 	}
 
-	finishReason := ""
-	if stopReasonResult := gjson.GetBytes(rawJSON, "response.stop_reason"); stopReasonResult.Exists() {
-		finishReason = stopReasonResult.String()
+	// Extract and set the finish reason.
+	if finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason"); finishReasonResult.Exists() {
+		template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
+		template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
 	}
-	if finishReason == "" {
-		if finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason"); finishReasonResult.Exists() {
-			finishReason = finishReasonResult.String()
-		}
-	}
-	finishReason = strings.ToLower(finishReason)
 
 	// Extract and set usage metadata (token counts).
 	if usageResult := gjson.GetBytes(rawJSON, "response.usageMetadata"); usageResult.Exists() {
-		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
 			template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
 		}
@@ -104,14 +97,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
-		// Include cached token count if present (indicates prompt caching is working)
-		if cachedTokenCount > 0 {
-			var err error
-			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
-			if err != nil {
-				log.Warnf("gemini-cli openai response: failed to set cached_tokens: %v", err)
-			}
-		}
 	}
 
 	// Process the main content part of the response.
@@ -202,12 +187,6 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 	if hasFunctionCall {
 		template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
 		template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
-	} else if finishReason != "" && (*param).(*convertCliResponseToOpenAIChatParams).FunctionIndex == 0 {
-		// Only pass through specific finish reasons
-		if finishReason == "max_tokens" || finishReason == "stop" {
-			template, _ = sjson.Set(template, "choices.0.finish_reason", finishReason)
-			template, _ = sjson.Set(template, "choices.0.native_finish_reason", finishReason)
-		}
 	}
 
 	return []string{template}
diff --git a/internal/translator/gemini-cli/openai/responses/gemini-cli_openai-responses_request.go b/internal/translator/gemini-cli/openai/responses/gemini-cli_openai-responses_request.go
index 657e45fdb2..b70e3d839a 100644
--- a/internal/translator/gemini-cli/openai/responses/gemini-cli_openai-responses_request.go
+++ b/internal/translator/gemini-cli/openai/responses/gemini-cli_openai-responses_request.go
@@ -1,12 +1,14 @@
 package responses
 
 import (
+	"bytes"
+
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini-cli/gemini"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/responses"
 )
 
 func ConvertOpenAIResponsesRequestToGeminiCLI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	rawJSON = ConvertOpenAIResponsesRequestToGemini(modelName, rawJSON, stream)
 	return ConvertGeminiRequestToGeminiCLI(modelName, rawJSON, stream)
 }
diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go
index e882f769a8..0d5361a52f 100644
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -28,7 +28,7 @@ const geminiClaudeThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request in Gemini CLI format.
 func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	rawJSON = bytes.Replace(rawJSON, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)
 
 	// Build output Gemini CLI request JSON
@@ -154,18 +154,12 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
 	// Translator only does format conversion, ApplyThinking handles model capability validation.
 	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
-		switch t.Get("type").String() {
-		case "enabled":
+		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
 			}
-		case "adaptive":
-			// Keep adaptive as a high level sentinel; ApplyThinking resolves it
-			// to model-specific max capability.
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingLevel", "high")
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
 		}
 	}
 	if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
diff --git a/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go b/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
index 1b2cdb4636..3b70bd3e15 100644
--- a/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
+++ b/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
@@ -6,6 +6,7 @@
 package geminiCLI
 
 import (
+	"bytes"
 	"fmt"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -18,7 +19,7 @@ import (
 // It extracts the model name, system instruction, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the internal client.
 func ConvertGeminiCLIRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	modelResult := gjson.GetBytes(rawJSON, "model")
 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelResult.String())
diff --git a/internal/translator/gemini/gemini/gemini_gemini_request.go b/internal/translator/gemini/gemini/gemini_gemini_request.go
index 8024e9e329..2388aaf8da 100644
--- a/internal/translator/gemini/gemini/gemini_gemini_request.go
+++ b/internal/translator/gemini/gemini/gemini_gemini_request.go
@@ -4,6 +4,7 @@
 package gemini
 
 import (
+	"bytes"
 	"fmt"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -18,7 +19,7 @@ import (
 //
 // It keeps the payload otherwise unchanged.
 func ConvertGeminiRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	// Fast path: if no contents field, only attach safety settings
 	contents := gjson.GetBytes(rawJSON, "contents")
 	if !contents.Exists() {
diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
index 5de3568198..a7c20852b2 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -3,6 +3,7 @@
 package chat_completions
 
 import (
+	"bytes"
 	"fmt"
 	"strings"
 
@@ -27,7 +28,7 @@ const geminiFunctionThoughtSignature = "skip_thought_signature_validator"
 // Returns:
 //   - []byte: The transformed request data in Gemini API format
 func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	// Base envelope (no default thinkingConfig)
 	out := []byte(`{"contents":[]}`)
 
diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
index ee581c46e0..9cce35f975 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -129,16 +129,11 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 			candidateIndex := int(candidate.Get("index").Int())
 			template, _ = sjson.Set(template, "choices.0.index", candidateIndex)
 
-			finishReason := ""
-			if stopReasonResult := gjson.GetBytes(rawJSON, "stop_reason"); stopReasonResult.Exists() {
-				finishReason = stopReasonResult.String()
-			}
-			if finishReason == "" {
-				if finishReasonResult := gjson.GetBytes(rawJSON, "candidates.0.finishReason"); finishReasonResult.Exists() {
-					finishReason = finishReasonResult.String()
-				}
+			// Extract and set the finish reason.
+			if finishReasonResult := candidate.Get("finishReason"); finishReasonResult.Exists() {
+				template, _ = sjson.Set(template, "choices.0.finish_reason", strings.ToLower(finishReasonResult.String()))
+				template, _ = sjson.Set(template, "choices.0.native_finish_reason", strings.ToLower(finishReasonResult.String()))
 			}
-			finishReason = strings.ToLower(finishReason)
 
 			partsResult := candidate.Get("content.parts")
 			hasFunctionCall := false
@@ -230,12 +225,6 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 			if hasFunctionCall {
 				template, _ = sjson.Set(template, "choices.0.finish_reason", "tool_calls")
 				template, _ = sjson.Set(template, "choices.0.native_finish_reason", "tool_calls")
-			} else if finishReason != "" {
-				// Only pass through specific finish reasons
-				if finishReason == "max_tokens" || finishReason == "stop" {
-					template, _ = sjson.Set(template, "choices.0.finish_reason", finishReason)
-					template, _ = sjson.Set(template, "choices.0.native_finish_reason", finishReason)
-				}
 			}
 
 			responseStrings = append(responseStrings, template)
diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
index aca0171781..5277b71b2e 100644
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -1,6 +1,7 @@
 package responses
 
 import (
+	"bytes"
 	"strings"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
@@ -11,7 +12,7 @@ import (
 const geminiResponsesThoughtSignature = "skip_thought_signature_validator"
 
 func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 
 	// Note: modelName and stream parameters are part of the fixed method signature
 	_ = modelName // Unused but required by interface
@@ -117,29 +118,19 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 			switch itemType {
 			case "message":
 				if strings.EqualFold(itemRole, "system") {
-					if contentArray := item.Get("content"); contentArray.Exists() {
-						systemInstr := ""
-						if systemInstructionResult := gjson.Get(out, "system_instruction"); systemInstructionResult.Exists() {
-							systemInstr = systemInstructionResult.Raw
-						} else {
-							systemInstr = `{"parts":[]}`
-						}
-
-						if contentArray.IsArray() {
-							contentArray.ForEach(func(_, contentItem gjson.Result) bool {
-								part := `{"text":""}`
-								text := contentItem.Get("text").String()
-								part, _ = sjson.Set(part, "text", text)
-								systemInstr, _ = sjson.SetRaw(systemInstr, "parts.-1", part)
-								return true
-							})
-						} else if contentArray.Type == gjson.String {
-							part := `{"text":""}`
-							part, _ = sjson.Set(part, "text", contentArray.String())
-							systemInstr, _ = sjson.SetRaw(systemInstr, "parts.-1", part)
-						}
-
-						if systemInstr != `{"parts":[]}` {
+					if contentArray := item.Get("content"); contentArray.Exists() && contentArray.IsArray() {
+						var builder strings.Builder
+						contentArray.ForEach(func(_, contentItem gjson.Result) bool {
+							text := contentItem.Get("text").String()
+							if builder.Len() > 0 && text != "" {
+								builder.WriteByte('\n')
+							}
+							builder.WriteString(text)
+							return true
+						})
+						if !gjson.Get(out, "system_instruction").Exists() {
+							systemInstr := `{"parts":[{"text":""}]}`
+							systemInstr, _ = sjson.Set(systemInstr, "parts.0.text", builder.String())
 							out, _ = sjson.SetRaw(out, "system_instruction", systemInstr)
 						}
 					}
@@ -246,22 +237,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 					})
 
 					flush()
-				} else if contentArray.Type == gjson.String {
-					effRole := "user"
-					if itemRole != "" {
-						switch strings.ToLower(itemRole) {
-						case "assistant", "model":
-							effRole = "model"
-						default:
-							effRole = strings.ToLower(itemRole)
-						}
-					}
-
-					one := `{"role":"","parts":[{"text":""}]}`
-					one, _ = sjson.Set(one, "role", effRole)
-					one, _ = sjson.Set(one, "parts.0.text", contentArray.String())
-					out, _ = sjson.SetRaw(out, "contents.-1", one)
 				}
+
 			case "function_call":
 				// Handle function calls - convert to model message with functionCall
 				name := item.Get("name").String()
diff --git a/internal/translator/init.go b/internal/translator/init.go
index 0754db03b4..084ea7ac23 100644
--- a/internal/translator/init.go
+++ b/internal/translator/init.go
@@ -33,7 +33,4 @@ import (
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/gemini"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/openai/chat-completions"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/antigravity/openai/responses"
-
-	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/claude"
-	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/openai"
 )
diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go
index acb79a1396..8fac14ecf6 100644
--- a/internal/translator/openai/claude/openai_claude_request.go
+++ b/internal/translator/openai/claude/openai_claude_request.go
@@ -6,6 +6,7 @@
 package claude
 
 import (
+	"bytes"
 	"strings"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
@@ -17,7 +18,7 @@ import (
 // It extracts the model name, system instruction, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the OpenAI API.
 func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	// Base OpenAI Chat Completions API template
 	out := `{"model":"","messages":[]}`
 
@@ -60,10 +61,13 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 	out, _ = sjson.Set(out, "stream", stream)
 
 	// Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort
+	// Also track if thinking is enabled to ensure reasoning_content is added for tool_calls
+	thinkingEnabled := false
 	if thinkingConfig := root.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
 		if thinkingType := thinkingConfig.Get("type"); thinkingType.Exists() {
 			switch thinkingType.String() {
 			case "enabled":
+				thinkingEnabled = true
 				if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() {
 					budget := int(budgetTokens.Int())
 					if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" {
@@ -75,10 +79,6 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 						out, _ = sjson.Set(out, "reasoning_effort", effort)
 					}
 				}
-			case "adaptive":
-				// Claude adaptive means "enable with max capacity"; keep it as highest level
-				// and let ApplyThinking normalize per target model capability.
-				out, _ = sjson.Set(out, "reasoning_effort", string(thinking.LevelXHigh))
 			case "disabled":
 				if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" {
 					out, _ = sjson.Set(out, "reasoning_effort", effort)
@@ -220,6 +220,10 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 						// Add reasoning_content if present
 						if hasReasoning {
 							msgJSON, _ = sjson.Set(msgJSON, "reasoning_content", reasoningContent)
+						} else if thinkingEnabled && hasToolCalls {
+							// Claude API requires reasoning_content in assistant messages with tool_calls
+							// when thinking mode is enabled, even if empty
+							msgJSON, _ = sjson.Set(msgJSON, "reasoning_content", "")
 						}
 
 						// Add tool_calls if present (in same message as content)
diff --git a/internal/translator/openai/claude/openai_claude_request_test.go b/internal/translator/openai/claude/openai_claude_request_test.go
index d08de1b25c..3e7fe8fd07 100644
--- a/internal/translator/openai/claude/openai_claude_request_test.go
+++ b/internal/translator/openai/claude/openai_claude_request_test.go
@@ -588,3 +588,124 @@ func TestConvertClaudeRequestToOpenAI_AssistantThinkingToolUseThinkingSplit(t *t
 		t.Fatalf("Expected reasoning_content %q, got %q", "t1\n\nt2", got)
 	}
 }
+
+// TestConvertClaudeRequestToOpenAI_ThinkingEnabledToolCallsNoReasoning tests that
+// when thinking mode is enabled and assistant message has tool_calls but no thinking content,
+// an empty reasoning_content is added to satisfy Claude API requirements.
+func TestConvertClaudeRequestToOpenAI_ThinkingEnabledToolCallsNoReasoning(t *testing.T) {
+	tests := []struct {
+		name                    string
+		inputJSON               string
+		wantHasReasoningContent bool
+		wantReasoningContent    string
+	}{
+		{
+			name: "thinking enabled with tool_calls but no thinking content adds empty reasoning_content",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"thinking": {"type": "enabled", "budget_tokens": 4000},
+				"messages": [{
+					"role": "assistant",
+					"content": [
+						{"type": "text", "text": "I will help you."},
+						{"type": "tool_use", "id": "tool_1", "name": "read_file", "input": {"path": "/test.txt"}}
+					]
+				}]
+			}`,
+			wantHasReasoningContent: true,
+			wantReasoningContent:    "",
+		},
+		{
+			name: "thinking enabled with tool_calls and thinking content uses actual reasoning",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"thinking": {"type": "enabled", "budget_tokens": 4000},
+				"messages": [{
+					"role": "assistant",
+					"content": [
+						{"type": "thinking", "thinking": "Let me analyze this..."},
+						{"type": "text", "text": "I will help you."},
+						{"type": "tool_use", "id": "tool_1", "name": "read_file", "input": {"path": "/test.txt"}}
+					]
+				}]
+			}`,
+			wantHasReasoningContent: true,
+			wantReasoningContent:    "Let me analyze this...",
+		},
+		{
+			name: "thinking disabled with tool_calls does not add reasoning_content",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"thinking": {"type": "disabled"},
+				"messages": [{
+					"role": "assistant",
+					"content": [
+						{"type": "text", "text": "I will help you."},
+						{"type": "tool_use", "id": "tool_1", "name": "read_file", "input": {"path": "/test.txt"}}
+					]
+				}]
+			}`,
+			wantHasReasoningContent: false,
+			wantReasoningContent:    "",
+		},
+		{
+			name: "no thinking config with tool_calls does not add reasoning_content",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"messages": [{
+					"role": "assistant",
+					"content": [
+						{"type": "text", "text": "I will help you."},
+						{"type": "tool_use", "id": "tool_1", "name": "read_file", "input": {"path": "/test.txt"}}
+					]
+				}]
+			}`,
+			wantHasReasoningContent: false,
+			wantReasoningContent:    "",
+		},
+		{
+			name: "thinking enabled without tool_calls and no thinking content does not add reasoning_content",
+			inputJSON: `{
+				"model": "claude-3-opus",
+				"thinking": {"type": "enabled", "budget_tokens": 4000},
+				"messages": [{
+					"role": "assistant",
+					"content": [
+						{"type": "text", "text": "Simple response without tools."}
+					]
+				}]
+			}`,
+			wantHasReasoningContent: false,
+			wantReasoningContent:    "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ConvertClaudeRequestToOpenAI("test-model", []byte(tt.inputJSON), false)
+			resultJSON := gjson.ParseBytes(result)
+
+			messages := resultJSON.Get("messages").Array()
+			if len(messages) == 0 {
+				t.Fatal("Expected at least one message")
+			}
+
+			assistantMsg := messages[0]
+			if assistantMsg.Get("role").String() != "assistant" {
+				t.Fatalf("Expected assistant message, got %s", assistantMsg.Get("role").String())
+			}
+
+			hasReasoningContent := assistantMsg.Get("reasoning_content").Exists()
+			if hasReasoningContent != tt.wantHasReasoningContent {
+				t.Errorf("reasoning_content existence = %v, want %v", hasReasoningContent, tt.wantHasReasoningContent)
+			}
+
+			if hasReasoningContent {
+				gotReasoningContent := assistantMsg.Get("reasoning_content").String()
+				if gotReasoningContent != tt.wantReasoningContent {
+					t.Errorf("reasoning_content = %q, want %q", gotReasoningContent, tt.wantReasoningContent)
+				}
+			}
+		})
+	}
+}
diff --git a/internal/translator/openai/claude/openai_claude_response.go b/internal/translator/openai/claude/openai_claude_response.go
index 8ddf3084ae..ca20c84849 100644
--- a/internal/translator/openai/claude/openai_claude_response.go
+++ b/internal/translator/openai/claude/openai_claude_response.go
@@ -8,7 +8,6 @@ package claude
 import (
 	"bytes"
 	"context"
-	"encoding/json"
 	"fmt"
 	"strings"
 
@@ -128,40 +127,16 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 		param.CreatedAt = root.Get("created").Int()
 	}
 
-	// Helper to ensure message_start is sent before any content_block_start
-	// This is required by the Anthropic SSE protocol - message_start must come first.
-	// Some OpenAI-compatible providers (like GitHub Copilot) may not send role: "assistant"
-	// in the first chunk, so we need to emit message_start when we first see content.
-	ensureMessageStarted := func() {
-		if param.MessageStarted {
-			return
-		}
-		messageStart := map[string]interface{}{
-			"type": "message_start",
-			"message": map[string]interface{}{
-				"id":            param.MessageID,
-				"type":          "message",
-				"role":          "assistant",
-				"model":         param.Model,
-				"content":       []interface{}{},
-				"stop_reason":   nil,
-				"stop_sequence": nil,
-				"usage": map[string]interface{}{
-					"input_tokens":  0,
-					"output_tokens": 0,
-				},
-			},
-		}
-		messageStartJSON, _ := json.Marshal(messageStart)
-		results = append(results, "event: message_start\ndata: "+string(messageStartJSON)+"\n\n")
-		param.MessageStarted = true
-	}
-
-	// Check if this is the first chunk (has role)
+	// Emit message_start on the very first chunk, regardless of whether it has a role field.
+	// Some providers (like Copilot) may send tool_calls in the first chunk without a role field.
 	if delta := root.Get("choices.0.delta"); delta.Exists() {
-		if role := delta.Get("role"); role.Exists() && role.String() == "assistant" && !param.MessageStarted {
+		if !param.MessageStarted {
 			// Send message_start event
-			ensureMessageStarted()
+			messageStartJSON := `{"type":"message_start","message":{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}}`
+			messageStartJSON, _ = sjson.Set(messageStartJSON, "message.id", param.MessageID)
+			messageStartJSON, _ = sjson.Set(messageStartJSON, "message.model", param.Model)
+			results = append(results, "event: message_start\ndata: "+messageStartJSON+"\n\n")
+			param.MessageStarted = true
 
 			// Don't send content_block_start for text here - wait for actual content
 		}
@@ -174,34 +149,20 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 				}
 				stopTextContentBlock(param, &results)
 				if !param.ThinkingContentBlockStarted {
-					ensureMessageStarted() // Must send message_start before content_block_start
 					if param.ThinkingContentBlockIndex == -1 {
 						param.ThinkingContentBlockIndex = param.NextContentBlockIndex
 						param.NextContentBlockIndex++
 					}
-					contentBlockStart := map[string]interface{}{
-						"type":  "content_block_start",
-						"index": param.ThinkingContentBlockIndex,
-						"content_block": map[string]interface{}{
-							"type":     "thinking",
-							"thinking": "",
-						},
-					}
-					contentBlockStartJSON, _ := json.Marshal(contentBlockStart)
-					results = append(results, "event: content_block_start\ndata: "+string(contentBlockStartJSON)+"\n\n")
+					contentBlockStartJSON := `{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}`
+					contentBlockStartJSON, _ = sjson.Set(contentBlockStartJSON, "index", param.ThinkingContentBlockIndex)
+					results = append(results, "event: content_block_start\ndata: "+contentBlockStartJSON+"\n\n")
 					param.ThinkingContentBlockStarted = true
 				}
 
-				thinkingDelta := map[string]interface{}{
-					"type":  "content_block_delta",
-					"index": param.ThinkingContentBlockIndex,
-					"delta": map[string]interface{}{
-						"type":     "thinking_delta",
-						"thinking": reasoningText,
-					},
-				}
-				thinkingDeltaJSON, _ := json.Marshal(thinkingDelta)
-				results = append(results, "event: content_block_delta\ndata: "+string(thinkingDeltaJSON)+"\n\n")
+				thinkingDeltaJSON := `{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":""}}`
+				thinkingDeltaJSON, _ = sjson.Set(thinkingDeltaJSON, "index", param.ThinkingContentBlockIndex)
+				thinkingDeltaJSON, _ = sjson.Set(thinkingDeltaJSON, "delta.thinking", reasoningText)
+				results = append(results, "event: content_block_delta\ndata: "+thinkingDeltaJSON+"\n\n")
 			}
 		}
 
@@ -209,35 +170,21 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 		if content := delta.Get("content"); content.Exists() && content.String() != "" {
 			// Send content_block_start for text if not already sent
 			if !param.TextContentBlockStarted {
-				ensureMessageStarted() // Must send message_start before content_block_start
 				stopThinkingContentBlock(param, &results)
 				if param.TextContentBlockIndex == -1 {
 					param.TextContentBlockIndex = param.NextContentBlockIndex
 					param.NextContentBlockIndex++
 				}
-				contentBlockStart := map[string]interface{}{
-					"type":  "content_block_start",
-					"index": param.TextContentBlockIndex,
-					"content_block": map[string]interface{}{
-						"type": "text",
-						"text": "",
-					},
-				}
-				contentBlockStartJSON, _ := json.Marshal(contentBlockStart)
-				results = append(results, "event: content_block_start\ndata: "+string(contentBlockStartJSON)+"\n\n")
+				contentBlockStartJSON := `{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`
+				contentBlockStartJSON, _ = sjson.Set(contentBlockStartJSON, "index", param.TextContentBlockIndex)
+				results = append(results, "event: content_block_start\ndata: "+contentBlockStartJSON+"\n\n")
 				param.TextContentBlockStarted = true
 			}
 
-			contentDelta := map[string]interface{}{
-				"type":  "content_block_delta",
-				"index": param.TextContentBlockIndex,
-				"delta": map[string]interface{}{
-					"type": "text_delta",
-					"text": content.String(),
-				},
-			}
-			contentDeltaJSON, _ := json.Marshal(contentDelta)
-			results = append(results, "event: content_block_delta\ndata: "+string(contentDeltaJSON)+"\n\n")
+			contentDeltaJSON := `{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":""}}`
+			contentDeltaJSON, _ = sjson.Set(contentDeltaJSON, "index", param.TextContentBlockIndex)
+			contentDeltaJSON, _ = sjson.Set(contentDeltaJSON, "delta.text", content.String())
+			results = append(results, "event: content_block_delta\ndata: "+contentDeltaJSON+"\n\n")
 
 			// Accumulate content
 			param.ContentAccumulator.WriteString(content.String())
@@ -270,25 +217,16 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 					if name := function.Get("name"); name.Exists() {
 						accumulator.Name = name.String()
 
-						ensureMessageStarted() // Must send message_start before content_block_start
-
 						stopThinkingContentBlock(param, &results)
 
 						stopTextContentBlock(param, &results)
 
 						// Send content_block_start for tool_use
-						contentBlockStart := map[string]interface{}{
-							"type":  "content_block_start",
-							"index": blockIndex,
-							"content_block": map[string]interface{}{
-								"type":  "tool_use",
-								"id":    accumulator.ID,
-								"name":  accumulator.Name,
-								"input": map[string]interface{}{},
-							},
-						}
-						contentBlockStartJSON, _ := json.Marshal(contentBlockStart)
-						results = append(results, "event: content_block_start\ndata: "+string(contentBlockStartJSON)+"\n\n")
+						contentBlockStartJSON := `{"type":"content_block_start","index":0,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`
+						contentBlockStartJSON, _ = sjson.Set(contentBlockStartJSON, "index", blockIndex)
+						contentBlockStartJSON, _ = sjson.Set(contentBlockStartJSON, "content_block.id", accumulator.ID)
+						contentBlockStartJSON, _ = sjson.Set(contentBlockStartJSON, "content_block.name", accumulator.Name)
+						results = append(results, "event: content_block_start\ndata: "+contentBlockStartJSON+"\n\n")
 					}
 
 					// Handle function arguments
@@ -312,12 +250,9 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 
 		// Send content_block_stop for thinking content if needed
 		if param.ThinkingContentBlockStarted {
-			contentBlockStop := map[string]interface{}{
-				"type":  "content_block_stop",
-				"index": param.ThinkingContentBlockIndex,
-			}
-			contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
-			results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+			contentBlockStopJSON := `{"type":"content_block_stop","index":0}`
+			contentBlockStopJSON, _ = sjson.Set(contentBlockStopJSON, "index", param.ThinkingContentBlockIndex)
+			results = append(results, "event: content_block_stop\ndata: "+contentBlockStopJSON+"\n\n")
 			param.ThinkingContentBlockStarted = false
 			param.ThinkingContentBlockIndex = -1
 		}
@@ -333,24 +268,15 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 
 				// Send complete input_json_delta with all accumulated arguments
 				if accumulator.Arguments.Len() > 0 {
-					inputDelta := map[string]interface{}{
-						"type":  "content_block_delta",
-						"index": blockIndex,
-						"delta": map[string]interface{}{
-							"type":         "input_json_delta",
-							"partial_json": util.FixJSON(accumulator.Arguments.String()),
-						},
-					}
-					inputDeltaJSON, _ := json.Marshal(inputDelta)
-					results = append(results, "event: content_block_delta\ndata: "+string(inputDeltaJSON)+"\n\n")
+					inputDeltaJSON := `{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""}}`
+					inputDeltaJSON, _ = sjson.Set(inputDeltaJSON, "index", blockIndex)
+					inputDeltaJSON, _ = sjson.Set(inputDeltaJSON, "delta.partial_json", util.FixJSON(accumulator.Arguments.String()))
+					results = append(results, "event: content_block_delta\ndata: "+inputDeltaJSON+"\n\n")
 				}
 
-				contentBlockStop := map[string]interface{}{
-					"type":  "content_block_stop",
-					"index": blockIndex,
-				}
-				contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
-				results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+				contentBlockStopJSON := `{"type":"content_block_stop","index":0}`
+				contentBlockStopJSON, _ = sjson.Set(contentBlockStopJSON, "index", blockIndex)
+				results = append(results, "event: content_block_stop\ndata: "+contentBlockStopJSON+"\n\n")
 				delete(param.ToolCallBlockIndexes, index)
 			}
 			param.ContentBlocksStopped = true
@@ -363,36 +289,22 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 	// Only process if usage has actual values (not null)
 	if param.FinishReason != "" {
 		usage := root.Get("usage")
-		var inputTokens, outputTokens int64
+		var inputTokens, outputTokens, cachedTokens int64
 		if usage.Exists() && usage.Type != gjson.Null {
-			// Check if usage has actual token counts
-			promptTokens := usage.Get("prompt_tokens")
-			completionTokens := usage.Get("completion_tokens")
-
-			if promptTokens.Exists() && completionTokens.Exists() {
-				inputTokens = promptTokens.Int()
-				outputTokens = completionTokens.Int()
+			inputTokens, outputTokens, cachedTokens = extractOpenAIUsage(usage)
+			// Send message_delta with usage
+			messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
+			messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
+			messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens)
+			messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens)
+			if cachedTokens > 0 {
+				messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.cache_read_input_tokens", cachedTokens)
 			}
-		}
-		// Send message_delta with usage
-		messageDelta := map[string]interface{}{
-			"type": "message_delta",
-			"delta": map[string]interface{}{
-				"stop_reason":   mapOpenAIFinishReasonToAnthropic(param.FinishReason),
-				"stop_sequence": nil,
-			},
-			"usage": map[string]interface{}{
-				"input_tokens":  inputTokens,
-				"output_tokens": outputTokens,
-			},
-		}
-
-		messageDeltaJSON, _ := json.Marshal(messageDelta)
-		results = append(results, "event: message_delta\ndata: "+string(messageDeltaJSON)+"\n\n")
-		param.MessageDeltaSent = true
-
-		emitMessageStopIfNeeded(param, &results)
+			results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
+			param.MessageDeltaSent = true
 
+			emitMessageStopIfNeeded(param, &results)
+		}
 	}
 
 	return results
@@ -404,12 +316,9 @@ func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams)
 
 	// Ensure all content blocks are stopped before final events
 	if param.ThinkingContentBlockStarted {
-		contentBlockStop := map[string]interface{}{
-			"type":  "content_block_stop",
-			"index": param.ThinkingContentBlockIndex,
-		}
-		contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
-		results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+		contentBlockStopJSON := `{"type":"content_block_stop","index":0}`
+		contentBlockStopJSON, _ = sjson.Set(contentBlockStopJSON, "index", param.ThinkingContentBlockIndex)
+		results = append(results, "event: content_block_stop\ndata: "+contentBlockStopJSON+"\n\n")
 		param.ThinkingContentBlockStarted = false
 		param.ThinkingContentBlockIndex = -1
 	}
@@ -422,24 +331,15 @@ func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams)
 			blockIndex := param.toolContentBlockIndex(index)
 
 			if accumulator.Arguments.Len() > 0 {
-				inputDelta := map[string]interface{}{
-					"type":  "content_block_delta",
-					"index": blockIndex,
-					"delta": map[string]interface{}{
-						"type":         "input_json_delta",
-						"partial_json": util.FixJSON(accumulator.Arguments.String()),
-					},
-				}
-				inputDeltaJSON, _ := json.Marshal(inputDelta)
-				results = append(results, "event: content_block_delta\ndata: "+string(inputDeltaJSON)+"\n\n")
+				inputDeltaJSON := `{"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":""}}`
+				inputDeltaJSON, _ = sjson.Set(inputDeltaJSON, "index", blockIndex)
+				inputDeltaJSON, _ = sjson.Set(inputDeltaJSON, "delta.partial_json", util.FixJSON(accumulator.Arguments.String()))
+				results = append(results, "event: content_block_delta\ndata: "+inputDeltaJSON+"\n\n")
 			}
 
-			contentBlockStop := map[string]interface{}{
-				"type":  "content_block_stop",
-				"index": blockIndex,
-			}
-			contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
-			results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+			contentBlockStopJSON := `{"type":"content_block_stop","index":0}`
+			contentBlockStopJSON, _ = sjson.Set(contentBlockStopJSON, "index", blockIndex)
+			results = append(results, "event: content_block_stop\ndata: "+contentBlockStopJSON+"\n\n")
 			delete(param.ToolCallBlockIndexes, index)
 		}
 		param.ContentBlocksStopped = true
@@ -447,16 +347,9 @@ func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams)
 
 	// If we haven't sent message_delta yet (no usage info was received), send it now
 	if param.FinishReason != "" && !param.MessageDeltaSent {
-		messageDelta := map[string]interface{}{
-			"type": "message_delta",
-			"delta": map[string]interface{}{
-				"stop_reason":   mapOpenAIFinishReasonToAnthropic(param.FinishReason),
-				"stop_sequence": nil,
-			},
-		}
-
-		messageDeltaJSON, _ := json.Marshal(messageDelta)
-		results = append(results, "event: message_delta\ndata: "+string(messageDeltaJSON)+"\n\n")
+		messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
+		messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
+		results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
 		param.MessageDeltaSent = true
 	}
 
@@ -469,105 +362,72 @@ func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams)
 func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string {
 	root := gjson.ParseBytes(rawJSON)
 
-	// Build Anthropic response
-	response := map[string]interface{}{
-		"id":            root.Get("id").String(),
-		"type":          "message",
-		"role":          "assistant",
-		"model":         root.Get("model").String(),
-		"content":       []interface{}{},
-		"stop_reason":   nil,
-		"stop_sequence": nil,
-		"usage": map[string]interface{}{
-			"input_tokens":  0,
-			"output_tokens": 0,
-		},
-	}
+	out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
+	out, _ = sjson.Set(out, "id", root.Get("id").String())
+	out, _ = sjson.Set(out, "model", root.Get("model").String())
 
 	// Process message content and tool calls
-	var contentBlocks []interface{}
-
-	if choices := root.Get("choices"); choices.Exists() && choices.IsArray() {
+	if choices := root.Get("choices"); choices.Exists() && choices.IsArray() && len(choices.Array()) > 0 {
 		choice := choices.Array()[0] // Take first choice
-		reasoningNode := choice.Get("message.reasoning_content")
-		allReasoning := collectOpenAIReasoningTexts(reasoningNode)
 
-		for _, reasoningText := range allReasoning {
+		reasoningNode := choice.Get("message.reasoning_content")
+		for _, reasoningText := range collectOpenAIReasoningTexts(reasoningNode) {
 			if reasoningText == "" {
 				continue
 			}
-			contentBlocks = append(contentBlocks, map[string]interface{}{
-				"type":     "thinking",
-				"thinking": reasoningText,
-			})
+			block := `{"type":"thinking","thinking":""}`
+			block, _ = sjson.Set(block, "thinking", reasoningText)
+			out, _ = sjson.SetRaw(out, "content.-1", block)
 		}
 
 		// Handle text content
 		if content := choice.Get("message.content"); content.Exists() && content.String() != "" {
-			textBlock := map[string]interface{}{
-				"type": "text",
-				"text": content.String(),
-			}
-			contentBlocks = append(contentBlocks, textBlock)
+			block := `{"type":"text","text":""}`
+			block, _ = sjson.Set(block, "text", content.String())
+			out, _ = sjson.SetRaw(out, "content.-1", block)
 		}
 
 		// Handle tool calls
 		if toolCalls := choice.Get("message.tool_calls"); toolCalls.Exists() && toolCalls.IsArray() {
 			toolCalls.ForEach(func(_, toolCall gjson.Result) bool {
-				toolUseBlock := map[string]interface{}{
-					"type": "tool_use",
-					"id":   toolCall.Get("id").String(),
-					"name": toolCall.Get("function.name").String(),
-				}
-
-				// Parse arguments
-				argsStr := toolCall.Get("function.arguments").String()
-				argsStr = util.FixJSON(argsStr)
-				if argsStr != "" {
-					var args interface{}
-					if err := json.Unmarshal([]byte(argsStr), &args); err == nil {
-						toolUseBlock["input"] = args
+				toolUseBlock := `{"type":"tool_use","id":"","name":"","input":{}}`
+				toolUseBlock, _ = sjson.Set(toolUseBlock, "id", toolCall.Get("id").String())
+				toolUseBlock, _ = sjson.Set(toolUseBlock, "name", toolCall.Get("function.name").String())
+
+				argsStr := util.FixJSON(toolCall.Get("function.arguments").String())
+				if argsStr != "" && gjson.Valid(argsStr) {
+					argsJSON := gjson.Parse(argsStr)
+					if argsJSON.IsObject() {
+						toolUseBlock, _ = sjson.SetRaw(toolUseBlock, "input", argsJSON.Raw)
 					} else {
-						toolUseBlock["input"] = map[string]interface{}{}
+						toolUseBlock, _ = sjson.SetRaw(toolUseBlock, "input", "{}")
 					}
 				} else {
-					toolUseBlock["input"] = map[string]interface{}{}
+					toolUseBlock, _ = sjson.SetRaw(toolUseBlock, "input", "{}")
 				}
 
-				contentBlocks = append(contentBlocks, toolUseBlock)
+				out, _ = sjson.SetRaw(out, "content.-1", toolUseBlock)
 				return true
 			})
 		}
 
 		// Set stop reason
 		if finishReason := choice.Get("finish_reason"); finishReason.Exists() {
-			response["stop_reason"] = mapOpenAIFinishReasonToAnthropic(finishReason.String())
+			out, _ = sjson.Set(out, "stop_reason", mapOpenAIFinishReasonToAnthropic(finishReason.String()))
 		}
 	}
 
-	response["content"] = contentBlocks
-
 	// Set usage information
 	if usage := root.Get("usage"); usage.Exists() {
-		response["usage"] = map[string]interface{}{
-			"input_tokens":  usage.Get("prompt_tokens").Int(),
-			"output_tokens": usage.Get("completion_tokens").Int(),
-			"reasoning_tokens": func() int64 {
-				if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() {
-					return v.Int()
-				}
-				return 0
-			}(),
-		}
-	} else {
-		response["usage"] = map[string]interface{}{
-			"input_tokens":  0,
-			"output_tokens": 0,
+		inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(usage)
+		out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
+		out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
+		if cachedTokens > 0 {
+			out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
 		}
 	}
 
-	responseJSON, _ := json.Marshal(response)
-	return []string{string(responseJSON)}
+	return []string{out}
 }
 
 // mapOpenAIFinishReasonToAnthropic maps OpenAI finish reasons to Anthropic equivalents
@@ -614,15 +474,15 @@ func collectOpenAIReasoningTexts(node gjson.Result) []string {
 
 	switch node.Type {
 	case gjson.String:
-		if text := strings.TrimSpace(node.String()); text != "" {
+		if text := node.String(); text != "" {
 			texts = append(texts, text)
 		}
 	case gjson.JSON:
 		if text := node.Get("text"); text.Exists() {
-			if trimmed := strings.TrimSpace(text.String()); trimmed != "" {
-				texts = append(texts, trimmed)
+			if textStr := text.String(); textStr != "" {
+				texts = append(texts, textStr)
 			}
-		} else if raw := strings.TrimSpace(node.Raw); raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
+		} else if raw := node.Raw; raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
 			texts = append(texts, raw)
 		}
 	}
@@ -634,12 +494,9 @@ func stopThinkingContentBlock(param *ConvertOpenAIResponseToAnthropicParams, res
 	if !param.ThinkingContentBlockStarted {
 		return
 	}
-	contentBlockStop := map[string]interface{}{
-		"type":  "content_block_stop",
-		"index": param.ThinkingContentBlockIndex,
-	}
-	contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
-	*results = append(*results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+	contentBlockStopJSON := `{"type":"content_block_stop","index":0}`
+	contentBlockStopJSON, _ = sjson.Set(contentBlockStopJSON, "index", param.ThinkingContentBlockIndex)
+	*results = append(*results, "event: content_block_stop\ndata: "+contentBlockStopJSON+"\n\n")
 	param.ThinkingContentBlockStarted = false
 	param.ThinkingContentBlockIndex = -1
 }
@@ -656,12 +513,9 @@ func stopTextContentBlock(param *ConvertOpenAIResponseToAnthropicParams, results
 	if !param.TextContentBlockStarted {
 		return
 	}
-	contentBlockStop := map[string]interface{}{
-		"type":  "content_block_stop",
-		"index": param.TextContentBlockIndex,
-	}
-	contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
-	*results = append(*results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+	contentBlockStopJSON := `{"type":"content_block_stop","index":0}`
+	contentBlockStopJSON, _ = sjson.Set(contentBlockStopJSON, "index", param.TextContentBlockIndex)
+	*results = append(*results, "event: content_block_stop\ndata: "+contentBlockStopJSON+"\n\n")
 	param.TextContentBlockStarted = false
 	param.TextContentBlockIndex = -1
 }
@@ -681,29 +535,19 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 	_ = requestRawJSON
 
 	root := gjson.ParseBytes(rawJSON)
+	out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
+	out, _ = sjson.Set(out, "id", root.Get("id").String())
+	out, _ = sjson.Set(out, "model", root.Get("model").String())
 
-	response := map[string]interface{}{
-		"id":            root.Get("id").String(),
-		"type":          "message",
-		"role":          "assistant",
-		"model":         root.Get("model").String(),
-		"content":       []interface{}{},
-		"stop_reason":   nil,
-		"stop_sequence": nil,
-		"usage": map[string]interface{}{
-			"input_tokens":  0,
-			"output_tokens": 0,
-		},
-	}
-
-	contentBlocks := make([]interface{}, 0)
 	hasToolCall := false
+	stopReasonSet := false
 
 	if choices := root.Get("choices"); choices.Exists() && choices.IsArray() && len(choices.Array()) > 0 {
 		choice := choices.Array()[0]
 
 		if finishReason := choice.Get("finish_reason"); finishReason.Exists() {
-			response["stop_reason"] = mapOpenAIFinishReasonToAnthropic(finishReason.String())
+			out, _ = sjson.Set(out, "stop_reason", mapOpenAIFinishReasonToAnthropic(finishReason.String()))
+			stopReasonSet = true
 		}
 
 		if message := choice.Get("message"); message.Exists() {
@@ -716,10 +560,9 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 						if textBuilder.Len() == 0 {
 							return
 						}
-						contentBlocks = append(contentBlocks, map[string]interface{}{
-							"type": "text",
-							"text": textBuilder.String(),
-						})
+						block := `{"type":"text","text":""}`
+						block, _ = sjson.Set(block, "text", textBuilder.String())
+						out, _ = sjson.SetRaw(out, "content.-1", block)
 						textBuilder.Reset()
 					}
 
@@ -727,16 +570,14 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 						if thinkingBuilder.Len() == 0 {
 							return
 						}
-						contentBlocks = append(contentBlocks, map[string]interface{}{
-							"type":     "thinking",
-							"thinking": thinkingBuilder.String(),
-						})
+						block := `{"type":"thinking","thinking":""}`
+						block, _ = sjson.Set(block, "thinking", thinkingBuilder.String())
+						out, _ = sjson.SetRaw(out, "content.-1", block)
 						thinkingBuilder.Reset()
 					}
 
 					for _, item := range contentResult.Array() {
-						typeStr := item.Get("type").String()
-						switch typeStr {
+						switch item.Get("type").String() {
 						case "text":
 							flushThinking()
 							textBuilder.WriteString(item.Get("text").String())
@@ -747,25 +588,23 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 							if toolCalls.IsArray() {
 								toolCalls.ForEach(func(_, tc gjson.Result) bool {
 									hasToolCall = true
-									toolUse := map[string]interface{}{
-										"type": "tool_use",
-										"id":   tc.Get("id").String(),
-										"name": tc.Get("function.name").String(),
-									}
+									toolUse := `{"type":"tool_use","id":"","name":"","input":{}}`
+									toolUse, _ = sjson.Set(toolUse, "id", tc.Get("id").String())
+									toolUse, _ = sjson.Set(toolUse, "name", tc.Get("function.name").String())
 
 									argsStr := util.FixJSON(tc.Get("function.arguments").String())
-									if argsStr != "" {
-										var parsed interface{}
-										if err := json.Unmarshal([]byte(argsStr), &parsed); err == nil {
-											toolUse["input"] = parsed
+									if argsStr != "" && gjson.Valid(argsStr) {
+										argsJSON := gjson.Parse(argsStr)
+										if argsJSON.IsObject() {
+											toolUse, _ = sjson.SetRaw(toolUse, "input", argsJSON.Raw)
 										} else {
-											toolUse["input"] = map[string]interface{}{}
+											toolUse, _ = sjson.SetRaw(toolUse, "input", "{}")
 										}
 									} else {
-										toolUse["input"] = map[string]interface{}{}
+										toolUse, _ = sjson.SetRaw(toolUse, "input", "{}")
 									}
 
-									contentBlocks = append(contentBlocks, toolUse)
+									out, _ = sjson.SetRaw(out, "content.-1", toolUse)
 									return true
 								})
 							}
@@ -785,10 +624,9 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 				} else if contentResult.Type == gjson.String {
 					textContent := contentResult.String()
 					if textContent != "" {
-						contentBlocks = append(contentBlocks, map[string]interface{}{
-							"type": "text",
-							"text": textContent,
-						})
+						block := `{"type":"text","text":""}`
+						block, _ = sjson.Set(block, "text", textContent)
+						out, _ = sjson.SetRaw(out, "content.-1", block)
 					}
 				}
 			}
@@ -798,83 +636,78 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 					if reasoningText == "" {
 						continue
 					}
-					contentBlocks = append(contentBlocks, map[string]interface{}{
-						"type":     "thinking",
-						"thinking": reasoningText,
-					})
+					block := `{"type":"thinking","thinking":""}`
+					block, _ = sjson.Set(block, "thinking", reasoningText)
+					out, _ = sjson.SetRaw(out, "content.-1", block)
 				}
 			}
 
 			if toolCalls := message.Get("tool_calls"); toolCalls.Exists() && toolCalls.IsArray() {
 				toolCalls.ForEach(func(_, toolCall gjson.Result) bool {
 					hasToolCall = true
-					toolUseBlock := map[string]interface{}{
-						"type": "tool_use",
-						"id":   toolCall.Get("id").String(),
-						"name": toolCall.Get("function.name").String(),
-					}
-
-					argsStr := toolCall.Get("function.arguments").String()
-					argsStr = util.FixJSON(argsStr)
-					if argsStr != "" {
-						var args interface{}
-						if err := json.Unmarshal([]byte(argsStr), &args); err == nil {
-							toolUseBlock["input"] = args
+					toolUseBlock := `{"type":"tool_use","id":"","name":"","input":{}}`
+					toolUseBlock, _ = sjson.Set(toolUseBlock, "id", toolCall.Get("id").String())
+					toolUseBlock, _ = sjson.Set(toolUseBlock, "name", toolCall.Get("function.name").String())
+
+					argsStr := util.FixJSON(toolCall.Get("function.arguments").String())
+					if argsStr != "" && gjson.Valid(argsStr) {
+						argsJSON := gjson.Parse(argsStr)
+						if argsJSON.IsObject() {
+							toolUseBlock, _ = sjson.SetRaw(toolUseBlock, "input", argsJSON.Raw)
 						} else {
-							toolUseBlock["input"] = map[string]interface{}{}
+							toolUseBlock, _ = sjson.SetRaw(toolUseBlock, "input", "{}")
 						}
 					} else {
-						toolUseBlock["input"] = map[string]interface{}{}
+						toolUseBlock, _ = sjson.SetRaw(toolUseBlock, "input", "{}")
 					}
 
-					contentBlocks = append(contentBlocks, toolUseBlock)
+					out, _ = sjson.SetRaw(out, "content.-1", toolUseBlock)
 					return true
 				})
 			}
 		}
 	}
 
-	response["content"] = contentBlocks
-
 	if respUsage := root.Get("usage"); respUsage.Exists() {
-		usageJSON := `{}`
-		usageJSON, _ = sjson.Set(usageJSON, "input_tokens", respUsage.Get("prompt_tokens").Int())
-		usageJSON, _ = sjson.Set(usageJSON, "output_tokens", respUsage.Get("completion_tokens").Int())
-		parsedUsage := gjson.Parse(usageJSON).Value().(map[string]interface{})
-		response["usage"] = parsedUsage
-	} else {
-		response["usage"] = `{"input_tokens":0,"output_tokens":0}`
-	}
-
-	if response["stop_reason"] == nil {
-		if hasToolCall {
-			response["stop_reason"] = "tool_use"
-		} else {
-			response["stop_reason"] = "end_turn"
+		inputTokens, outputTokens, cachedTokens := extractOpenAIUsage(respUsage)
+		out, _ = sjson.Set(out, "usage.input_tokens", inputTokens)
+		out, _ = sjson.Set(out, "usage.output_tokens", outputTokens)
+		if cachedTokens > 0 {
+			out, _ = sjson.Set(out, "usage.cache_read_input_tokens", cachedTokens)
 		}
 	}
 
-	if !hasToolCall {
-		if toolBlocks := response["content"].([]interface{}); len(toolBlocks) > 0 {
-			for _, block := range toolBlocks {
-				if m, ok := block.(map[string]interface{}); ok && m["type"] == "tool_use" {
-					hasToolCall = true
-					break
-				}
-			}
-		}
+	if !stopReasonSet {
 		if hasToolCall {
-			response["stop_reason"] = "tool_use"
+			out, _ = sjson.Set(out, "stop_reason", "tool_use")
+		} else {
+			out, _ = sjson.Set(out, "stop_reason", "end_turn")
 		}
 	}
 
-	responseJSON, err := json.Marshal(response)
-	if err != nil {
-		return ""
-	}
-	return string(responseJSON)
+	return out
 }
 
 func ClaudeTokenCount(ctx context.Context, count int64) string {
 	return fmt.Sprintf(`{"input_tokens":%d}`, count)
 }
+
+func extractOpenAIUsage(usage gjson.Result) (int64, int64, int64) {
+	if !usage.Exists() || usage.Type == gjson.Null {
+		return 0, 0, 0
+	}
+
+	inputTokens := usage.Get("prompt_tokens").Int()
+	outputTokens := usage.Get("completion_tokens").Int()
+	cachedTokens := usage.Get("prompt_tokens_details.cached_tokens").Int()
+
+	if cachedTokens > 0 {
+		if inputTokens >= cachedTokens {
+			inputTokens -= cachedTokens
+		} else {
+			inputTokens = 0
+		}
+	}
+
+	return inputTokens, outputTokens, cachedTokens
+}
diff --git a/internal/translator/openai/gemini-cli/openai_gemini_request.go b/internal/translator/openai/gemini-cli/openai_gemini_request.go
index 847c278f36..2efd2fdd19 100644
--- a/internal/translator/openai/gemini-cli/openai_gemini_request.go
+++ b/internal/translator/openai/gemini-cli/openai_gemini_request.go
@@ -6,6 +6,8 @@
 package geminiCLI
 
 import (
+	"bytes"
+
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/gemini"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -15,7 +17,7 @@ import (
 // It extracts the model name, generation config, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the OpenAI API.
 func ConvertGeminiCLIRequestToOpenAI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	rawJSON = []byte(gjson.GetBytes(rawJSON, "request").Raw)
 	rawJSON, _ = sjson.SetBytes(rawJSON, "model", modelName)
 	if gjson.GetBytes(rawJSON, "systemInstruction").Exists() {
diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go
index 167b71e91b..5469a123cf 100644
--- a/internal/translator/openai/gemini/openai_gemini_request.go
+++ b/internal/translator/openai/gemini/openai_gemini_request.go
@@ -6,6 +6,7 @@
 package gemini
 
 import (
+	"bytes"
 	"crypto/rand"
 	"fmt"
 	"math/big"
@@ -20,7 +21,7 @@ import (
 // It extracts the model name, generation config, message contents, and tool declarations
 // from the raw JSON request and returns them in the format expected by the OpenAI API.
 func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	// Base OpenAI Chat Completions API template
 	out := `{"model":"","messages":[]}`
 
@@ -82,27 +83,16 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream
 		}
 
 		// Map Gemini thinkingConfig to OpenAI reasoning_effort.
-		// Always perform conversion to support allowCompat models that may not be in registry.
-		// Note: Google official Python SDK sends snake_case fields (thinking_level/thinking_budget).
+		// Always perform conversion to support allowCompat models that may not be in registry
 		if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() {
-			thinkingLevel := thinkingConfig.Get("thinkingLevel")
-			if !thinkingLevel.Exists() {
-				thinkingLevel = thinkingConfig.Get("thinking_level")
-			}
-			if thinkingLevel.Exists() {
+			if thinkingLevel := thinkingConfig.Get("thinkingLevel"); thinkingLevel.Exists() {
 				effort := strings.ToLower(strings.TrimSpace(thinkingLevel.String()))
 				if effort != "" {
 					out, _ = sjson.Set(out, "reasoning_effort", effort)
 				}
-			} else {
-				thinkingBudget := thinkingConfig.Get("thinkingBudget")
-				if !thinkingBudget.Exists() {
-					thinkingBudget = thinkingConfig.Get("thinking_budget")
-				}
-				if thinkingBudget.Exists() {
-					if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
-						out, _ = sjson.Set(out, "reasoning_effort", effort)
-					}
+			} else if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() {
+				if effort, ok := thinking.ConvertBudgetToLevel(int(thinkingBudget.Int())); ok {
+					out, _ = sjson.Set(out, "reasoning_effort", effort)
 				}
 			}
 		}
diff --git a/internal/translator/openai/openai/chat-completions/openai_openai_request.go b/internal/translator/openai/openai/chat-completions/openai_openai_request.go
index a74cded6c7..211c0eb4a4 100644
--- a/internal/translator/openai/openai/chat-completions/openai_openai_request.go
+++ b/internal/translator/openai/openai/chat-completions/openai_openai_request.go
@@ -3,6 +3,7 @@
 package chat_completions
 
 import (
+	"bytes"
 	"github.com/tidwall/sjson"
 )
 
@@ -24,7 +25,7 @@ func ConvertOpenAIRequestToOpenAI(modelName string, inputRawJSON []byte, _ bool)
 		// If there's an error, return the original JSON or handle the error appropriately.
 		// For now, we'll return the original, but in a real scenario, logging or a more robust error
 		// handling mechanism would be needed.
-		return inputRawJSON
+		return bytes.Clone(inputRawJSON)
 	}
 	return updatedJSON
 }
diff --git a/internal/translator/openai/openai/responses/openai_openai-responses_request.go b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
index 9a64798bd7..86cf19f88c 100644
--- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
@@ -1,6 +1,7 @@
 package responses
 
 import (
+	"bytes"
 	"strings"
 
 	"github.com/tidwall/gjson"
@@ -27,7 +28,7 @@ import (
 // Returns:
 //   - []byte: The transformed request data in OpenAI chat completions format
 func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inputRawJSON []byte, stream bool) []byte {
-	rawJSON := inputRawJSON
+	rawJSON := bytes.Clone(inputRawJSON)
 	// Base OpenAI chat completions template with default values
 	out := `{"model":"","messages":[],"stream":false}`
 
@@ -67,10 +68,7 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 			case "message", "":
 				// Handle regular message conversion
 				role := item.Get("role").String()
-				if role == "developer" {
-					role = "user"
-				}
-				message := `{"role":"","content":[]}`
+				message := `{"role":"","content":""}`
 				message, _ = sjson.Set(message, "role", role)
 
 				if content := item.Get("content"); content.Exists() && content.IsArray() {
@@ -84,16 +82,20 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 						}
 
 						switch contentType {
-						case "input_text", "output_text":
+						case "input_text":
+							text := contentItem.Get("text").String()
+							if messageContent != "" {
+								messageContent += "\n" + text
+							} else {
+								messageContent = text
+							}
+						case "output_text":
 							text := contentItem.Get("text").String()
-							contentPart := `{"type":"text","text":""}`
-							contentPart, _ = sjson.Set(contentPart, "text", text)
-							message, _ = sjson.SetRaw(message, "content.-1", contentPart)
-						case "input_image":
-							imageURL := contentItem.Get("image_url").String()
-							contentPart := `{"type":"image_url","image_url":{"url":""}}`
-							contentPart, _ = sjson.Set(contentPart, "image_url.url", imageURL)
-							message, _ = sjson.SetRaw(message, "content.-1", contentPart)
+							if messageContent != "" {
+								messageContent += "\n" + text
+							} else {
+								messageContent = text
+							}
 						}
 						return true
 					})
@@ -165,8 +167,7 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 			// Only function tools need structural conversion because Chat Completions nests details under "function".
 			toolType := tool.Get("type").String()
 			if toolType != "" && toolType != "function" && tool.IsObject() {
-				// Almost all providers lack built-in tools, so we just ignore them.
-				// chatCompletionsTools = append(chatCompletionsTools, tool.Value())
+				chatCompletionsTools = append(chatCompletionsTools, tool.Value())
 				return true
 			}
 
diff --git a/internal/util/claude_model_test.go b/internal/util/claude_model_test.go
index d20c337de4..17f6106edf 100644
--- a/internal/util/claude_model_test.go
+++ b/internal/util/claude_model_test.go
@@ -11,7 +11,6 @@ func TestIsClaudeThinkingModel(t *testing.T) {
 		// Claude thinking models - should return true
 		{"claude-sonnet-4-5-thinking", "claude-sonnet-4-5-thinking", true},
 		{"claude-opus-4-5-thinking", "claude-opus-4-5-thinking", true},
-		{"claude-opus-4-6-thinking", "claude-opus-4-6-thinking", true},
 		{"Claude-Sonnet-Thinking uppercase", "Claude-Sonnet-4-5-Thinking", true},
 		{"claude thinking mixed case", "Claude-THINKING-Model", true},
 
diff --git a/internal/util/gemini_schema.go b/internal/util/gemini_schema.go
index b8d07bf4d9..fcc048c9fa 100644
--- a/internal/util/gemini_schema.go
+++ b/internal/util/gemini_schema.go
@@ -61,20 +61,14 @@ func cleanJSONSchema(jsonStr string, addPlaceholder bool) string {
 
 // removeKeywords removes all occurrences of specified keywords from the JSON schema.
 func removeKeywords(jsonStr string, keywords []string) string {
-	deletePaths := make([]string, 0)
-	pathsByField := findPathsByFields(jsonStr, keywords)
 	for _, key := range keywords {
-		for _, p := range pathsByField[key] {
+		for _, p := range findPaths(jsonStr, key) {
 			if isPropertyDefinition(trimSuffix(p, "."+key)) {
 				continue
 			}
-			deletePaths = append(deletePaths, p)
+			jsonStr, _ = sjson.Delete(jsonStr, p)
 		}
 	}
-	sortByDepth(deletePaths)
-	for _, p := range deletePaths {
-		jsonStr, _ = sjson.Delete(jsonStr, p)
-	}
 	return jsonStr
 }
 
@@ -241,9 +235,8 @@ var unsupportedConstraints = []string{
 }
 
 func moveConstraintsToDescription(jsonStr string) string {
-	pathsByField := findPathsByFields(jsonStr, unsupportedConstraints)
 	for _, key := range unsupportedConstraints {
-		for _, p := range pathsByField[key] {
+		for _, p := range findPaths(jsonStr, key) {
 			val := gjson.Get(jsonStr, p)
 			if !val.Exists() || val.IsObject() || val.IsArray() {
 				continue
@@ -428,25 +421,17 @@ func flattenTypeArrays(jsonStr string) string {
 
 func removeUnsupportedKeywords(jsonStr string) string {
 	keywords := append(unsupportedConstraints,
-		"$schema", "$defs", "definitions", "const", "$ref", "$id", "additionalProperties",
-		"propertyNames", "patternProperties", // Gemini doesn't support these schema keywords
-		"enumTitles", "prefill", // Claude/OpenCode schema metadata fields unsupported by Gemini
+		"$schema", "$defs", "definitions", "const", "$ref", "additionalProperties",
+		"propertyNames", // Gemini doesn't support property name validation
 	)
-
-	deletePaths := make([]string, 0)
-	pathsByField := findPathsByFields(jsonStr, keywords)
 	for _, key := range keywords {
-		for _, p := range pathsByField[key] {
+		for _, p := range findPaths(jsonStr, key) {
 			if isPropertyDefinition(trimSuffix(p, "."+key)) {
 				continue
 			}
-			deletePaths = append(deletePaths, p)
+			jsonStr, _ = sjson.Delete(jsonStr, p)
 		}
 	}
-	sortByDepth(deletePaths)
-	for _, p := range deletePaths {
-		jsonStr, _ = sjson.Delete(jsonStr, p)
-	}
 	// Remove x-* extension fields (e.g., x-google-enum-descriptions) that are not supported by Gemini API
 	jsonStr = removeExtensionFields(jsonStr)
 	return jsonStr
@@ -596,42 +581,6 @@ func findPaths(jsonStr, field string) []string {
 	return paths
 }
 
-func findPathsByFields(jsonStr string, fields []string) map[string][]string {
-	set := make(map[string]struct{}, len(fields))
-	for _, field := range fields {
-		set[field] = struct{}{}
-	}
-	paths := make(map[string][]string, len(set))
-	walkForFields(gjson.Parse(jsonStr), "", set, paths)
-	return paths
-}
-
-func walkForFields(value gjson.Result, path string, fields map[string]struct{}, paths map[string][]string) {
-	switch value.Type {
-	case gjson.JSON:
-		value.ForEach(func(key, val gjson.Result) bool {
-			keyStr := key.String()
-			safeKey := escapeGJSONPathKey(keyStr)
-
-			var childPath string
-			if path == "" {
-				childPath = safeKey
-			} else {
-				childPath = path + "." + safeKey
-			}
-
-			if _, ok := fields[keyStr]; ok {
-				paths[keyStr] = append(paths[keyStr], childPath)
-			}
-
-			walkForFields(val, childPath, fields, paths)
-			return true
-		})
-	case gjson.String, gjson.Number, gjson.True, gjson.False, gjson.Null:
-		// Terminal types - no further traversal needed
-	}
-}
-
 func sortByDepth(paths []string) {
 	sort.Slice(paths, func(i, j int) bool { return len(paths[i]) > len(paths[j]) })
 }
@@ -718,9 +667,6 @@ func orDefault(val, def string) string {
 }
 
 func escapeGJSONPathKey(key string) string {
-	if strings.IndexAny(key, ".*?") == -1 {
-		return key
-	}
 	return gjsonPathKeyReplacer.Replace(key)
 }
 
diff --git a/internal/util/gemini_schema_test.go b/internal/util/gemini_schema_test.go
index bb06e95673..ea63d1114a 100644
--- a/internal/util/gemini_schema_test.go
+++ b/internal/util/gemini_schema_test.go
@@ -870,57 +870,6 @@ func TestCleanJSONSchemaForAntigravity_BooleanEnumToString(t *testing.T) {
 	}
 }
 
-func TestCleanJSONSchemaForGemini_RemovesGeminiUnsupportedMetadataFields(t *testing.T) {
-	input := `{
-		"$schema": "http://json-schema.org/draft-07/schema#",
-		"$id": "root-schema",
-		"type": "object",
-		"properties": {
-			"payload": {
-				"type": "object",
-				"prefill": "hello",
-				"properties": {
-					"mode": {
-						"type": "string",
-						"enum": ["a", "b"],
-						"enumTitles": ["A", "B"]
-					}
-				},
-				"patternProperties": {
-					"^x-": {"type": "string"}
-				}
-			},
-			"$id": {
-				"type": "string",
-				"description": "property name should not be removed"
-			}
-		}
-	}`
-
-	expected := `{
-		"type": "object",
-		"properties": {
-			"payload": {
-				"type": "object",
-				"properties": {
-					"mode": {
-						"type": "string",
-						"enum": ["a", "b"],
-						"description": "Allowed: a, b"
-					}
-				}
-			},
-			"$id": {
-				"type": "string",
-				"description": "property name should not be removed"
-			}
-		}
-	}`
-
-	result := CleanJSONSchemaForGemini(input)
-	compareJSON(t, expected, result)
-}
-
 func TestRemoveExtensionFields(t *testing.T) {
 	tests := []struct {
 		name     string
diff --git a/internal/util/translator.go b/internal/util/translator.go
index 51ecb748a0..eca38a3079 100644
--- a/internal/util/translator.go
+++ b/internal/util/translator.go
@@ -6,6 +6,7 @@ package util
 import (
 	"bytes"
 	"fmt"
+	"strings"
 
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -32,15 +33,15 @@ func Walk(value gjson.Result, path, field string, paths *[]string) {
 			// . -> \.
 			// * -> \*
 			// ? -> \?
-			keyStr := key.String()
-			safeKey := escapeGJSONPathKey(keyStr)
+			var keyReplacer = strings.NewReplacer(".", "\\.", "*", "\\*", "?", "\\?")
+			safeKey := keyReplacer.Replace(key.String())
 
 			if path == "" {
 				childPath = safeKey
 			} else {
 				childPath = path + "." + safeKey
 			}
-			if keyStr == field {
+			if key.String() == field {
 				*paths = append(*paths, childPath)
 			}
 			Walk(val, childPath, field, paths)
@@ -86,6 +87,15 @@ func RenameKey(jsonStr, oldKeyPath, newKeyPath string) (string, error) {
 	return finalJson, nil
 }
 
+func DeleteKey(jsonStr, keyName string) string {
+	paths := make([]string, 0)
+	Walk(gjson.Parse(jsonStr), "", keyName, &paths)
+	for _, p := range paths {
+		jsonStr, _ = sjson.Delete(jsonStr, p)
+	}
+	return jsonStr
+}
+
 // FixJSON converts non-standard JSON that uses single quotes for strings into
 // RFC 8259-compliant JSON by converting those single-quoted strings to
 // double-quoted strings with proper escaping.
diff --git a/internal/watcher/clients.go b/internal/watcher/clients.go
index cf0ed07600..5cd8b6e6a7 100644
--- a/internal/watcher/clients.go
+++ b/internal/watcher/clients.go
@@ -6,7 +6,6 @@ import (
 	"context"
 	"crypto/sha256"
 	"encoding/hex"
-	"encoding/json"
 	"fmt"
 	"io/fs"
 	"os"
@@ -16,7 +15,6 @@ import (
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/watcher/diff"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	log "github.com/sirupsen/logrus"
 )
@@ -74,7 +72,6 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string
 		w.clientsMutex.Lock()
 
 		w.lastAuthHashes = make(map[string]string)
-		w.lastAuthContents = make(map[string]*coreauth.Auth)
 		if resolvedAuthDir, errResolveAuthDir := util.ResolveAuthDir(cfg.AuthDir); errResolveAuthDir != nil {
 			log.Errorf("failed to resolve auth directory for hash cache: %v", errResolveAuthDir)
 		} else if resolvedAuthDir != "" {
@@ -87,11 +84,6 @@ func (w *Watcher) reloadClients(rescanAuth bool, affectedOAuthProviders []string
 						sum := sha256.Sum256(data)
 						normalizedPath := w.normalizeAuthPath(path)
 						w.lastAuthHashes[normalizedPath] = hex.EncodeToString(sum[:])
-						// Parse and cache auth content for future diff comparisons
-						var auth coreauth.Auth
-						if errParse := json.Unmarshal(data, &auth); errParse == nil {
-							w.lastAuthContents[normalizedPath] = &auth
-						}
 					}
 				}
 				return nil
@@ -135,13 +127,6 @@ func (w *Watcher) addOrUpdateClient(path string) {
 	curHash := hex.EncodeToString(sum[:])
 	normalized := w.normalizeAuthPath(path)
 
-	// Parse new auth content for diff comparison
-	var newAuth coreauth.Auth
-	if errParse := json.Unmarshal(data, &newAuth); errParse != nil {
-		log.Errorf("failed to parse auth file %s: %v", filepath.Base(path), errParse)
-		return
-	}
-
 	w.clientsMutex.Lock()
 
 	cfg := w.config
@@ -156,26 +141,7 @@ func (w *Watcher) addOrUpdateClient(path string) {
 		return
 	}
 
-	// Get old auth for diff comparison
-	var oldAuth *coreauth.Auth
-	if w.lastAuthContents != nil {
-		oldAuth = w.lastAuthContents[normalized]
-	}
-
-	// Compute and log field changes
-	if changes := diff.BuildAuthChangeDetails(oldAuth, &newAuth); len(changes) > 0 {
-		log.Debugf("auth field changes for %s:", filepath.Base(path))
-		for _, c := range changes {
-			log.Debugf("  %s", c)
-		}
-	}
-
-	// Update caches
 	w.lastAuthHashes[normalized] = curHash
-	if w.lastAuthContents == nil {
-		w.lastAuthContents = make(map[string]*coreauth.Auth)
-	}
-	w.lastAuthContents[normalized] = &newAuth
 
 	w.clientsMutex.Unlock() // Unlock before the callback
 
@@ -194,7 +160,6 @@ func (w *Watcher) removeClient(path string) {
 
 	cfg := w.config
 	delete(w.lastAuthHashes, normalized)
-	delete(w.lastAuthContents, normalized)
 
 	w.clientsMutex.Unlock() // Release the lock before the callback
 
diff --git a/internal/watcher/diff/config_diff.go b/internal/watcher/diff/config_diff.go
index 6687749e59..0ba287bf67 100644
--- a/internal/watcher/diff/config_diff.go
+++ b/internal/watcher/diff/config_diff.go
@@ -27,12 +27,6 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 	if oldCfg.Debug != newCfg.Debug {
 		changes = append(changes, fmt.Sprintf("debug: %t -> %t", oldCfg.Debug, newCfg.Debug))
 	}
-	if oldCfg.Pprof.Enable != newCfg.Pprof.Enable {
-		changes = append(changes, fmt.Sprintf("pprof.enable: %t -> %t", oldCfg.Pprof.Enable, newCfg.Pprof.Enable))
-	}
-	if strings.TrimSpace(oldCfg.Pprof.Addr) != strings.TrimSpace(newCfg.Pprof.Addr) {
-		changes = append(changes, fmt.Sprintf("pprof.addr: %s -> %s", strings.TrimSpace(oldCfg.Pprof.Addr), strings.TrimSpace(newCfg.Pprof.Addr)))
-	}
 	if oldCfg.LoggingToFile != newCfg.LoggingToFile {
 		changes = append(changes, fmt.Sprintf("logging-to-file: %t -> %t", oldCfg.LoggingToFile, newCfg.LoggingToFile))
 	}
@@ -184,9 +178,6 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 			if strings.TrimSpace(o.Prefix) != strings.TrimSpace(n.Prefix) {
 				changes = append(changes, fmt.Sprintf("codex[%d].prefix: %s -> %s", i, strings.TrimSpace(o.Prefix), strings.TrimSpace(n.Prefix)))
 			}
-			if o.Websockets != n.Websockets {
-				changes = append(changes, fmt.Sprintf("codex[%d].websockets: %t -> %t", i, o.Websockets, n.Websockets))
-			}
 			if strings.TrimSpace(o.APIKey) != strings.TrimSpace(n.APIKey) {
 				changes = append(changes, fmt.Sprintf("codex[%d].api-key: updated", i))
 			}
diff --git a/internal/watcher/events.go b/internal/watcher/events.go
index fb96ad2a35..250cf75cb4 100644
--- a/internal/watcher/events.go
+++ b/internal/watcher/events.go
@@ -13,7 +13,6 @@ import (
 	"time"
 
 	"github.com/fsnotify/fsnotify"
-	kiroauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
 	log "github.com/sirupsen/logrus"
 )
 
@@ -40,35 +39,12 @@ func (w *Watcher) start(ctx context.Context) error {
 	}
 	log.Debugf("watching auth directory: %s", w.authDir)
 
-	w.watchKiroIDETokenFile()
-
 	go w.processEvents(ctx)
 
 	w.reloadClients(true, nil, false)
 	return nil
 }
 
-func (w *Watcher) watchKiroIDETokenFile() {
-	homeDir, err := os.UserHomeDir()
-	if err != nil {
-		log.Debugf("failed to get home directory for Kiro IDE token watch: %v", err)
-		return
-	}
-
-	kiroTokenDir := filepath.Join(homeDir, ".aws", "sso", "cache")
-
-	if _, statErr := os.Stat(kiroTokenDir); os.IsNotExist(statErr) {
-		log.Debugf("Kiro IDE token directory does not exist: %s", kiroTokenDir)
-		return
-	}
-
-	if errAdd := w.watcher.Add(kiroTokenDir); errAdd != nil {
-		log.Debugf("failed to watch Kiro IDE token directory %s: %v", kiroTokenDir, errAdd)
-		return
-	}
-	log.Debugf("watching Kiro IDE token directory: %s", kiroTokenDir)
-}
-
 func (w *Watcher) processEvents(ctx context.Context) {
 	for {
 		select {
@@ -97,17 +73,11 @@ func (w *Watcher) handleEvent(event fsnotify.Event) {
 	isConfigEvent := normalizedName == normalizedConfigPath && event.Op&configOps != 0
 	authOps := fsnotify.Create | fsnotify.Write | fsnotify.Remove | fsnotify.Rename
 	isAuthJSON := strings.HasPrefix(normalizedName, normalizedAuthDir) && strings.HasSuffix(normalizedName, ".json") && event.Op&authOps != 0
-	isKiroIDEToken := w.isKiroIDETokenFile(event.Name) && event.Op&authOps != 0
-	if !isConfigEvent && !isAuthJSON && !isKiroIDEToken {
+	if !isConfigEvent && !isAuthJSON {
 		// Ignore unrelated files (e.g., cookie snapshots *.cookie) and other noise.
 		return
 	}
 
-	if isKiroIDEToken {
-		w.handleKiroIDETokenChange(event)
-		return
-	}
-
 	now := time.Now()
 	log.Debugf("file system event detected: %s %s", event.Op.String(), event.Name)
 
@@ -154,44 +124,6 @@ func (w *Watcher) handleEvent(event fsnotify.Event) {
 	}
 }
 
-func (w *Watcher) isKiroIDETokenFile(path string) bool {
-	normalized := filepath.ToSlash(path)
-	return strings.HasSuffix(normalized, "kiro-auth-token.json") && strings.Contains(normalized, ".aws/sso/cache")
-}
-
-func (w *Watcher) handleKiroIDETokenChange(event fsnotify.Event) {
-	log.Debugf("Kiro IDE token file event detected: %s %s", event.Op.String(), event.Name)
-
-	if event.Op&(fsnotify.Remove|fsnotify.Rename) != 0 {
-		time.Sleep(replaceCheckDelay)
-		if _, statErr := os.Stat(event.Name); statErr != nil {
-			log.Debugf("Kiro IDE token file removed: %s", event.Name)
-			return
-		}
-	}
-
-	// Use retry logic to handle file lock contention (e.g., Kiro IDE writing the file)
-	// This prevents "being used by another process" errors on Windows
-	tokenData, err := kiroauth.LoadKiroIDETokenWithRetry(10, 50*time.Millisecond)
-	if err != nil {
-		log.Debugf("failed to load Kiro IDE token after change: %v", err)
-		return
-	}
-
-	log.Infof("Kiro IDE token file updated, access token refreshed (provider: %s)", tokenData.Provider)
-
-	w.refreshAuthState(true)
-
-	w.clientsMutex.RLock()
-	cfg := w.config
-	w.clientsMutex.RUnlock()
-
-	if w.reloadCallback != nil && cfg != nil {
-		log.Debugf("triggering server update callback after Kiro IDE token change")
-		w.reloadCallback(cfg)
-	}
-}
-
 func (w *Watcher) authFileUnchanged(path string) (bool, error) {
 	data, errRead := os.ReadFile(path)
 	if errRead != nil {
diff --git a/internal/watcher/synthesizer/config.go b/internal/watcher/synthesizer/config.go
index e044117ffe..b1ae588569 100644
--- a/internal/watcher/synthesizer/config.go
+++ b/internal/watcher/synthesizer/config.go
@@ -5,10 +5,8 @@ import (
 	"strconv"
 	"strings"
 
-	kiroauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/watcher/diff"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
-	log "github.com/sirupsen/logrus"
 )
 
 // ConfigSynthesizer generates Auth entries from configuration API keys.
@@ -33,8 +31,6 @@ func (s *ConfigSynthesizer) Synthesize(ctx *SynthesisContext) ([]*coreauth.Auth,
 	out = append(out, s.synthesizeClaudeKeys(ctx)...)
 	// Codex API Keys
 	out = append(out, s.synthesizeCodexKeys(ctx)...)
-	// Kiro (AWS CodeWhisperer)
-	out = append(out, s.synthesizeKiroKeys(ctx)...)
 	// OpenAI-compat
 	out = append(out, s.synthesizeOpenAICompat(ctx)...)
 	// Vertex-compat
@@ -164,9 +160,6 @@ func (s *ConfigSynthesizer) synthesizeCodexKeys(ctx *SynthesisContext) []*coreau
 		if ck.BaseURL != "" {
 			attrs["base_url"] = ck.BaseURL
 		}
-		if ck.Websockets {
-			attrs["websockets"] = "true"
-		}
 		if hash := diff.ComputeCodexModelsHash(ck.Models); hash != "" {
 			attrs["models_hash"] = hash
 		}
@@ -324,96 +317,3 @@ func (s *ConfigSynthesizer) synthesizeVertexCompat(ctx *SynthesisContext) []*cor
 	}
 	return out
 }
-
-// synthesizeKiroKeys creates Auth entries for Kiro (AWS CodeWhisperer) tokens.
-func (s *ConfigSynthesizer) synthesizeKiroKeys(ctx *SynthesisContext) []*coreauth.Auth {
-	cfg := ctx.Config
-	now := ctx.Now
-	idGen := ctx.IDGenerator
-
-	if len(cfg.KiroKey) == 0 {
-		return nil
-	}
-
-	out := make([]*coreauth.Auth, 0, len(cfg.KiroKey))
-	kAuth := kiroauth.NewKiroAuth(cfg)
-
-	for i := range cfg.KiroKey {
-		kk := cfg.KiroKey[i]
-		var accessToken, profileArn, refreshToken string
-
-		// Try to load from token file first
-		if kk.TokenFile != "" && kAuth != nil {
-			tokenData, err := kAuth.LoadTokenFromFile(kk.TokenFile)
-			if err != nil {
-				log.Warnf("failed to load kiro token file %s: %v", kk.TokenFile, err)
-			} else {
-				accessToken = tokenData.AccessToken
-				profileArn = tokenData.ProfileArn
-				refreshToken = tokenData.RefreshToken
-			}
-		}
-
-		// Override with direct config values if provided
-		if kk.AccessToken != "" {
-			accessToken = kk.AccessToken
-		}
-		if kk.ProfileArn != "" {
-			profileArn = kk.ProfileArn
-		}
-		if kk.RefreshToken != "" {
-			refreshToken = kk.RefreshToken
-		}
-
-		if accessToken == "" {
-			log.Warnf("kiro config[%d] missing access_token, skipping", i)
-			continue
-		}
-
-		// profileArn is optional for AWS Builder ID users
-		id, token := idGen.Next("kiro:token", accessToken, profileArn)
-		attrs := map[string]string{
-			"source":       fmt.Sprintf("config:kiro[%s]", token),
-			"access_token": accessToken,
-		}
-		if profileArn != "" {
-			attrs["profile_arn"] = profileArn
-		}
-		if kk.Region != "" {
-			attrs["region"] = kk.Region
-		}
-		if kk.AgentTaskType != "" {
-			attrs["agent_task_type"] = kk.AgentTaskType
-		}
-		if kk.PreferredEndpoint != "" {
-			attrs["preferred_endpoint"] = kk.PreferredEndpoint
-		} else if cfg.KiroPreferredEndpoint != "" {
-			// Apply global default if not overridden by specific key
-			attrs["preferred_endpoint"] = cfg.KiroPreferredEndpoint
-		}
-		if refreshToken != "" {
-			attrs["refresh_token"] = refreshToken
-		}
-		proxyURL := strings.TrimSpace(kk.ProxyURL)
-		a := &coreauth.Auth{
-			ID:         id,
-			Provider:   "kiro",
-			Label:      "kiro-token",
-			Status:     coreauth.StatusActive,
-			ProxyURL:   proxyURL,
-			Attributes: attrs,
-			CreatedAt:  now,
-			UpdatedAt:  now,
-		}
-
-		if refreshToken != "" {
-			if a.Metadata == nil {
-				a.Metadata = make(map[string]any)
-			}
-			a.Metadata["refresh_token"] = refreshToken
-		}
-
-		out = append(out, a)
-	}
-	return out
-}
diff --git a/internal/watcher/synthesizer/config_test.go b/internal/watcher/synthesizer/config_test.go
index 437f18d11e..32af7c27fc 100644
--- a/internal/watcher/synthesizer/config_test.go
+++ b/internal/watcher/synthesizer/config_test.go
@@ -231,11 +231,10 @@ func TestConfigSynthesizer_CodexKeys(t *testing.T) {
 		Config: &config.Config{
 			CodexKey: []config.CodexKey{
 				{
-					APIKey:     "codex-key-123",
-					Prefix:     "dev",
-					BaseURL:    "https://api.openai.com",
-					ProxyURL:   "http://proxy.local",
-					Websockets: true,
+					APIKey:   "codex-key-123",
+					Prefix:   "dev",
+					BaseURL:  "https://api.openai.com",
+					ProxyURL: "http://proxy.local",
 				},
 			},
 		},
@@ -260,9 +259,6 @@ func TestConfigSynthesizer_CodexKeys(t *testing.T) {
 	if auths[0].ProxyURL != "http://proxy.local" {
 		t.Errorf("expected proxy_url http://proxy.local, got %s", auths[0].ProxyURL)
 	}
-	if auths[0].Attributes["websockets"] != "true" {
-		t.Errorf("expected websockets=true, got %s", auths[0].Attributes["websockets"])
-	}
 }
 
 func TestConfigSynthesizer_CodexKeys_SkipsEmptyAndHeaders(t *testing.T) {
diff --git a/internal/watcher/synthesizer/file.go b/internal/watcher/synthesizer/file.go
index 4e05311703..c80ebc6630 100644
--- a/internal/watcher/synthesizer/file.go
+++ b/internal/watcher/synthesizer/file.go
@@ -5,7 +5,6 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
-	"strconv"
 	"strings"
 	"time"
 
@@ -93,9 +92,6 @@ func (s *FileSynthesizer) Synthesize(ctx *SynthesisContext) ([]*coreauth.Auth, e
 			status = coreauth.StatusDisabled
 		}
 
-		// Read per-account excluded models from the OAuth JSON file
-		perAccountExcluded := extractExcludedModelsFromMetadata(metadata)
-
 		a := &coreauth.Auth{
 			ID:       id,
 			Provider: provider,
@@ -112,23 +108,11 @@ func (s *FileSynthesizer) Synthesize(ctx *SynthesisContext) ([]*coreauth.Auth, e
 			CreatedAt: now,
 			UpdatedAt: now,
 		}
-		// Read priority from auth file
-		if rawPriority, ok := metadata["priority"]; ok {
-			switch v := rawPriority.(type) {
-			case float64:
-				a.Attributes["priority"] = strconv.Itoa(int(v))
-			case string:
-				priority := strings.TrimSpace(v)
-				if _, errAtoi := strconv.Atoi(priority); errAtoi == nil {
-					a.Attributes["priority"] = priority
-				}
-			}
-		}
-		ApplyAuthExcludedModelsMeta(a, cfg, perAccountExcluded, "oauth")
+		ApplyAuthExcludedModelsMeta(a, cfg, nil, "oauth")
 		if provider == "gemini-cli" {
 			if virtuals := SynthesizeGeminiVirtualAuths(a, metadata, now); len(virtuals) > 0 {
 				for _, v := range virtuals {
-					ApplyAuthExcludedModelsMeta(v, cfg, perAccountExcluded, "oauth")
+					ApplyAuthExcludedModelsMeta(v, cfg, nil, "oauth")
 				}
 				out = append(out, a)
 				out = append(out, virtuals...)
@@ -183,10 +167,6 @@ func SynthesizeGeminiVirtualAuths(primary *coreauth.Auth, metadata map[string]an
 		if authPath != "" {
 			attrs["path"] = authPath
 		}
-		// Propagate priority from primary auth to virtual auths
-		if priorityVal, hasPriority := primary.Attributes["priority"]; hasPriority && priorityVal != "" {
-			attrs["priority"] = priorityVal
-		}
 		metadataCopy := map[string]any{
 			"email":             email,
 			"project_id":        projectID,
@@ -259,40 +239,3 @@ func buildGeminiVirtualID(baseID, projectID string) string {
 	replacer := strings.NewReplacer("/", "_", "\\", "_", " ", "_")
 	return fmt.Sprintf("%s::%s", baseID, replacer.Replace(project))
 }
-
-// extractExcludedModelsFromMetadata reads per-account excluded models from the OAuth JSON metadata.
-// Supports both "excluded_models" and "excluded-models" keys, and accepts both []string and []interface{}.
-func extractExcludedModelsFromMetadata(metadata map[string]any) []string {
-	if metadata == nil {
-		return nil
-	}
-	// Try both key formats
-	raw, ok := metadata["excluded_models"]
-	if !ok {
-		raw, ok = metadata["excluded-models"]
-	}
-	if !ok || raw == nil {
-		return nil
-	}
-	var stringSlice []string
-	switch v := raw.(type) {
-	case []string:
-		stringSlice = v
-	case []interface{}:
-		stringSlice = make([]string, 0, len(v))
-		for _, item := range v {
-			if s, ok := item.(string); ok {
-				stringSlice = append(stringSlice, s)
-			}
-		}
-	default:
-		return nil
-	}
-	result := make([]string, 0, len(stringSlice))
-	for _, s := range stringSlice {
-		if trimmed := strings.TrimSpace(s); trimmed != "" {
-			result = append(result, trimmed)
-		}
-	}
-	return result
-}
diff --git a/internal/watcher/synthesizer/file_test.go b/internal/watcher/synthesizer/file_test.go
index 105d920747..93025fbaa3 100644
--- a/internal/watcher/synthesizer/file_test.go
+++ b/internal/watcher/synthesizer/file_test.go
@@ -297,117 +297,6 @@ func TestFileSynthesizer_Synthesize_PrefixValidation(t *testing.T) {
 	}
 }
 
-func TestFileSynthesizer_Synthesize_PriorityParsing(t *testing.T) {
-	tests := []struct {
-		name     string
-		priority any
-		want     string
-		hasValue bool
-	}{
-		{
-			name:     "string with spaces",
-			priority: " 10 ",
-			want:     "10",
-			hasValue: true,
-		},
-		{
-			name:     "number",
-			priority: 8,
-			want:     "8",
-			hasValue: true,
-		},
-		{
-			name:     "invalid string",
-			priority: "1x",
-			hasValue: false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			tempDir := t.TempDir()
-			authData := map[string]any{
-				"type":     "claude",
-				"priority": tt.priority,
-			}
-			data, _ := json.Marshal(authData)
-			errWriteFile := os.WriteFile(filepath.Join(tempDir, "auth.json"), data, 0644)
-			if errWriteFile != nil {
-				t.Fatalf("failed to write auth file: %v", errWriteFile)
-			}
-
-			synth := NewFileSynthesizer()
-			ctx := &SynthesisContext{
-				Config:      &config.Config{},
-				AuthDir:     tempDir,
-				Now:         time.Now(),
-				IDGenerator: NewStableIDGenerator(),
-			}
-
-			auths, errSynthesize := synth.Synthesize(ctx)
-			if errSynthesize != nil {
-				t.Fatalf("unexpected error: %v", errSynthesize)
-			}
-			if len(auths) != 1 {
-				t.Fatalf("expected 1 auth, got %d", len(auths))
-			}
-
-			value, ok := auths[0].Attributes["priority"]
-			if tt.hasValue {
-				if !ok {
-					t.Fatal("expected priority attribute to be set")
-				}
-				if value != tt.want {
-					t.Fatalf("expected priority %q, got %q", tt.want, value)
-				}
-				return
-			}
-			if ok {
-				t.Fatalf("expected priority attribute to be absent, got %q", value)
-			}
-		})
-	}
-}
-
-func TestFileSynthesizer_Synthesize_OAuthExcludedModelsMerged(t *testing.T) {
-	tempDir := t.TempDir()
-	authData := map[string]any{
-		"type":            "claude",
-		"excluded_models": []string{"custom-model", "MODEL-B"},
-	}
-	data, _ := json.Marshal(authData)
-	errWriteFile := os.WriteFile(filepath.Join(tempDir, "auth.json"), data, 0644)
-	if errWriteFile != nil {
-		t.Fatalf("failed to write auth file: %v", errWriteFile)
-	}
-
-	synth := NewFileSynthesizer()
-	ctx := &SynthesisContext{
-		Config: &config.Config{
-			OAuthExcludedModels: map[string][]string{
-				"claude": {"shared", "model-b"},
-			},
-		},
-		AuthDir:     tempDir,
-		Now:         time.Now(),
-		IDGenerator: NewStableIDGenerator(),
-	}
-
-	auths, errSynthesize := synth.Synthesize(ctx)
-	if errSynthesize != nil {
-		t.Fatalf("unexpected error: %v", errSynthesize)
-	}
-	if len(auths) != 1 {
-		t.Fatalf("expected 1 auth, got %d", len(auths))
-	}
-
-	got := auths[0].Attributes["excluded_models"]
-	want := "custom-model,model-b,shared"
-	if got != want {
-		t.Fatalf("expected excluded_models %q, got %q", want, got)
-	}
-}
-
 func TestSynthesizeGeminiVirtualAuths_NilInputs(t *testing.T) {
 	now := time.Now()
 
@@ -644,7 +533,6 @@ func TestFileSynthesizer_Synthesize_MultiProjectGemini(t *testing.T) {
 		"type":       "gemini",
 		"email":      "multi@example.com",
 		"project_id": "project-a, project-b, project-c",
-		"priority":   " 10 ",
 	}
 	data, _ := json.Marshal(authData)
 	err := os.WriteFile(filepath.Join(tempDir, "gemini-multi.json"), data, 0644)
@@ -677,9 +565,6 @@ func TestFileSynthesizer_Synthesize_MultiProjectGemini(t *testing.T) {
 	if primary.Status != coreauth.StatusDisabled {
 		t.Errorf("expected primary status disabled, got %s", primary.Status)
 	}
-	if gotPriority := primary.Attributes["priority"]; gotPriority != "10" {
-		t.Errorf("expected primary priority 10, got %q", gotPriority)
-	}
 
 	// Remaining auths should be virtuals
 	for i := 1; i < 4; i++ {
@@ -690,9 +575,6 @@ func TestFileSynthesizer_Synthesize_MultiProjectGemini(t *testing.T) {
 		if v.Attributes["gemini_virtual_parent"] != primary.ID {
 			t.Errorf("expected virtual %d parent to be %s, got %s", i, primary.ID, v.Attributes["gemini_virtual_parent"])
 		}
-		if gotPriority := v.Attributes["priority"]; gotPriority != "10" {
-			t.Errorf("expected virtual %d priority 10, got %q", i, gotPriority)
-		}
 	}
 }
 
diff --git a/internal/watcher/synthesizer/helpers.go b/internal/watcher/synthesizer/helpers.go
index 102dc77e22..621f3600f6 100644
--- a/internal/watcher/synthesizer/helpers.go
+++ b/internal/watcher/synthesizer/helpers.go
@@ -53,8 +53,6 @@ func (g *StableIDGenerator) Next(kind string, parts ...string) (string, string)
 
 // ApplyAuthExcludedModelsMeta applies excluded models metadata to an auth entry.
 // It computes a hash of excluded models and sets the auth_kind attribute.
-// For OAuth entries, perKey (from the JSON file's excluded-models field) is merged
-// with the global oauth-excluded-models config for the provider.
 func ApplyAuthExcludedModelsMeta(auth *coreauth.Auth, cfg *config.Config, perKey []string, authKind string) {
 	if auth == nil || cfg == nil {
 		return
@@ -74,13 +72,9 @@ func ApplyAuthExcludedModelsMeta(auth *coreauth.Auth, cfg *config.Config, perKey
 	}
 	if authKindKey == "apikey" {
 		add(perKey)
-	} else {
-		// For OAuth: merge per-account excluded models with global provider-level exclusions
-		add(perKey)
-		if cfg.OAuthExcludedModels != nil {
-			providerKey := strings.ToLower(strings.TrimSpace(auth.Provider))
-			add(cfg.OAuthExcludedModels[providerKey])
-		}
+	} else if cfg.OAuthExcludedModels != nil {
+		providerKey := strings.ToLower(strings.TrimSpace(auth.Provider))
+		add(cfg.OAuthExcludedModels[providerKey])
 	}
 	combined := make([]string, 0, len(seen))
 	for k := range seen {
@@ -94,10 +88,6 @@ func ApplyAuthExcludedModelsMeta(auth *coreauth.Auth, cfg *config.Config, perKey
 	if hash != "" {
 		auth.Attributes["excluded_models_hash"] = hash
 	}
-	// Store the combined excluded models list so that routing can read it at runtime
-	if len(combined) > 0 {
-		auth.Attributes["excluded_models"] = strings.Join(combined, ",")
-	}
 	if authKind != "" {
 		auth.Attributes["auth_kind"] = authKind
 	}
diff --git a/internal/watcher/synthesizer/helpers_test.go b/internal/watcher/synthesizer/helpers_test.go
index 46b9c8a053..229c75bcca 100644
--- a/internal/watcher/synthesizer/helpers_test.go
+++ b/internal/watcher/synthesizer/helpers_test.go
@@ -6,7 +6,6 @@ import (
 	"testing"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/watcher/diff"
 	coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 )
 
@@ -201,30 +200,6 @@ func TestApplyAuthExcludedModelsMeta(t *testing.T) {
 	}
 }
 
-func TestApplyAuthExcludedModelsMeta_OAuthMergeWritesCombinedModels(t *testing.T) {
-	auth := &coreauth.Auth{
-		Provider:   "claude",
-		Attributes: make(map[string]string),
-	}
-	cfg := &config.Config{
-		OAuthExcludedModels: map[string][]string{
-			"claude": {"global-a", "shared"},
-		},
-	}
-
-	ApplyAuthExcludedModelsMeta(auth, cfg, []string{"per", "SHARED"}, "oauth")
-
-	const wantCombined = "global-a,per,shared"
-	if gotCombined := auth.Attributes["excluded_models"]; gotCombined != wantCombined {
-		t.Fatalf("expected excluded_models=%q, got %q", wantCombined, gotCombined)
-	}
-
-	expectedHash := diff.ComputeExcludedModelsHash([]string{"global-a", "per", "shared"})
-	if gotHash := auth.Attributes["excluded_models_hash"]; gotHash != expectedHash {
-		t.Fatalf("expected excluded_models_hash=%q, got %q", expectedHash, gotHash)
-	}
-}
-
 func TestAddConfigHeadersToAttrs(t *testing.T) {
 	tests := []struct {
 		name    string
diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go
index a451ef6eff..77006cf84a 100644
--- a/internal/watcher/watcher.go
+++ b/internal/watcher/watcher.go
@@ -38,7 +38,6 @@ type Watcher struct {
 	reloadCallback    func(*config.Config)
 	watcher           *fsnotify.Watcher
 	lastAuthHashes    map[string]string
-	lastAuthContents  map[string]*coreauth.Auth
 	lastRemoveTimes   map[string]time.Time
 	lastConfigHash    string
 	authQueue         chan<- AuthUpdate
@@ -146,111 +145,3 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 	w.clientsMutex.RUnlock()
 	return snapshotCoreAuths(cfg, w.authDir)
 }
-
-// NotifyTokenRefreshed 处理后台刷新器的 token 更新通知
-// 当后台刷新器成功刷新 token 后调用此方法，更新内存中的 Auth 对象
-// tokenID: token 文件名（如 kiro-xxx.json）
-// accessToken: 新的 access token
-// refreshToken: 新的 refresh token
-// expiresAt: 新的过期时间
-func (w *Watcher) NotifyTokenRefreshed(tokenID, accessToken, refreshToken, expiresAt string) {
-	if w == nil {
-		return
-	}
-
-	w.clientsMutex.Lock()
-	defer w.clientsMutex.Unlock()
-
-	// 遍历 currentAuths，找到匹配的 Auth 并更新
-	updated := false
-	for id, auth := range w.currentAuths {
-		if auth == nil || auth.Metadata == nil {
-			continue
-		}
-
-		// 检查是否是 kiro 类型的 auth
-		authType, _ := auth.Metadata["type"].(string)
-		if authType != "kiro" {
-			continue
-		}
-
-		// 多种匹配方式，解决不同来源的 auth 对象字段差异
-		matched := false
-
-		// 1. 通过 auth.ID 匹配（ID 可能包含文件名）
-		if !matched && auth.ID != "" {
-			if auth.ID == tokenID || strings.HasSuffix(auth.ID, "/"+tokenID) || strings.HasSuffix(auth.ID, "\\"+tokenID) {
-				matched = true
-			}
-			// ID 可能是 "kiro-xxx" 格式（无扩展名），tokenID 是 "kiro-xxx.json"
-			if !matched && strings.TrimSuffix(tokenID, ".json") == auth.ID {
-				matched = true
-			}
-		}
-
-		// 2. 通过 auth.Attributes["path"] 匹配
-		if !matched && auth.Attributes != nil {
-			if authPath := auth.Attributes["path"]; authPath != "" {
-				// 提取文件名部分进行比较
-				pathBase := authPath
-				if idx := strings.LastIndexAny(authPath, "/\\"); idx >= 0 {
-					pathBase = authPath[idx+1:]
-				}
-				if pathBase == tokenID || strings.TrimSuffix(pathBase, ".json") == strings.TrimSuffix(tokenID, ".json") {
-					matched = true
-				}
-			}
-		}
-
-		// 3. 通过 auth.FileName 匹配（原有逻辑）
-		if !matched && auth.FileName != "" {
-			if auth.FileName == tokenID || strings.HasSuffix(auth.FileName, "/"+tokenID) || strings.HasSuffix(auth.FileName, "\\"+tokenID) {
-				matched = true
-			}
-		}
-
-		if matched {
-			// 更新内存中的 token
-			auth.Metadata["access_token"] = accessToken
-			auth.Metadata["refresh_token"] = refreshToken
-			auth.Metadata["expires_at"] = expiresAt
-			auth.Metadata["last_refresh"] = time.Now().Format(time.RFC3339)
-			auth.UpdatedAt = time.Now()
-			auth.LastRefreshedAt = time.Now()
-
-			log.Infof("watcher: updated in-memory auth for token %s (auth ID: %s)", tokenID, id)
-			updated = true
-
-			// 同时更新 runtimeAuths 中的副本（如果存在）
-			if w.runtimeAuths != nil {
-				if runtimeAuth, ok := w.runtimeAuths[id]; ok && runtimeAuth != nil {
-					if runtimeAuth.Metadata == nil {
-						runtimeAuth.Metadata = make(map[string]any)
-					}
-					runtimeAuth.Metadata["access_token"] = accessToken
-					runtimeAuth.Metadata["refresh_token"] = refreshToken
-					runtimeAuth.Metadata["expires_at"] = expiresAt
-					runtimeAuth.Metadata["last_refresh"] = time.Now().Format(time.RFC3339)
-					runtimeAuth.UpdatedAt = time.Now()
-					runtimeAuth.LastRefreshedAt = time.Now()
-				}
-			}
-
-			// 发送更新通知到 authQueue
-			if w.authQueue != nil {
-				go func(authClone *coreauth.Auth) {
-					update := AuthUpdate{
-						Action: AuthUpdateActionModify,
-						ID:     authClone.ID,
-						Auth:   authClone,
-					}
-					w.dispatchAuthUpdates([]AuthUpdate{update})
-				}(auth.Clone())
-			}
-		}
-	}
-
-	if !updated {
-		log.Debugf("watcher: no matching auth found for token %s, will be picked up on next file scan", tokenID)
-	}
-}
diff --git a/sdk/access/errors.go b/sdk/access/errors.go
index 6f344bb0a2..6ea2cc1a2b 100644
--- a/sdk/access/errors.go
+++ b/sdk/access/errors.go
@@ -1,90 +1,12 @@
 package access
 
-import (
-	"fmt"
-	"net/http"
-	"strings"
+import "errors"
+
+var (
+	// ErrNoCredentials indicates no recognizable credentials were supplied.
+	ErrNoCredentials = errors.New("access: no credentials provided")
+	// ErrInvalidCredential signals that supplied credentials were rejected by a provider.
+	ErrInvalidCredential = errors.New("access: invalid credential")
+	// ErrNotHandled tells the manager to continue trying other providers.
+	ErrNotHandled = errors.New("access: not handled")
 )
-
-// AuthErrorCode classifies authentication failures.
-type AuthErrorCode string
-
-const (
-	AuthErrorCodeNoCredentials     AuthErrorCode = "no_credentials"
-	AuthErrorCodeInvalidCredential AuthErrorCode = "invalid_credential"
-	AuthErrorCodeNotHandled        AuthErrorCode = "not_handled"
-	AuthErrorCodeInternal          AuthErrorCode = "internal_error"
-)
-
-// AuthError carries authentication failure details and HTTP status.
-type AuthError struct {
-	Code       AuthErrorCode
-	Message    string
-	StatusCode int
-	Cause      error
-}
-
-func (e *AuthError) Error() string {
-	if e == nil {
-		return ""
-	}
-	message := strings.TrimSpace(e.Message)
-	if message == "" {
-		message = "authentication error"
-	}
-	if e.Cause != nil {
-		return fmt.Sprintf("%s: %v", message, e.Cause)
-	}
-	return message
-}
-
-func (e *AuthError) Unwrap() error {
-	if e == nil {
-		return nil
-	}
-	return e.Cause
-}
-
-// HTTPStatusCode returns a safe fallback for missing status codes.
-func (e *AuthError) HTTPStatusCode() int {
-	if e == nil || e.StatusCode <= 0 {
-		return http.StatusInternalServerError
-	}
-	return e.StatusCode
-}
-
-func newAuthError(code AuthErrorCode, message string, statusCode int, cause error) *AuthError {
-	return &AuthError{
-		Code:       code,
-		Message:    message,
-		StatusCode: statusCode,
-		Cause:      cause,
-	}
-}
-
-func NewNoCredentialsError() *AuthError {
-	return newAuthError(AuthErrorCodeNoCredentials, "Missing API key", http.StatusUnauthorized, nil)
-}
-
-func NewInvalidCredentialError() *AuthError {
-	return newAuthError(AuthErrorCodeInvalidCredential, "Invalid API key", http.StatusUnauthorized, nil)
-}
-
-func NewNotHandledError() *AuthError {
-	return newAuthError(AuthErrorCodeNotHandled, "authentication provider did not handle request", 0, nil)
-}
-
-func NewInternalAuthError(message string, cause error) *AuthError {
-	normalizedMessage := strings.TrimSpace(message)
-	if normalizedMessage == "" {
-		normalizedMessage = "Authentication service error"
-	}
-	return newAuthError(AuthErrorCodeInternal, normalizedMessage, http.StatusInternalServerError, cause)
-}
-
-func IsAuthErrorCode(authErr *AuthError, code AuthErrorCode) bool {
-	if authErr == nil {
-		return false
-	}
-	return authErr.Code == code
-}
diff --git a/sdk/access/manager.go b/sdk/access/manager.go
index 2d4b032639..fb5f8ccab6 100644
--- a/sdk/access/manager.go
+++ b/sdk/access/manager.go
@@ -2,6 +2,7 @@ package access
 
 import (
 	"context"
+	"errors"
 	"net/http"
 	"sync"
 )
@@ -42,7 +43,7 @@ func (m *Manager) Providers() []Provider {
 }
 
 // Authenticate evaluates providers until one succeeds.
-func (m *Manager) Authenticate(ctx context.Context, r *http.Request) (*Result, *AuthError) {
+func (m *Manager) Authenticate(ctx context.Context, r *http.Request) (*Result, error) {
 	if m == nil {
 		return nil, nil
 	}
@@ -60,29 +61,29 @@ func (m *Manager) Authenticate(ctx context.Context, r *http.Request) (*Result, *
 		if provider == nil {
 			continue
 		}
-		res, authErr := provider.Authenticate(ctx, r)
-		if authErr == nil {
+		res, err := provider.Authenticate(ctx, r)
+		if err == nil {
 			return res, nil
 		}
-		if IsAuthErrorCode(authErr, AuthErrorCodeNotHandled) {
+		if errors.Is(err, ErrNotHandled) {
 			continue
 		}
-		if IsAuthErrorCode(authErr, AuthErrorCodeNoCredentials) {
+		if errors.Is(err, ErrNoCredentials) {
 			missing = true
 			continue
 		}
-		if IsAuthErrorCode(authErr, AuthErrorCodeInvalidCredential) {
+		if errors.Is(err, ErrInvalidCredential) {
 			invalid = true
 			continue
 		}
-		return nil, authErr
+		return nil, err
 	}
 
 	if invalid {
-		return nil, NewInvalidCredentialError()
+		return nil, ErrInvalidCredential
 	}
 	if missing {
-		return nil, NewNoCredentialsError()
+		return nil, ErrNoCredentials
 	}
-	return nil, NewNoCredentialsError()
+	return nil, ErrNoCredentials
 }
diff --git a/sdk/access/registry.go b/sdk/access/registry.go
index cbb0d1c555..a29cdd96b6 100644
--- a/sdk/access/registry.go
+++ b/sdk/access/registry.go
@@ -2,15 +2,17 @@ package access
 
 import (
 	"context"
+	"fmt"
 	"net/http"
-	"strings"
 	"sync"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
 )
 
 // Provider validates credentials for incoming requests.
 type Provider interface {
 	Identifier() string
-	Authenticate(ctx context.Context, r *http.Request) (*Result, *AuthError)
+	Authenticate(ctx context.Context, r *http.Request) (*Result, error)
 }
 
 // Result conveys authentication outcome.
@@ -20,64 +22,66 @@ type Result struct {
 	Metadata  map[string]string
 }
 
+// ProviderFactory builds a provider from configuration data.
+type ProviderFactory func(cfg *config.AccessProvider, root *config.SDKConfig) (Provider, error)
+
 var (
 	registryMu sync.RWMutex
-	registry   = make(map[string]Provider)
-	order      []string
+	registry   = make(map[string]ProviderFactory)
 )
 
-// RegisterProvider registers a pre-built provider instance for a given type identifier.
-func RegisterProvider(typ string, provider Provider) {
-	normalizedType := strings.TrimSpace(typ)
-	if normalizedType == "" || provider == nil {
+// RegisterProvider registers a provider factory for a given type identifier.
+func RegisterProvider(typ string, factory ProviderFactory) {
+	if typ == "" || factory == nil {
 		return
 	}
-
 	registryMu.Lock()
-	if _, exists := registry[normalizedType]; !exists {
-		order = append(order, normalizedType)
-	}
-	registry[normalizedType] = provider
+	registry[typ] = factory
 	registryMu.Unlock()
 }
 
-// UnregisterProvider removes a provider by type identifier.
-func UnregisterProvider(typ string) {
-	normalizedType := strings.TrimSpace(typ)
-	if normalizedType == "" {
-		return
+func BuildProvider(cfg *config.AccessProvider, root *config.SDKConfig) (Provider, error) {
+	if cfg == nil {
+		return nil, fmt.Errorf("access: nil provider config")
 	}
-	registryMu.Lock()
-	if _, exists := registry[normalizedType]; !exists {
-		registryMu.Unlock()
-		return
+	registryMu.RLock()
+	factory, ok := registry[cfg.Type]
+	registryMu.RUnlock()
+	if !ok {
+		return nil, fmt.Errorf("access: provider type %q is not registered", cfg.Type)
 	}
-	delete(registry, normalizedType)
-	for index := range order {
-		if order[index] != normalizedType {
-			continue
-		}
-		order = append(order[:index], order[index+1:]...)
-		break
+	provider, err := factory(cfg, root)
+	if err != nil {
+		return nil, fmt.Errorf("access: failed to build provider %q: %w", cfg.Name, err)
 	}
-	registryMu.Unlock()
+	return provider, nil
 }
 
-// RegisteredProviders returns the global provider instances in registration order.
-func RegisteredProviders() []Provider {
-	registryMu.RLock()
-	if len(order) == 0 {
-		registryMu.RUnlock()
-		return nil
+// BuildProviders constructs providers declared in configuration.
+func BuildProviders(root *config.SDKConfig) ([]Provider, error) {
+	if root == nil {
+		return nil, nil
 	}
-	providers := make([]Provider, 0, len(order))
-	for _, providerType := range order {
-		provider, exists := registry[providerType]
-		if !exists || provider == nil {
+	providers := make([]Provider, 0, len(root.Access.Providers))
+	for i := range root.Access.Providers {
+		providerCfg := &root.Access.Providers[i]
+		if providerCfg.Type == "" {
 			continue
 		}
+		provider, err := BuildProvider(providerCfg, root)
+		if err != nil {
+			return nil, err
+		}
 		providers = append(providers, provider)
 	}
-	registryMu.RUnlock()
-	return providers
+	if len(providers) == 0 {
+		if inline := config.MakeInlineAPIKeyProvider(root.APIKeys); inline != nil {
+			provider, err := BuildProvider(inline, root)
+			if err != nil {
+				return nil, err
+			}
+			providers = append(providers, provider)
+		}
+	}
+	return providers, nil
 }
diff --git a/sdk/api/handlers/claude/code_handlers.go b/sdk/api/handlers/claude/code_handlers.go
index 074ffc0d07..22e10fa598 100644
--- a/sdk/api/handlers/claude/code_handlers.go
+++ b/sdk/api/handlers/claude/code_handlers.go
@@ -112,13 +112,12 @@ func (h *ClaudeCodeAPIHandler) ClaudeCountTokens(c *gin.Context) {
 
 	modelName := gjson.GetBytes(rawJSON, "model").String()
 
-	resp, upstreamHeaders, errMsg := h.ExecuteCountWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
+	resp, errMsg := h.ExecuteCountWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
 		return
 	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 	_, _ = c.Writer.Write(resp)
 	cliCancel()
 }
@@ -166,7 +165,7 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
 
 	modelName := gjson.GetBytes(rawJSON, "model").String()
 
-	resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
 	stopKeepAlive()
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
@@ -195,7 +194,6 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
 		}
 	}
 
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 	_, _ = c.Writer.Write(resp)
 	cliCancel()
 }
@@ -227,7 +225,7 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
 	// This allows proper cleanup and cancellation of ongoing requests
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 
-	dataChan, upstreamHeaders, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
 	setSSEHeaders := func() {
 		c.Header("Content-Type", "text/event-stream")
 		c.Header("Cache-Control", "no-cache")
@@ -259,7 +257,6 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
 			if !ok {
 				// Stream closed without data? Send DONE or just headers.
 				setSSEHeaders()
-				handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 				flusher.Flush()
 				cliCancel(nil)
 				return
@@ -267,7 +264,6 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
 
 			// Success! Set headers now.
 			setSSEHeaders()
-			handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 
 			// Write the first chunk
 			if len(chunk) > 0 {
diff --git a/sdk/api/handlers/gemini/gemini-cli_handlers.go b/sdk/api/handlers/gemini/gemini-cli_handlers.go
index b5fd494375..917902e762 100644
--- a/sdk/api/handlers/gemini/gemini-cli_handlers.go
+++ b/sdk/api/handlers/gemini/gemini-cli_handlers.go
@@ -159,8 +159,7 @@ func (h *GeminiCLIAPIHandler) handleInternalStreamGenerateContent(c *gin.Context
 	modelName := modelResult.String()
 
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	dataChan, upstreamHeaders, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
 	h.forwardCLIStream(c, flusher, "", func(err error) { cliCancel(err) }, dataChan, errChan)
 	return
 }
@@ -173,13 +172,12 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ
 	modelName := modelResult.String()
 
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
 		return
 	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 	_, _ = c.Writer.Write(resp)
 	cliCancel()
 }
@@ -187,7 +185,8 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ
 func (h *GeminiCLIAPIHandler) forwardCLIStream(c *gin.Context, flusher http.Flusher, alt string, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
 	var keepAliveInterval *time.Duration
 	if alt != "" {
-		keepAliveInterval = new(time.Duration(0))
+		disabled := time.Duration(0)
+		keepAliveInterval = &disabled
 	}
 
 	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
diff --git a/sdk/api/handlers/gemini/gemini_handlers.go b/sdk/api/handlers/gemini/gemini_handlers.go
index e51ad19bc5..71c485ad01 100644
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -188,7 +188,7 @@ func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName
 	}
 
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	dataChan, upstreamHeaders, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
 
 	setSSEHeaders := func() {
 		c.Header("Content-Type", "text/event-stream")
@@ -223,7 +223,6 @@ func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName
 				if alt == "" {
 					setSSEHeaders()
 				}
-				handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 				flusher.Flush()
 				cliCancel(nil)
 				return
@@ -233,7 +232,6 @@ func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName
 			if alt == "" {
 				setSSEHeaders()
 			}
-			handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 
 			// Write first chunk
 			if alt == "" {
@@ -264,13 +262,12 @@ func (h *GeminiAPIHandler) handleCountTokens(c *gin.Context, modelName string, r
 	c.Header("Content-Type", "application/json")
 	alt := h.GetAlt(c)
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	resp, upstreamHeaders, errMsg := h.ExecuteCountWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
+	resp, errMsg := h.ExecuteCountWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
 		return
 	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 	_, _ = c.Writer.Write(resp)
 	cliCancel()
 }
@@ -289,14 +286,13 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
 	alt := h.GetAlt(c)
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 	stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
-	resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
 	stopKeepAlive()
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
 		return
 	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 	_, _ = c.Writer.Write(resp)
 	cliCancel()
 }
@@ -304,7 +300,8 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
 func (h *GeminiAPIHandler) forwardGeminiStream(c *gin.Context, flusher http.Flusher, alt string, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
 	var keepAliveInterval *time.Duration
 	if alt != "" {
-		keepAliveInterval = new(time.Duration(0))
+		disabled := time.Duration(0)
+		keepAliveInterval = &disabled
 	}
 
 	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
diff --git a/sdk/api/handlers/handlers.go b/sdk/api/handlers/handlers.go
index 5d43fc58fa..36ffe2074d 100644
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -14,7 +14,7 @@ import (
 
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
-	"github.com/router-for-me/CLIProxyAPI/v6/pkg/llmproxy/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -52,45 +52,6 @@ const (
 	defaultStreamingBootstrapRetries = 0
 )
 
-type pinnedAuthContextKey struct{}
-type selectedAuthCallbackContextKey struct{}
-type executionSessionContextKey struct{}
-
-// WithPinnedAuthID returns a child context that requests execution on a specific auth ID.
-func WithPinnedAuthID(ctx context.Context, authID string) context.Context {
-	authID = strings.TrimSpace(authID)
-	if authID == "" {
-		return ctx
-	}
-	if ctx == nil {
-		ctx = context.Background()
-	}
-	return context.WithValue(ctx, pinnedAuthContextKey{}, authID)
-}
-
-// WithSelectedAuthIDCallback returns a child context that receives the selected auth ID.
-func WithSelectedAuthIDCallback(ctx context.Context, callback func(string)) context.Context {
-	if callback == nil {
-		return ctx
-	}
-	if ctx == nil {
-		ctx = context.Background()
-	}
-	return context.WithValue(ctx, selectedAuthCallbackContextKey{}, callback)
-}
-
-// WithExecutionSessionID returns a child context tagged with a long-lived execution session ID.
-func WithExecutionSessionID(ctx context.Context, sessionID string) context.Context {
-	sessionID = strings.TrimSpace(sessionID)
-	if sessionID == "" {
-		return ctx
-	}
-	if ctx == nil {
-		ctx = context.Background()
-	}
-	return context.WithValue(ctx, executionSessionContextKey{}, sessionID)
-}
-
 // BuildErrorResponseBody builds an OpenAI-compatible JSON error response body.
 // If errText is already valid JSON, it is returned as-is to preserve upstream error payloads.
 func BuildErrorResponseBody(status int, errText string) []byte {
@@ -179,12 +140,6 @@ func StreamingBootstrapRetries(cfg *config.SDKConfig) int {
 	return retries
 }
 
-// PassthroughHeadersEnabled returns whether upstream response headers should be forwarded to clients.
-// Default is false.
-func PassthroughHeadersEnabled(cfg *config.SDKConfig) bool {
-	return cfg != nil && cfg.PassthroughHeaders
-}
-
 func requestExecutionMetadata(ctx context.Context) map[string]any {
 	// Idempotency-Key is an optional client-supplied header used to correlate retries.
 	// It is forwarded as execution metadata; when absent we generate a UUID.
@@ -197,59 +152,21 @@ func requestExecutionMetadata(ctx context.Context) map[string]any {
 	if key == "" {
 		key = uuid.NewString()
 	}
-
-	meta := map[string]any{idempotencyKeyMetadataKey: key}
-	if pinnedAuthID := pinnedAuthIDFromContext(ctx); pinnedAuthID != "" {
-		meta[coreexecutor.PinnedAuthMetadataKey] = pinnedAuthID
-	}
-	if selectedCallback := selectedAuthIDCallbackFromContext(ctx); selectedCallback != nil {
-		meta[coreexecutor.SelectedAuthCallbackMetadataKey] = selectedCallback
-	}
-	if executionSessionID := executionSessionIDFromContext(ctx); executionSessionID != "" {
-		meta[coreexecutor.ExecutionSessionMetadataKey] = executionSessionID
-	}
-	return meta
-}
-
-func pinnedAuthIDFromContext(ctx context.Context) string {
-	if ctx == nil {
-		return ""
-	}
-	raw := ctx.Value(pinnedAuthContextKey{})
-	switch v := raw.(type) {
-	case string:
-		return strings.TrimSpace(v)
-	case []byte:
-		return strings.TrimSpace(string(v))
-	default:
-		return ""
-	}
+	return map[string]any{idempotencyKeyMetadataKey: key}
 }
 
-func selectedAuthIDCallbackFromContext(ctx context.Context) func(string) {
-	if ctx == nil {
+func mergeMetadata(base, overlay map[string]any) map[string]any {
+	if len(base) == 0 && len(overlay) == 0 {
 		return nil
 	}
-	raw := ctx.Value(selectedAuthCallbackContextKey{})
-	if callback, ok := raw.(func(string)); ok && callback != nil {
-		return callback
-	}
-	return nil
-}
-
-func executionSessionIDFromContext(ctx context.Context) string {
-	if ctx == nil {
-		return ""
+	out := make(map[string]any, len(base)+len(overlay))
+	for k, v := range base {
+		out[k] = v
 	}
-	raw := ctx.Value(executionSessionContextKey{})
-	switch v := raw.(type) {
-	case string:
-		return strings.TrimSpace(v)
-	case []byte:
-		return strings.TrimSpace(string(v))
-	default:
-		return ""
+	for k, v := range overlay {
+		out[k] = v
 	}
+	return out
 }
 
 // BaseAPIHandler contains the handlers for API endpoints.
@@ -273,11 +190,10 @@ type BaseAPIHandler struct {
 // Returns:
 //   - *BaseAPIHandler: A new API handlers instance
 func NewBaseAPIHandlers(cfg *config.SDKConfig, authManager *coreauth.Manager) *BaseAPIHandler {
-	h := &BaseAPIHandler{
+	return &BaseAPIHandler{
 		Cfg:         cfg,
 		AuthManager: authManager,
 	}
-	return h
 }
 
 // UpdateClients updates the handlers' client list and configuration.
@@ -339,16 +255,15 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
 			parentCtx = logging.WithRequestID(parentCtx, requestID)
 		}
 	}
-	newCtx, cancel := context.WithCancel(parentCtx)
-	if requestCtx != nil && requestCtx != parentCtx {
-		go func() {
-			select {
-			case <-requestCtx.Done():
-				cancel()
-			case <-newCtx.Done():
-			}
-		}()
+
+	// Use requestCtx as base if available to preserve amp context values (fallback_models, etc.)
+	// Falls back to parentCtx if no request context
+	baseCtx := parentCtx
+	if requestCtx != nil {
+		baseCtx = requestCtx
 	}
+
+	newCtx, cancel := context.WithCancel(baseCtx)
 	newCtx = context.WithValue(newCtx, "gin", c)
 	newCtx = context.WithValue(newCtx, "handler", handler)
 	return newCtx, func(params ...interface{}) {
@@ -468,25 +383,21 @@ func appendAPIResponse(c *gin.Context, data []byte) {
 
 // ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
 // This path is the only supported execution route.
-func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, http.Header, *interfaces.ErrorMessage) {
+func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
 	providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
 	if errMsg != nil {
-		return nil, nil, errMsg
+		return nil, errMsg
 	}
 	reqMeta := requestExecutionMetadata(ctx)
 	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
-	payload := rawJSON
-	if len(payload) == 0 {
-		payload = nil
-	}
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
-		Payload: payload,
+		Payload: cloneBytes(rawJSON),
 	}
 	opts := coreexecutor.Options{
 		Stream:          false,
 		Alt:             alt,
-		OriginalRequest: rawJSON,
+		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
 	opts.Metadata = reqMeta
@@ -504,35 +415,28 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
 				addon = hdr.Clone()
 			}
 		}
-		return nil, nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
+		return nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 	}
-	if !PassthroughHeadersEnabled(h.Cfg) {
-		return resp.Payload, nil, nil
-	}
-	return resp.Payload, FilterUpstreamHeaders(resp.Headers), nil
+	return cloneBytes(resp.Payload), nil
 }
 
 // ExecuteCountWithAuthManager executes a non-streaming request via the core auth manager.
 // This path is the only supported execution route.
-func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, http.Header, *interfaces.ErrorMessage) {
+func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
 	providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
 	if errMsg != nil {
-		return nil, nil, errMsg
+		return nil, errMsg
 	}
 	reqMeta := requestExecutionMetadata(ctx)
 	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
-	payload := rawJSON
-	if len(payload) == 0 {
-		payload = nil
-	}
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
-		Payload: payload,
+		Payload: cloneBytes(rawJSON),
 	}
 	opts := coreexecutor.Options{
 		Stream:          false,
 		Alt:             alt,
-		OriginalRequest: rawJSON,
+		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
 	opts.Metadata = reqMeta
@@ -550,43 +454,35 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
 				addon = hdr.Clone()
 			}
 		}
-		return nil, nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
-	}
-	if !PassthroughHeadersEnabled(h.Cfg) {
-		return resp.Payload, nil, nil
+		return nil, &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 	}
-	return resp.Payload, FilterUpstreamHeaders(resp.Headers), nil
+	return cloneBytes(resp.Payload), nil
 }
 
 // ExecuteStreamWithAuthManager executes a streaming request via the core auth manager.
 // This path is the only supported execution route.
-// The returned http.Header carries upstream response headers captured before streaming begins.
-func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, http.Header, <-chan *interfaces.ErrorMessage) {
+func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) {
 	providers, normalizedModel, errMsg := h.getRequestDetails(modelName)
 	if errMsg != nil {
 		errChan := make(chan *interfaces.ErrorMessage, 1)
 		errChan <- errMsg
 		close(errChan)
-		return nil, nil, errChan
+		return nil, errChan
 	}
 	reqMeta := requestExecutionMetadata(ctx)
 	reqMeta[coreexecutor.RequestedModelMetadataKey] = normalizedModel
-	payload := rawJSON
-	if len(payload) == 0 {
-		payload = nil
-	}
 	req := coreexecutor.Request{
 		Model:   normalizedModel,
-		Payload: payload,
+		Payload: cloneBytes(rawJSON),
 	}
 	opts := coreexecutor.Options{
 		Stream:          true,
 		Alt:             alt,
-		OriginalRequest: rawJSON,
+		OriginalRequest: cloneBytes(rawJSON),
 		SourceFormat:    sdktranslator.FromString(handlerType),
 	}
 	opts.Metadata = reqMeta
-	streamResult, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
+	chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
 	if err != nil {
 		errChan := make(chan *interfaces.ErrorMessage, 1)
 		status := http.StatusInternalServerError
@@ -603,19 +499,8 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 		}
 		errChan <- &interfaces.ErrorMessage{StatusCode: status, Error: err, Addon: addon}
 		close(errChan)
-		return nil, nil, errChan
-	}
-	passthroughHeadersEnabled := PassthroughHeadersEnabled(h.Cfg)
-	// Capture upstream headers from the initial connection synchronously before the goroutine starts.
-	// Keep a mutable map so bootstrap retries can replace it before first payload is sent.
-	var upstreamHeaders http.Header
-	if passthroughHeadersEnabled {
-		upstreamHeaders = cloneHeader(FilterUpstreamHeaders(streamResult.Headers))
-		if upstreamHeaders == nil {
-			upstreamHeaders = make(http.Header)
-		}
+		return nil, errChan
 	}
-	chunks := streamResult.Chunks
 	dataChan := make(chan []byte)
 	errChan := make(chan *interfaces.ErrorMessage, 1)
 	go func() {
@@ -689,12 +574,9 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 					if !sentPayload {
 						if bootstrapRetries < maxBootstrapRetries && bootstrapEligible(streamErr) {
 							bootstrapRetries++
-							retryResult, retryErr := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
+							retryChunks, retryErr := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
 							if retryErr == nil {
-								if passthroughHeadersEnabled {
-									replaceHeader(upstreamHeaders, FilterUpstreamHeaders(retryResult.Headers))
-								}
-								chunks = retryResult.Chunks
+								chunks = retryChunks
 								continue outer
 							}
 							streamErr = retryErr
@@ -725,7 +607,7 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 			}
 		}
 	}()
-	return dataChan, upstreamHeaders, errChan
+	return dataChan, errChan
 }
 
 func statusFromError(err error) int {
@@ -785,33 +667,24 @@ func cloneBytes(src []byte) []byte {
 	return dst
 }
 
-func cloneHeader(src http.Header) http.Header {
-	if src == nil {
+func cloneMetadata(src map[string]any) map[string]any {
+	if len(src) == 0 {
 		return nil
 	}
-	dst := make(http.Header, len(src))
-	for key, values := range src {
-		dst[key] = append([]string(nil), values...)
+	dst := make(map[string]any, len(src))
+	for k, v := range src {
+		dst[k] = v
 	}
 	return dst
 }
 
-func replaceHeader(dst http.Header, src http.Header) {
-	for key := range dst {
-		delete(dst, key)
-	}
-	for key, values := range src {
-		dst[key] = append([]string(nil), values...)
-	}
-}
-
 // WriteErrorResponse writes an error message to the response writer using the HTTP status embedded in the message.
 func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.ErrorMessage) {
 	status := http.StatusInternalServerError
 	if msg != nil && msg.StatusCode > 0 {
 		status = msg.StatusCode
 	}
-	if msg != nil && msg.Addon != nil && PassthroughHeadersEnabled(h.Cfg) {
+	if msg != nil && msg.Addon != nil {
 		for key, values := range msg.Addon {
 			if len(values) == 0 {
 				continue
@@ -835,7 +708,7 @@ func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.Erro
 	var previous []byte
 	if existing, exists := c.Get("API_RESPONSE"); exists {
 		if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
-			previous = existingBytes
+			previous = bytes.Clone(existingBytes)
 		}
 	}
 	appendAPIResponse(c, body)
diff --git a/sdk/api/handlers/handlers_stream_bootstrap_test.go b/sdk/api/handlers/handlers_stream_bootstrap_test.go
index ba9dcac598..7814ff1b86 100644
--- a/sdk/api/handlers/handlers_stream_bootstrap_test.go
+++ b/sdk/api/handlers/handlers_stream_bootstrap_test.go
@@ -23,7 +23,7 @@ func (e *failOnceStreamExecutor) Execute(context.Context, *coreauth.Auth, coreex
 	return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "Execute not implemented"}
 }
 
-func (e *failOnceStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (*coreexecutor.StreamResult, error) {
+func (e *failOnceStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (<-chan coreexecutor.StreamChunk, error) {
 	e.mu.Lock()
 	e.calls++
 	call := e.calls
@@ -40,18 +40,12 @@ func (e *failOnceStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth,
 			},
 		}
 		close(ch)
-		return &coreexecutor.StreamResult{
-			Headers: http.Header{"X-Upstream-Attempt": {"1"}},
-			Chunks:  ch,
-		}, nil
+		return ch, nil
 	}
 
 	ch <- coreexecutor.StreamChunk{Payload: []byte("ok")}
 	close(ch)
-	return &coreexecutor.StreamResult{
-		Headers: http.Header{"X-Upstream-Attempt": {"2"}},
-		Chunks:  ch,
-	}, nil
+	return ch, nil
 }
 
 func (e *failOnceStreamExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
@@ -87,7 +81,7 @@ func (e *payloadThenErrorStreamExecutor) Execute(context.Context, *coreauth.Auth
 	return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "Execute not implemented"}
 }
 
-func (e *payloadThenErrorStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (*coreexecutor.StreamResult, error) {
+func (e *payloadThenErrorStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (<-chan coreexecutor.StreamChunk, error) {
 	e.mu.Lock()
 	e.calls++
 	e.mu.Unlock()
@@ -103,7 +97,7 @@ func (e *payloadThenErrorStreamExecutor) ExecuteStream(context.Context, *coreaut
 		},
 	}
 	close(ch)
-	return &coreexecutor.StreamResult{Chunks: ch}, nil
+	return ch, nil
 }
 
 func (e *payloadThenErrorStreamExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
@@ -128,82 +122,6 @@ func (e *payloadThenErrorStreamExecutor) Calls() int {
 	return e.calls
 }
 
-type authAwareStreamExecutor struct {
-	mu      sync.Mutex
-	calls   int
-	authIDs []string
-}
-
-func (e *authAwareStreamExecutor) Identifier() string { return "codex" }
-
-func (e *authAwareStreamExecutor) Execute(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
-	return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "Execute not implemented"}
-}
-
-func (e *authAwareStreamExecutor) ExecuteStream(ctx context.Context, auth *coreauth.Auth, req coreexecutor.Request, opts coreexecutor.Options) (*coreexecutor.StreamResult, error) {
-	_ = ctx
-	_ = req
-	_ = opts
-	ch := make(chan coreexecutor.StreamChunk, 1)
-
-	authID := ""
-	if auth != nil {
-		authID = auth.ID
-	}
-
-	e.mu.Lock()
-	e.calls++
-	e.authIDs = append(e.authIDs, authID)
-	e.mu.Unlock()
-
-	if authID == "auth1" {
-		ch <- coreexecutor.StreamChunk{
-			Err: &coreauth.Error{
-				Code:       "unauthorized",
-				Message:    "unauthorized",
-				Retryable:  false,
-				HTTPStatus: http.StatusUnauthorized,
-			},
-		}
-		close(ch)
-		return &coreexecutor.StreamResult{Chunks: ch}, nil
-	}
-
-	ch <- coreexecutor.StreamChunk{Payload: []byte("ok")}
-	close(ch)
-	return &coreexecutor.StreamResult{Chunks: ch}, nil
-}
-
-func (e *authAwareStreamExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
-	return auth, nil
-}
-
-func (e *authAwareStreamExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
-	return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "CountTokens not implemented"}
-}
-
-func (e *authAwareStreamExecutor) HttpRequest(ctx context.Context, auth *coreauth.Auth, req *http.Request) (*http.Response, error) {
-	return nil, &coreauth.Error{
-		Code:       "not_implemented",
-		Message:    "HttpRequest not implemented",
-		HTTPStatus: http.StatusNotImplemented,
-	}
-}
-
-func (e *authAwareStreamExecutor) Calls() int {
-	e.mu.Lock()
-	defer e.mu.Unlock()
-	return e.calls
-}
-
-func (e *authAwareStreamExecutor) AuthIDs() []string {
-	e.mu.Lock()
-	defer e.mu.Unlock()
-	out := make([]string, len(e.authIDs))
-	copy(out, e.authIDs)
-	return out
-}
-
 func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
 	executor := &failOnceStreamExecutor{}
 	manager := coreauth.NewManager(nil, nil, nil)
@@ -237,12 +155,11 @@ func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
 	})
 
 	handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
-		PassthroughHeaders: true,
 		Streaming: sdkconfig.StreamingConfig{
 			BootstrapRetries: 1,
 		},
 	}, manager)
-	dataChan, upstreamHeaders, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
+	dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
 	if dataChan == nil || errChan == nil {
 		t.Fatalf("expected non-nil channels")
 	}
@@ -264,70 +181,6 @@ func TestExecuteStreamWithAuthManager_RetriesBeforeFirstByte(t *testing.T) {
 	if executor.Calls() != 2 {
 		t.Fatalf("expected 2 stream attempts, got %d", executor.Calls())
 	}
-	upstreamAttemptHeader := upstreamHeaders.Get("X-Upstream-Attempt")
-	if upstreamAttemptHeader != "2" {
-		t.Fatalf("expected upstream header from retry attempt, got %q", upstreamAttemptHeader)
-	}
-}
-
-func TestExecuteStreamWithAuthManager_HeaderPassthroughDisabledByDefault(t *testing.T) {
-	executor := &failOnceStreamExecutor{}
-	manager := coreauth.NewManager(nil, nil, nil)
-	manager.RegisterExecutor(executor)
-
-	auth1 := &coreauth.Auth{
-		ID:       "auth1",
-		Provider: "codex",
-		Status:   coreauth.StatusActive,
-		Metadata: map[string]any{"email": "test1@example.com"},
-	}
-	if _, err := manager.Register(context.Background(), auth1); err != nil {
-		t.Fatalf("manager.Register(auth1): %v", err)
-	}
-
-	auth2 := &coreauth.Auth{
-		ID:       "auth2",
-		Provider: "codex",
-		Status:   coreauth.StatusActive,
-		Metadata: map[string]any{"email": "test2@example.com"},
-	}
-	if _, err := manager.Register(context.Background(), auth2); err != nil {
-		t.Fatalf("manager.Register(auth2): %v", err)
-	}
-
-	registry.GetGlobalRegistry().RegisterClient(auth1.ID, auth1.Provider, []*registry.ModelInfo{{ID: "test-model"}})
-	registry.GetGlobalRegistry().RegisterClient(auth2.ID, auth2.Provider, []*registry.ModelInfo{{ID: "test-model"}})
-	t.Cleanup(func() {
-		registry.GetGlobalRegistry().UnregisterClient(auth1.ID)
-		registry.GetGlobalRegistry().UnregisterClient(auth2.ID)
-	})
-
-	handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
-		Streaming: sdkconfig.StreamingConfig{
-			BootstrapRetries: 1,
-		},
-	}, manager)
-	dataChan, upstreamHeaders, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
-	if dataChan == nil || errChan == nil {
-		t.Fatalf("expected non-nil channels")
-	}
-
-	var got []byte
-	for chunk := range dataChan {
-		got = append(got, chunk...)
-	}
-	for msg := range errChan {
-		if msg != nil {
-			t.Fatalf("unexpected error: %+v", msg)
-		}
-	}
-
-	if string(got) != "ok" {
-		t.Fatalf("expected payload ok, got %q", string(got))
-	}
-	if upstreamHeaders != nil {
-		t.Fatalf("expected nil upstream headers when passthrough is disabled, got %#v", upstreamHeaders)
-	}
 }
 
 func TestExecuteStreamWithAuthManager_DoesNotRetryAfterFirstByte(t *testing.T) {
@@ -367,7 +220,7 @@ func TestExecuteStreamWithAuthManager_DoesNotRetryAfterFirstByte(t *testing.T) {
 			BootstrapRetries: 1,
 		},
 	}, manager)
-	dataChan, _, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
+	dataChan, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
 	if dataChan == nil || errChan == nil {
 		t.Fatalf("expected non-nil channels")
 	}
@@ -399,128 +252,3 @@ func TestExecuteStreamWithAuthManager_DoesNotRetryAfterFirstByte(t *testing.T) {
 		t.Fatalf("expected 1 stream attempt, got %d", executor.Calls())
 	}
 }
-
-func TestExecuteStreamWithAuthManager_PinnedAuthKeepsSameUpstream(t *testing.T) {
-	executor := &authAwareStreamExecutor{}
-	manager := coreauth.NewManager(nil, nil, nil)
-	manager.RegisterExecutor(executor)
-
-	auth1 := &coreauth.Auth{
-		ID:       "auth1",
-		Provider: "codex",
-		Status:   coreauth.StatusActive,
-		Metadata: map[string]any{"email": "test1@example.com"},
-	}
-	if _, err := manager.Register(context.Background(), auth1); err != nil {
-		t.Fatalf("manager.Register(auth1): %v", err)
-	}
-
-	auth2 := &coreauth.Auth{
-		ID:       "auth2",
-		Provider: "codex",
-		Status:   coreauth.StatusActive,
-		Metadata: map[string]any{"email": "test2@example.com"},
-	}
-	if _, err := manager.Register(context.Background(), auth2); err != nil {
-		t.Fatalf("manager.Register(auth2): %v", err)
-	}
-
-	registry.GetGlobalRegistry().RegisterClient(auth1.ID, auth1.Provider, []*registry.ModelInfo{{ID: "test-model"}})
-	registry.GetGlobalRegistry().RegisterClient(auth2.ID, auth2.Provider, []*registry.ModelInfo{{ID: "test-model"}})
-	t.Cleanup(func() {
-		registry.GetGlobalRegistry().UnregisterClient(auth1.ID)
-		registry.GetGlobalRegistry().UnregisterClient(auth2.ID)
-	})
-
-	handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
-		Streaming: sdkconfig.StreamingConfig{
-			BootstrapRetries: 1,
-		},
-	}, manager)
-	ctx := WithPinnedAuthID(context.Background(), "auth1")
-	dataChan, _, errChan := handler.ExecuteStreamWithAuthManager(ctx, "openai", "test-model", []byte(`{"model":"test-model"}`), "")
-	if dataChan == nil || errChan == nil {
-		t.Fatalf("expected non-nil channels")
-	}
-
-	var got []byte
-	for chunk := range dataChan {
-		got = append(got, chunk...)
-	}
-
-	var gotErr error
-	for msg := range errChan {
-		if msg != nil && msg.Error != nil {
-			gotErr = msg.Error
-		}
-	}
-
-	if len(got) != 0 {
-		t.Fatalf("expected empty payload, got %q", string(got))
-	}
-	if gotErr == nil {
-		t.Fatalf("expected terminal error, got nil")
-	}
-	authIDs := executor.AuthIDs()
-	if len(authIDs) == 0 {
-		t.Fatalf("expected at least one upstream attempt")
-	}
-	for _, authID := range authIDs {
-		if authID != "auth1" {
-			t.Fatalf("expected all attempts on auth1, got sequence %v", authIDs)
-		}
-	}
-}
-
-func TestExecuteStreamWithAuthManager_SelectedAuthCallbackReceivesAuthID(t *testing.T) {
-	executor := &authAwareStreamExecutor{}
-	manager := coreauth.NewManager(nil, nil, nil)
-	manager.RegisterExecutor(executor)
-
-	auth2 := &coreauth.Auth{
-		ID:       "auth2",
-		Provider: "codex",
-		Status:   coreauth.StatusActive,
-		Metadata: map[string]any{"email": "test2@example.com"},
-	}
-	if _, err := manager.Register(context.Background(), auth2); err != nil {
-		t.Fatalf("manager.Register(auth2): %v", err)
-	}
-
-	registry.GetGlobalRegistry().RegisterClient(auth2.ID, auth2.Provider, []*registry.ModelInfo{{ID: "test-model"}})
-	t.Cleanup(func() {
-		registry.GetGlobalRegistry().UnregisterClient(auth2.ID)
-	})
-
-	handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
-		Streaming: sdkconfig.StreamingConfig{
-			BootstrapRetries: 0,
-		},
-	}, manager)
-
-	selectedAuthID := ""
-	ctx := WithSelectedAuthIDCallback(context.Background(), func(authID string) {
-		selectedAuthID = authID
-	})
-	dataChan, _, errChan := handler.ExecuteStreamWithAuthManager(ctx, "openai", "test-model", []byte(`{"model":"test-model"}`), "")
-	if dataChan == nil || errChan == nil {
-		t.Fatalf("expected non-nil channels")
-	}
-
-	var got []byte
-	for chunk := range dataChan {
-		got = append(got, chunk...)
-	}
-	for msg := range errChan {
-		if msg != nil {
-			t.Fatalf("unexpected error: %+v", msg)
-		}
-	}
-
-	if string(got) != "ok" {
-		t.Fatalf("expected payload ok, got %q", string(got))
-	}
-	if selectedAuthID != "auth2" {
-		t.Fatalf("selectedAuthID = %q, want %q", selectedAuthID, "auth2")
-	}
-}
diff --git a/sdk/api/handlers/openai/openai_handlers.go b/sdk/api/handlers/openai/openai_handlers.go
index 2e85dcf851..09471ce1d6 100644
--- a/sdk/api/handlers/openai/openai_handlers.go
+++ b/sdk/api/handlers/openai/openai_handlers.go
@@ -17,7 +17,6 @@ import (
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-	codexconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/codex/openai/chat-completions"
 	responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
 	"github.com/tidwall/gjson"
@@ -113,23 +112,6 @@ func (h *OpenAIAPIHandler) ChatCompletions(c *gin.Context) {
 	streamResult := gjson.GetBytes(rawJSON, "stream")
 	stream := streamResult.Type == gjson.True
 
-	modelName := gjson.GetBytes(rawJSON, "model").String()
-	if overrideEndpoint, ok := resolveEndpointOverride(modelName, openAIChatEndpoint); ok && overrideEndpoint == openAIResponsesEndpoint {
-		originalChat := rawJSON
-		if shouldTreatAsResponsesFormat(rawJSON) {
-			// Already responses-style payload; no conversion needed.
-		} else {
-			rawJSON = codexconverter.ConvertOpenAIRequestToCodex(modelName, rawJSON, stream)
-		}
-		stream = gjson.GetBytes(rawJSON, "stream").Bool()
-		if stream {
-			h.handleStreamingResponseViaResponses(c, rawJSON, originalChat)
-		} else {
-			h.handleNonStreamingResponseViaResponses(c, rawJSON, originalChat)
-		}
-		return
-	}
-
 	// Some clients send OpenAI Responses-format payloads to /v1/chat/completions.
 	// Convert them to Chat Completions so downstream translators preserve tool metadata.
 	if shouldTreatAsResponsesFormat(rawJSON) {
@@ -263,76 +245,6 @@ func convertCompletionsRequestToChatCompletions(rawJSON []byte) []byte {
 	return []byte(out)
 }
 
-func convertResponsesObjectToChatCompletion(ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON, responsesPayload []byte) []byte {
-	if len(responsesPayload) == 0 {
-		return nil
-	}
-	wrapped := wrapResponsesPayloadAsCompleted(responsesPayload)
-	if len(wrapped) == 0 {
-		return nil
-	}
-	var param any
-	converted := codexconverter.ConvertCodexResponseToOpenAINonStream(ctx, modelName, originalChatJSON, responsesRequestJSON, wrapped, &param)
-	if converted == "" {
-		return nil
-	}
-	return []byte(converted)
-}
-
-func wrapResponsesPayloadAsCompleted(payload []byte) []byte {
-	if gjson.GetBytes(payload, "type").Exists() {
-		return payload
-	}
-	if gjson.GetBytes(payload, "object").String() != "response" {
-		return payload
-	}
-	wrapped := `{"type":"response.completed","response":{}}`
-	wrapped, _ = sjson.SetRaw(wrapped, "response", string(payload))
-	return []byte(wrapped)
-}
-
-func writeConvertedResponsesChunk(c *gin.Context, ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON, chunk []byte, param *any) {
-	outputs := codexconverter.ConvertCodexResponseToOpenAI(ctx, modelName, originalChatJSON, responsesRequestJSON, chunk, param)
-	for _, out := range outputs {
-		if out == "" {
-			continue
-		}
-		_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", out)
-	}
-}
-
-func (h *OpenAIAPIHandler) forwardResponsesAsChatStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON []byte, param *any) {
-	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
-		WriteChunk: func(chunk []byte) {
-			outputs := codexconverter.ConvertCodexResponseToOpenAI(ctx, modelName, originalChatJSON, responsesRequestJSON, chunk, param)
-			for _, out := range outputs {
-				if out == "" {
-					continue
-				}
-				_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", out)
-			}
-		},
-		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
-			if errMsg == nil {
-				return
-			}
-			status := http.StatusInternalServerError
-			if errMsg.StatusCode > 0 {
-				status = errMsg.StatusCode
-			}
-			errText := http.StatusText(status)
-			if errMsg.Error != nil && errMsg.Error.Error() != "" {
-				errText = errMsg.Error.Error()
-			}
-			body := handlers.BuildErrorResponseBody(status, errText)
-			_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(body))
-		},
-		WriteDone: func() {
-			_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
-		},
-	})
-}
-
 // convertChatCompletionsResponseToCompletions converts chat completions API response back to completions format.
 // This ensures the completions endpoint returns data in the expected format.
 //
@@ -420,7 +332,6 @@ func convertChatCompletionsStreamChunkToCompletions(chunkData []byte) []byte {
 
 	// Check if this chunk has any meaningful content
 	hasContent := false
-	hasUsage := root.Get("usage").Exists()
 	if chatChoices := root.Get("choices"); chatChoices.Exists() && chatChoices.IsArray() {
 		chatChoices.ForEach(func(_, choice gjson.Result) bool {
 			// Check if delta has content or finish_reason
@@ -439,8 +350,8 @@ func convertChatCompletionsStreamChunkToCompletions(chunkData []byte) []byte {
 		})
 	}
 
-	// If no meaningful content and no usage, return nil to indicate this chunk should be skipped
-	if !hasContent && !hasUsage {
+	// If no meaningful content, return nil to indicate this chunk should be skipped
+	if !hasContent {
 		return nil
 	}
 
@@ -499,11 +410,6 @@ func convertChatCompletionsStreamChunkToCompletions(chunkData []byte) []byte {
 		out, _ = sjson.SetRaw(out, "choices", string(choicesJSON))
 	}
 
-	// Copy usage if present
-	if usage := root.Get("usage"); usage.Exists() {
-		out, _ = sjson.SetRaw(out, "usage", usage.Raw)
-	}
-
 	return []byte(out)
 }
 
@@ -519,42 +425,16 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
 
 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, h.GetAlt(c))
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, h.GetAlt(c))
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
 		return
 	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 	_, _ = c.Writer.Write(resp)
 	cliCancel()
 }
 
-func (h *OpenAIAPIHandler) handleNonStreamingResponseViaResponses(c *gin.Context, rawJSON []byte, originalChatJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	modelName := gjson.GetBytes(rawJSON, "model").String()
-	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, OpenaiResponse, modelName, rawJSON, h.GetAlt(c))
-	if errMsg != nil {
-		h.WriteErrorResponse(c, errMsg)
-		cliCancel(errMsg.Error)
-		return
-	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
-	converted := convertResponsesObjectToChatCompletion(cliCtx, modelName, originalChatJSON, rawJSON, resp)
-	if converted == nil {
-		h.WriteErrorResponse(c, &interfaces.ErrorMessage{
-			StatusCode: http.StatusInternalServerError,
-			Error:      fmt.Errorf("failed to convert response to chat completion format"),
-		})
-		cliCancel(fmt.Errorf("response conversion failed"))
-		return
-	}
-	_, _ = c.Writer.Write(converted)
-	cliCancel()
-}
-
 // handleStreamingResponse handles streaming responses for Gemini models.
 // It establishes a streaming connection with the backend service and forwards
 // the response chunks to the client in real-time using Server-Sent Events.
@@ -577,7 +457,7 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
 
 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	dataChan, upstreamHeaders, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, h.GetAlt(c))
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, h.GetAlt(c))
 
 	setSSEHeaders := func() {
 		c.Header("Content-Type", "text/event-stream")
@@ -610,7 +490,6 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
 			if !ok {
 				// Stream closed without data? Send DONE or just headers.
 				setSSEHeaders()
-				handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 				_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
 				flusher.Flush()
 				cliCancel(nil)
@@ -619,7 +498,6 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
 
 			// Success! Commit to streaming headers.
 			setSSEHeaders()
-			handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 
 			_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(chunk))
 			flusher.Flush()
@@ -631,69 +509,6 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
 	}
 }
 
-func (h *OpenAIAPIHandler) handleStreamingResponseViaResponses(c *gin.Context, rawJSON []byte, originalChatJSON []byte) {
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	modelName := gjson.GetBytes(rawJSON, "model").String()
-	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	dataChan, upstreamHeaders, errChan := h.ExecuteStreamWithAuthManager(cliCtx, OpenaiResponse, modelName, rawJSON, h.GetAlt(c))
-	var param any
-
-	setSSEHeaders := func() {
-		c.Header("Content-Type", "text/event-stream")
-		c.Header("Cache-Control", "no-cache")
-		c.Header("Connection", "keep-alive")
-		c.Header("Access-Control-Allow-Origin", "*")
-	}
-
-	// Peek for first usable chunk
-	for {
-		select {
-		case <-c.Request.Context().Done():
-			cliCancel(c.Request.Context().Err())
-			return
-		case errMsg, ok := <-errChan:
-			if !ok {
-				errChan = nil
-				continue
-			}
-			h.WriteErrorResponse(c, errMsg)
-			if errMsg != nil {
-				cliCancel(errMsg.Error)
-			} else {
-				cliCancel(nil)
-			}
-			return
-		case chunk, ok := <-dataChan:
-			if !ok {
-				setSSEHeaders()
-				handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
-				_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
-				flusher.Flush()
-				cliCancel(nil)
-				return
-			}
-
-			setSSEHeaders()
-			handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
-			writeConvertedResponsesChunk(c, cliCtx, modelName, originalChatJSON, rawJSON, chunk, &param)
-			flusher.Flush()
-
-			h.forwardResponsesAsChatStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan, cliCtx, modelName, originalChatJSON, rawJSON, &param)
-			return
-		}
-	}
-}
-
 // handleCompletionsNonStreamingResponse handles non-streaming completions responses.
 // It converts completions request to chat completions format, sends to backend,
 // then converts the response back to completions format before sending to client.
@@ -710,14 +525,13 @@ func (h *OpenAIAPIHandler) handleCompletionsNonStreamingResponse(c *gin.Context,
 	modelName := gjson.GetBytes(chatCompletionsJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 	stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
-	resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")
 	stopKeepAlive()
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
 		return
 	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 	completionsResp := convertChatCompletionsResponseToCompletions(resp)
 	_, _ = c.Writer.Write(completionsResp)
 	cliCancel()
@@ -748,7 +562,7 @@ func (h *OpenAIAPIHandler) handleCompletionsStreamingResponse(c *gin.Context, ra
 
 	modelName := gjson.GetBytes(chatCompletionsJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	dataChan, upstreamHeaders, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")
 
 	setSSEHeaders := func() {
 		c.Header("Content-Type", "text/event-stream")
@@ -779,7 +593,6 @@ func (h *OpenAIAPIHandler) handleCompletionsStreamingResponse(c *gin.Context, ra
 		case chunk, ok := <-dataChan:
 			if !ok {
 				setSSEHeaders()
-				handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 				_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
 				flusher.Flush()
 				cliCancel(nil)
@@ -788,7 +601,6 @@ func (h *OpenAIAPIHandler) handleCompletionsStreamingResponse(c *gin.Context, ra
 
 			// Success! Set headers.
 			setSSEHeaders()
-			handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 
 			// Write the first chunk
 			converted := convertChatCompletionsStreamChunkToCompletions(chunk)
diff --git a/sdk/api/handlers/openai/openai_responses_compact_test.go b/sdk/api/handlers/openai/openai_responses_compact_test.go
index dcfcc99a7c..a62a9682db 100644
--- a/sdk/api/handlers/openai/openai_responses_compact_test.go
+++ b/sdk/api/handlers/openai/openai_responses_compact_test.go
@@ -31,7 +31,7 @@ func (e *compactCaptureExecutor) Execute(ctx context.Context, auth *coreauth.Aut
 	return coreexecutor.Response{Payload: []byte(`{"ok":true}`)}, nil
 }
 
-func (e *compactCaptureExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (*coreexecutor.StreamResult, error) {
+func (e *compactCaptureExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (<-chan coreexecutor.StreamChunk, error) {
 	return nil, errors.New("not implemented")
 }
 
diff --git a/sdk/api/handlers/openai/openai_responses_handlers.go b/sdk/api/handlers/openai/openai_responses_handlers.go
index f10e8d51f7..4b611af39b 100644
--- a/sdk/api/handlers/openai/openai_responses_handlers.go
+++ b/sdk/api/handlers/openai/openai_responses_handlers.go
@@ -16,7 +16,6 @@ import (
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-	responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -85,21 +84,7 @@ func (h *OpenAIResponsesAPIHandler) Responses(c *gin.Context) {
 
 	// Check if the client requested a streaming response.
 	streamResult := gjson.GetBytes(rawJSON, "stream")
-	stream := streamResult.Type == gjson.True
-
-	modelName := gjson.GetBytes(rawJSON, "model").String()
-	if overrideEndpoint, ok := resolveEndpointOverride(modelName, openAIResponsesEndpoint); ok && overrideEndpoint == openAIChatEndpoint {
-		chatJSON := responsesconverter.ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName, rawJSON, stream)
-		stream = gjson.GetBytes(chatJSON, "stream").Bool()
-		if stream {
-			h.handleStreamingResponseViaChat(c, rawJSON, chatJSON)
-		} else {
-			h.handleNonStreamingResponseViaChat(c, rawJSON, chatJSON)
-		}
-		return
-	}
-
-	if stream {
+	if streamResult.Type == gjson.True {
 		h.handleStreamingResponse(c, rawJSON)
 	} else {
 		h.handleNonStreamingResponse(c, rawJSON)
@@ -139,14 +124,13 @@ func (h *OpenAIResponsesAPIHandler) Compact(c *gin.Context) {
 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 	stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
-	resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "responses/compact")
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "responses/compact")
 	stopKeepAlive()
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
 		return
 	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 	_, _ = c.Writer.Write(resp)
 	cliCancel()
 }
@@ -165,44 +149,17 @@ func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponse(c *gin.Context, r
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 	stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
 
-	resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
 	stopKeepAlive()
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
 		return
 	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 	_, _ = c.Writer.Write(resp)
 	cliCancel()
 }
 
-func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponseViaChat(c *gin.Context, originalResponsesJSON, chatJSON []byte) {
-	c.Header("Content-Type", "application/json")
-
-	modelName := gjson.GetBytes(chatJSON, "model").String()
-	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	resp, upstreamHeaders, errMsg := h.ExecuteWithAuthManager(cliCtx, OpenAI, modelName, chatJSON, "")
-	if errMsg != nil {
-		h.WriteErrorResponse(c, errMsg)
-		cliCancel(errMsg.Error)
-		return
-	}
-	handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
-	var param any
-	converted := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(cliCtx, modelName, originalResponsesJSON, originalResponsesJSON, resp, &param)
-	if converted == "" {
-		h.WriteErrorResponse(c, &interfaces.ErrorMessage{
-			StatusCode: http.StatusInternalServerError,
-			Error:      fmt.Errorf("failed to convert chat completion response to responses format"),
-		})
-		cliCancel(fmt.Errorf("response conversion failed"))
-		return
-	}
-	_, _ = c.Writer.Write([]byte(converted))
-	cliCancel()
-}
-
 // handleStreamingResponse handles streaming responses for Gemini models.
 // It establishes a streaming connection with the backend service and forwards
 // the response chunks to the client in real-time using Server-Sent Events.
@@ -226,7 +183,7 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
 	// New core execution path
 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	dataChan, upstreamHeaders, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
 
 	setSSEHeaders := func() {
 		c.Header("Content-Type", "text/event-stream")
@@ -259,7 +216,6 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
 			if !ok {
 				// Stream closed without data? Send headers and done.
 				setSSEHeaders()
-				handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 				_, _ = c.Writer.Write([]byte("\n"))
 				flusher.Flush()
 				cliCancel(nil)
@@ -268,7 +224,6 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
 
 			// Success! Set headers.
 			setSSEHeaders()
-			handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
 
 			// Write first chunk logic (matching forwardResponsesStream)
 			if bytes.HasPrefix(chunk, []byte("event:")) {
@@ -285,118 +240,6 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
 	}
 }
 
-func (h *OpenAIResponsesAPIHandler) handleStreamingResponseViaChat(c *gin.Context, originalResponsesJSON, chatJSON []byte) {
-	flusher, ok := c.Writer.(http.Flusher)
-	if !ok {
-		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
-			Error: handlers.ErrorDetail{
-				Message: "Streaming not supported",
-				Type:    "server_error",
-			},
-		})
-		return
-	}
-
-	modelName := gjson.GetBytes(chatJSON, "model").String()
-	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	dataChan, upstreamHeaders, errChan := h.ExecuteStreamWithAuthManager(cliCtx, OpenAI, modelName, chatJSON, "")
-	var param any
-
-	setSSEHeaders := func() {
-		c.Header("Content-Type", "text/event-stream")
-		c.Header("Cache-Control", "no-cache")
-		c.Header("Connection", "keep-alive")
-		c.Header("Access-Control-Allow-Origin", "*")
-	}
-
-	for {
-		select {
-		case <-c.Request.Context().Done():
-			cliCancel(c.Request.Context().Err())
-			return
-		case errMsg, ok := <-errChan:
-			if !ok {
-				errChan = nil
-				continue
-			}
-			h.WriteErrorResponse(c, errMsg)
-			if errMsg != nil {
-				cliCancel(errMsg.Error)
-			} else {
-				cliCancel(nil)
-			}
-			return
-		case chunk, ok := <-dataChan:
-			if !ok {
-				setSSEHeaders()
-				handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
-				_, _ = c.Writer.Write([]byte("\n"))
-				flusher.Flush()
-				cliCancel(nil)
-				return
-			}
-
-			setSSEHeaders()
-			handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
-			writeChatAsResponsesChunk(c, cliCtx, modelName, originalResponsesJSON, chunk, &param)
-			flusher.Flush()
-
-			h.forwardChatAsResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan, cliCtx, modelName, originalResponsesJSON, &param)
-			return
-		}
-	}
-}
-
-func writeChatAsResponsesChunk(c *gin.Context, ctx context.Context, modelName string, originalResponsesJSON, chunk []byte, param *any) {
-	outputs := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx, modelName, originalResponsesJSON, originalResponsesJSON, chunk, param)
-	for _, out := range outputs {
-		if out == "" {
-			continue
-		}
-		if bytes.HasPrefix([]byte(out), []byte("event:")) {
-			_, _ = c.Writer.Write([]byte("\n"))
-		}
-		_, _ = c.Writer.Write([]byte(out))
-		_, _ = c.Writer.Write([]byte("\n"))
-	}
-}
-
-func (h *OpenAIResponsesAPIHandler) forwardChatAsResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, ctx context.Context, modelName string, originalResponsesJSON []byte, param *any) {
-	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
-		WriteChunk: func(chunk []byte) {
-			outputs := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx, modelName, originalResponsesJSON, originalResponsesJSON, chunk, param)
-			for _, out := range outputs {
-				if out == "" {
-					continue
-				}
-				if bytes.HasPrefix([]byte(out), []byte("event:")) {
-					_, _ = c.Writer.Write([]byte("\n"))
-				}
-				_, _ = c.Writer.Write([]byte(out))
-				_, _ = c.Writer.Write([]byte("\n"))
-			}
-		},
-		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
-			if errMsg == nil {
-				return
-			}
-			status := http.StatusInternalServerError
-			if errMsg.StatusCode > 0 {
-				status = errMsg.StatusCode
-			}
-			errText := http.StatusText(status)
-			if errMsg.Error != nil && errMsg.Error.Error() != "" {
-				errText = errMsg.Error.Error()
-			}
-			body := handlers.BuildErrorResponseBody(status, errText)
-			_, _ = fmt.Fprintf(c.Writer, "\nevent: error\ndata: %s\n\n", string(body))
-		},
-		WriteDone: func() {
-			_, _ = c.Writer.Write([]byte("\n"))
-		},
-	})
-}
-
 func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
 	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
 		WriteChunk: func(chunk []byte) {
diff --git a/sdk/api/management.go b/sdk/api/management.go
index 6fd3b709be..66af41ae91 100644
--- a/sdk/api/management.go
+++ b/sdk/api/management.go
@@ -18,7 +18,6 @@ type ManagementTokenRequester interface {
 	RequestCodexToken(*gin.Context)
 	RequestAntigravityToken(*gin.Context)
 	RequestQwenToken(*gin.Context)
-	RequestKimiToken(*gin.Context)
 	RequestIFlowToken(*gin.Context)
 	RequestIFlowCookieToken(*gin.Context)
 	GetAuthStatus(c *gin.Context)
@@ -56,10 +55,6 @@ func (m *managementTokenRequester) RequestQwenToken(c *gin.Context) {
 	m.handler.RequestQwenToken(c)
 }
 
-func (m *managementTokenRequester) RequestKimiToken(c *gin.Context) {
-	m.handler.RequestKimiToken(c)
-}
-
 func (m *managementTokenRequester) RequestIFlowToken(c *gin.Context) {
 	m.handler.RequestIFlowToken(c)
 }
diff --git a/sdk/auth/antigravity.go b/sdk/auth/antigravity.go
index 6ed31d6d72..ecca0a0041 100644
--- a/sdk/auth/antigravity.go
+++ b/sdk/auth/antigravity.go
@@ -28,7 +28,8 @@ func (AntigravityAuthenticator) Provider() string { return "antigravity" }
 
 // RefreshLead instructs the manager to refresh five minutes before expiry.
 func (AntigravityAuthenticator) RefreshLead() *time.Duration {
-	return new(5 * time.Minute)
+	lead := 5 * time.Minute
+	return &lead
 }
 
 // Login launches a local OAuth flow to obtain antigravity tokens and persists them.
diff --git a/sdk/auth/claude.go b/sdk/auth/claude.go
index 706763b3ea..a6b19af576 100644
--- a/sdk/auth/claude.go
+++ b/sdk/auth/claude.go
@@ -32,7 +32,8 @@ func (a *ClaudeAuthenticator) Provider() string {
 }
 
 func (a *ClaudeAuthenticator) RefreshLead() *time.Duration {
-	return new(4 * time.Hour)
+	d := 4 * time.Hour
+	return &d
 }
 
 func (a *ClaudeAuthenticator) Login(ctx context.Context, cfg *config.Config, opts *LoginOptions) (*coreauth.Auth, error) {
diff --git a/sdk/auth/codex.go b/sdk/auth/codex.go
index c81842eb3c..b655a23945 100644
--- a/sdk/auth/codex.go
+++ b/sdk/auth/codex.go
@@ -34,7 +34,8 @@ func (a *CodexAuthenticator) Provider() string {
 }
 
 func (a *CodexAuthenticator) RefreshLead() *time.Duration {
-	return new(5 * 24 * time.Hour)
+	d := 5 * 24 * time.Hour
+	return &d
 }
 
 func (a *CodexAuthenticator) Login(ctx context.Context, cfg *config.Config, opts *LoginOptions) (*coreauth.Auth, error) {
diff --git a/sdk/auth/filestore.go b/sdk/auth/filestore.go
index 4715d7f7b1..0bb7ff7da3 100644
--- a/sdk/auth/filestore.go
+++ b/sdk/auth/filestore.go
@@ -4,10 +4,8 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"io"
 	"io/fs"
 	"net/http"
-	"net/url"
 	"os"
 	"path/filepath"
 	"strings"
@@ -188,21 +186,15 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
 	if provider == "" {
 		provider = "unknown"
 	}
-	if provider == "antigravity" || provider == "gemini" {
+	if provider == "antigravity" {
 		projectID := ""
 		if pid, ok := metadata["project_id"].(string); ok {
 			projectID = strings.TrimSpace(pid)
 		}
 		if projectID == "" {
-			accessToken := extractAccessToken(metadata)
-			// For gemini type, the stored access_token is likely expired (~1h lifetime).
-			// Refresh it using the long-lived refresh_token before querying.
-			if provider == "gemini" {
-				if tokenMap, ok := metadata["token"].(map[string]any); ok {
-					if refreshed, errRefresh := refreshGeminiAccessToken(tokenMap, http.DefaultClient); errRefresh == nil {
-						accessToken = refreshed
-					}
-				}
+			accessToken := ""
+			if token, ok := metadata["access_token"].(string); ok {
+				accessToken = strings.TrimSpace(token)
 			}
 			if accessToken != "" {
 				fetchedProjectID, errFetch := FetchAntigravityProjectID(context.Background(), accessToken, http.DefaultClient)
@@ -228,15 +220,6 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
 	if disabled {
 		status = cliproxyauth.StatusDisabled
 	}
-
-	// Calculate NextRefreshAfter from expires_at (20 minutes before expiry)
-	var nextRefreshAfter time.Time
-	if expiresAtStr, ok := metadata["expires_at"].(string); ok && expiresAtStr != "" {
-		if expiresAt, err := time.Parse(time.RFC3339, expiresAtStr); err == nil {
-			nextRefreshAfter = expiresAt.Add(-20 * time.Minute)
-		}
-	}
-
 	auth := &cliproxyauth.Auth{
 		ID:               id,
 		Provider:         provider,
@@ -249,7 +232,7 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
 		CreatedAt:        info.ModTime(),
 		UpdatedAt:        info.ModTime(),
 		LastRefreshedAt:  time.Time{},
-		NextRefreshAfter: nextRefreshAfter,
+		NextRefreshAfter: time.Time{},
 	}
 	if email, ok := metadata["email"].(string); ok && email != "" {
 		auth.Attributes["email"] = email
@@ -321,67 +304,6 @@ func (s *FileTokenStore) baseDirSnapshot() string {
 	return s.baseDir
 }
 
-func extractAccessToken(metadata map[string]any) string {
-	if at, ok := metadata["access_token"].(string); ok {
-		if v := strings.TrimSpace(at); v != "" {
-			return v
-		}
-	}
-	if tokenMap, ok := metadata["token"].(map[string]any); ok {
-		if at, ok := tokenMap["access_token"].(string); ok {
-			if v := strings.TrimSpace(at); v != "" {
-				return v
-			}
-		}
-	}
-	return ""
-}
-
-func refreshGeminiAccessToken(tokenMap map[string]any, httpClient *http.Client) (string, error) {
-	refreshToken, _ := tokenMap["refresh_token"].(string)
-	clientID, _ := tokenMap["client_id"].(string)
-	clientSecret, _ := tokenMap["client_secret"].(string)
-	tokenURI, _ := tokenMap["token_uri"].(string)
-
-	if refreshToken == "" || clientID == "" || clientSecret == "" {
-		return "", fmt.Errorf("missing refresh credentials")
-	}
-	if tokenURI == "" {
-		tokenURI = "https://oauth2.googleapis.com/token"
-	}
-
-	data := url.Values{
-		"grant_type":    {"refresh_token"},
-		"refresh_token": {refreshToken},
-		"client_id":     {clientID},
-		"client_secret": {clientSecret},
-	}
-
-	resp, err := httpClient.PostForm(tokenURI, data)
-	if err != nil {
-		return "", fmt.Errorf("refresh request: %w", err)
-	}
-	defer func() { _ = resp.Body.Close() }()
-
-	body, _ := io.ReadAll(resp.Body)
-	if resp.StatusCode != http.StatusOK {
-		return "", fmt.Errorf("refresh failed: status %d", resp.StatusCode)
-	}
-
-	var result map[string]any
-	if errUnmarshal := json.Unmarshal(body, &result); errUnmarshal != nil {
-		return "", fmt.Errorf("decode refresh response: %w", errUnmarshal)
-	}
-
-	newAccessToken, _ := result["access_token"].(string)
-	if newAccessToken == "" {
-		return "", fmt.Errorf("no access_token in refresh response")
-	}
-
-	tokenMap["access_token"] = newAccessToken
-	return newAccessToken, nil
-}
-
 // jsonEqual compares two JSON blobs by parsing them into Go objects and deep comparing.
 func jsonEqual(a, b []byte) bool {
 	var objA any
diff --git a/sdk/auth/iflow.go b/sdk/auth/iflow.go
index a695311db2..6d4ff9466b 100644
--- a/sdk/auth/iflow.go
+++ b/sdk/auth/iflow.go
@@ -26,7 +26,8 @@ func (a *IFlowAuthenticator) Provider() string { return "iflow" }
 
 // RefreshLead indicates how soon before expiry a refresh should be attempted.
 func (a *IFlowAuthenticator) RefreshLead() *time.Duration {
-	return new(24 * time.Hour)
+	d := 24 * time.Hour
+	return &d
 }
 
 // Login performs the OAuth code flow using a local callback server.
diff --git a/sdk/auth/manager.go b/sdk/auth/manager.go
index d630f128e3..c6469a7d19 100644
--- a/sdk/auth/manager.go
+++ b/sdk/auth/manager.go
@@ -74,16 +74,3 @@ func (m *Manager) Login(ctx context.Context, provider string, cfg *config.Config
 	}
 	return record, savedPath, nil
 }
-
-// SaveAuth persists an auth record directly without going through the login flow.
-func (m *Manager) SaveAuth(record *coreauth.Auth, cfg *config.Config) (string, error) {
-	if m.store == nil {
-		return "", fmt.Errorf("no store configured")
-	}
-	if cfg != nil {
-		if dirSetter, ok := m.store.(interface{ SetBaseDir(string) }); ok {
-			dirSetter.SetBaseDir(cfg.AuthDir)
-		}
-	}
-	return m.store.Save(context.Background(), record)
-}
diff --git a/sdk/auth/qwen.go b/sdk/auth/qwen.go
index 310d498760..151fba6816 100644
--- a/sdk/auth/qwen.go
+++ b/sdk/auth/qwen.go
@@ -27,7 +27,8 @@ func (a *QwenAuthenticator) Provider() string {
 }
 
 func (a *QwenAuthenticator) RefreshLead() *time.Duration {
-	return new(3 * time.Hour)
+	d := 3 * time.Hour
+	return &d
 }
 
 func (a *QwenAuthenticator) Login(ctx context.Context, cfg *config.Config, opts *LoginOptions) (*coreauth.Auth, error) {
diff --git a/sdk/auth/refresh_registry.go b/sdk/auth/refresh_registry.go
index ecf8e820af..e82ac68487 100644
--- a/sdk/auth/refresh_registry.go
+++ b/sdk/auth/refresh_registry.go
@@ -14,9 +14,6 @@ func init() {
 	registerRefreshLead("gemini", func() Authenticator { return NewGeminiAuthenticator() })
 	registerRefreshLead("gemini-cli", func() Authenticator { return NewGeminiAuthenticator() })
 	registerRefreshLead("antigravity", func() Authenticator { return NewAntigravityAuthenticator() })
-	registerRefreshLead("kimi", func() Authenticator { return NewKimiAuthenticator() })
-	registerRefreshLead("kiro", func() Authenticator { return NewKiroAuthenticator() })
-	registerRefreshLead("github-copilot", func() Authenticator { return NewGitHubCopilotAuthenticator() })
 }
 
 func registerRefreshLead(provider string, factory func() Authenticator) {
diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go
index a20f864551..26c538d76d 100644
--- a/sdk/cliproxy/auth/conductor.go
+++ b/sdk/cliproxy/auth/conductor.go
@@ -18,6 +18,7 @@ import (
 	internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/routing/ctxkeys"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -30,9 +31,8 @@ type ProviderExecutor interface {
 	Identifier() string
 	// Execute handles non-streaming execution and returns the provider response payload.
 	Execute(ctx context.Context, auth *Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error)
-	// ExecuteStream handles streaming execution and returns a StreamResult containing
-	// upstream headers and a channel of provider chunks.
-	ExecuteStream(ctx context.Context, auth *Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error)
+	// ExecuteStream handles streaming execution and returns a channel of provider chunks.
+	ExecuteStream(ctx context.Context, auth *Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (<-chan cliproxyexecutor.StreamChunk, error)
 	// Refresh attempts to refresh provider credentials and returns the updated auth state.
 	Refresh(ctx context.Context, auth *Auth) (*Auth, error)
 	// CountTokens returns the token count for the given request.
@@ -42,26 +42,15 @@ type ProviderExecutor interface {
 	HttpRequest(ctx context.Context, auth *Auth, req *http.Request) (*http.Response, error)
 }
 
-// ExecutionSessionCloser allows executors to release per-session runtime resources.
-type ExecutionSessionCloser interface {
-	CloseExecutionSession(sessionID string)
-}
-
-const (
-	// CloseAllExecutionSessionsID asks an executor to release all active execution sessions.
-	// Executors that do not support this marker may ignore it.
-	CloseAllExecutionSessionsID = "__all_execution_sessions__"
-)
-
 // RefreshEvaluator allows runtime state to override refresh decisions.
 type RefreshEvaluator interface {
 	ShouldRefresh(now time.Time, auth *Auth) bool
 }
 
 const (
-	refreshCheckInterval  = 30 * time.Second
+	refreshCheckInterval  = 5 * time.Second
 	refreshPendingBackoff = time.Minute
-	refreshFailureBackoff = 1 * time.Minute
+	refreshFailureBackoff = 5 * time.Minute
 	quotaBackoffBase      = time.Second
 	quotaBackoffMax       = 30 * time.Minute
 )
@@ -401,23 +390,9 @@ func (m *Manager) RegisterExecutor(executor ProviderExecutor) {
 	if executor == nil {
 		return
 	}
-	provider := strings.TrimSpace(executor.Identifier())
-	if provider == "" {
-		return
-	}
-
-	var replaced ProviderExecutor
 	m.mu.Lock()
-	replaced = m.executors[provider]
-	m.executors[provider] = executor
-	m.mu.Unlock()
-
-	if replaced == nil || replaced == executor {
-		return
-	}
-	if closer, ok := replaced.(ExecutionSessionCloser); ok && closer != nil {
-		closer.CloseExecutionSession(CloseAllExecutionSessionsID)
-	}
+	defer m.mu.Unlock()
+	m.executors[executor.Identifier()] = executor
 }
 
 // UnregisterExecutor removes the executor associated with the provider key.
@@ -559,7 +534,7 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
 
 // ExecuteStream performs a streaming execution using the configured selector and executor.
 // It supports multiple providers for the same model and round-robins the starting provider per model.
-func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) {
+func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (<-chan cliproxyexecutor.StreamChunk, error) {
 	normalized := m.normalizeProviders(providers)
 	if len(normalized) == 0 {
 		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
@@ -569,9 +544,9 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 
 	var lastErr error
 	for attempt := 0; ; attempt++ {
-		result, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts)
+		chunks, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts)
 		if errStream == nil {
-			return result, nil
+			return chunks, nil
 		}
 		lastErr = errStream
 		wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, normalized, req.Model, maxWait)
@@ -588,203 +563,188 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 	return nil, &Error{Code: "auth_not_found", Message: "no auth available"}
 }
 
-func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	if len(providers) == 0 {
-		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
-	}
+func (m *Manager) executeWithFallback(
+	ctx context.Context,
+	initialProviders []string,
+	req cliproxyexecutor.Request,
+	opts cliproxyexecutor.Options,
+	exec func(ctx context.Context, executor ProviderExecutor, auth *Auth, provider, routeModel string) error,
+) error {
 	routeModel := req.Model
+	providers := initialProviders
 	opts = ensureRequestedModelMetadata(opts, routeModel)
 	tried := make(map[string]struct{})
 	var lastErr error
+
+	// Track fallback models from context (provided by Amp module fallback_models key)
+	var fallbacks []string
+	if v := ctx.Value(ctxkeys.FallbackModels); v != nil {
+		if fs, ok := v.([]string); ok {
+			fallbacks = fs
+		}
+	}
+	fallbackIdx := -1
+
 	for {
 		auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried)
 		if errPick != nil {
+			// No more auths for current model. Try next fallback model if available.
+			if fallbackIdx+1 < len(fallbacks) {
+				fallbackIdx++
+				routeModel = fallbacks[fallbackIdx]
+				log.Debugf("no more auths for current model, trying fallback model: %s (fallback %d/%d)", routeModel, fallbackIdx+1, len(fallbacks))
+
+				// Reset tried set for the new model and find its providers
+				tried = make(map[string]struct{})
+				providers = util.GetProviderName(thinking.ParseSuffix(routeModel).ModelName)
+				// Reset opts for the new model
+				opts = ensureRequestedModelMetadata(opts, routeModel)
+				if len(providers) == 0 {
+					log.Debugf("fallback model %s has no providers, skipping", routeModel)
+					continue // Try next fallback if this one has no providers
+				}
+				continue
+			}
+
 			if lastErr != nil {
-				return cliproxyexecutor.Response{}, lastErr
+				return lastErr
 			}
-			return cliproxyexecutor.Response{}, errPick
+			return errPick
 		}
 
-		entry := logEntryWithRequestID(ctx)
-		debugLogAuthSelection(entry, auth, provider, req.Model)
-		publishSelectedAuthMetadata(opts.Metadata, auth.ID)
-
 		tried[auth.ID] = struct{}{}
-		execCtx := ctx
-		if rt := m.roundTripperFor(auth); rt != nil {
-			execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
-			execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
-		}
-		execReq := req
-		execReq.Model = rewriteModelForAuth(routeModel, auth)
-		execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
-		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
-		resp, errExec := executor.Execute(execCtx, auth, execReq, opts)
-		result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
-		if errExec != nil {
-			if errCtx := execCtx.Err(); errCtx != nil {
-				return cliproxyexecutor.Response{}, errCtx
-			}
-			result.Error = &Error{Message: errExec.Error()}
-			if se, ok := errors.AsType[cliproxyexecutor.StatusError](errExec); ok && se != nil {
-				result.Error.HTTPStatus = se.StatusCode()
-			}
-			if ra := retryAfterFromError(errExec); ra != nil {
-				result.RetryAfter = ra
-			}
-			m.MarkResult(execCtx, result)
-			if isRequestInvalidError(errExec) {
-				return cliproxyexecutor.Response{}, errExec
+		if err := exec(ctx, executor, auth, provider, routeModel); err != nil {
+			if errCtx := ctx.Err(); errCtx != nil {
+				return errCtx
 			}
-			lastErr = errExec
+			lastErr = err
 			continue
 		}
-		m.MarkResult(execCtx, result)
-		return resp, nil
+		return nil
 	}
 }
 
-func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+func (m *Manager) executeMixedAttempt(
+	ctx context.Context,
+	auth *Auth,
+	provider, routeModel string,
+	req cliproxyexecutor.Request,
+	opts cliproxyexecutor.Options,
+	exec func(ctx context.Context, execReq cliproxyexecutor.Request) error,
+) error {
+	entry := logEntryWithRequestID(ctx)
+	debugLogAuthSelection(entry, auth, provider, req.Model)
+
+	execCtx := ctx
+	if rt := m.roundTripperFor(auth); rt != nil {
+		execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
+		execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
+	}
+
+	execReq := req
+	execReq.Model = rewriteModelForAuth(routeModel, auth)
+	execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
+	execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
+
+	err := exec(execCtx, execReq)
+	result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: err == nil}
+	if err != nil {
+		result.Error = &Error{Message: err.Error()}
+		var se cliproxyexecutor.StatusError
+		if errors.As(err, &se) && se != nil {
+			result.Error.HTTPStatus = se.StatusCode()
+		}
+		if ra := retryAfterFromError(err); ra != nil {
+			result.RetryAfter = ra
+		}
+	}
+	m.MarkResult(execCtx, result)
+	return err
+}
+
+func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
 	if len(providers) == 0 {
 		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
-	routeModel := req.Model
-	opts = ensureRequestedModelMetadata(opts, routeModel)
-	tried := make(map[string]struct{})
-	var lastErr error
-	for {
-		auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried)
-		if errPick != nil {
-			if lastErr != nil {
-				return cliproxyexecutor.Response{}, lastErr
-			}
-			return cliproxyexecutor.Response{}, errPick
-		}
 
-		entry := logEntryWithRequestID(ctx)
-		debugLogAuthSelection(entry, auth, provider, req.Model)
-		publishSelectedAuthMetadata(opts.Metadata, auth.ID)
+	var resp cliproxyexecutor.Response
+	err := m.executeWithFallback(ctx, providers, req, opts, func(ctx context.Context, executor ProviderExecutor, auth *Auth, provider, routeModel string) error {
+		return m.executeMixedAttempt(ctx, auth, provider, routeModel, req, opts, func(execCtx context.Context, execReq cliproxyexecutor.Request) error {
+			var errExec error
+			resp, errExec = executor.Execute(execCtx, auth, execReq, opts)
+			return errExec
+		})
+	})
+	return resp, err
+}
 
-		tried[auth.ID] = struct{}{}
-		execCtx := ctx
-		if rt := m.roundTripperFor(auth); rt != nil {
-			execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
-			execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
-		}
-		execReq := req
-		execReq.Model = rewriteModelForAuth(routeModel, auth)
-		execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
-		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
-		resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts)
-		result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil}
-		if errExec != nil {
-			if errCtx := execCtx.Err(); errCtx != nil {
-				return cliproxyexecutor.Response{}, errCtx
-			}
-			result.Error = &Error{Message: errExec.Error()}
-			if se, ok := errors.AsType[cliproxyexecutor.StatusError](errExec); ok && se != nil {
-				result.Error.HTTPStatus = se.StatusCode()
-			}
-			if ra := retryAfterFromError(errExec); ra != nil {
-				result.RetryAfter = ra
-			}
-			m.MarkResult(execCtx, result)
-			if isRequestInvalidError(errExec) {
-				return cliproxyexecutor.Response{}, errExec
-			}
-			lastErr = errExec
-			continue
-		}
-		m.MarkResult(execCtx, result)
-		return resp, nil
+func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	if len(providers) == 0 {
+		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
+
+	var resp cliproxyexecutor.Response
+	err := m.executeWithFallback(ctx, providers, req, opts, func(ctx context.Context, executor ProviderExecutor, auth *Auth, provider, routeModel string) error {
+		return m.executeMixedAttempt(ctx, auth, provider, routeModel, req, opts, func(execCtx context.Context, execReq cliproxyexecutor.Request) error {
+			var errExec error
+			resp, errExec = executor.CountTokens(execCtx, auth, execReq, opts)
+			return errExec
+		})
+	})
+	return resp, err
 }
 
-func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) {
+func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (<-chan cliproxyexecutor.StreamChunk, error) {
 	if len(providers) == 0 {
 		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
 	}
-	routeModel := req.Model
-	opts = ensureRequestedModelMetadata(opts, routeModel)
-	tried := make(map[string]struct{})
-	var lastErr error
-	for {
-		auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried)
-		if errPick != nil {
-			if lastErr != nil {
-				return nil, lastErr
-			}
-			return nil, errPick
-		}
-
-		entry := logEntryWithRequestID(ctx)
-		debugLogAuthSelection(entry, auth, provider, req.Model)
-		publishSelectedAuthMetadata(opts.Metadata, auth.ID)
 
-		tried[auth.ID] = struct{}{}
-		execCtx := ctx
-		if rt := m.roundTripperFor(auth); rt != nil {
-			execCtx = context.WithValue(execCtx, roundTripperContextKey{}, rt)
-			execCtx = context.WithValue(execCtx, "cliproxy.roundtripper", rt)
-		}
-		execReq := req
-		execReq.Model = rewriteModelForAuth(routeModel, auth)
-		execReq.Model = m.applyOAuthModelAlias(auth, execReq.Model)
-		execReq.Model = m.applyAPIKeyModelAlias(auth, execReq.Model)
-		streamResult, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts)
-		if errStream != nil {
-			if errCtx := execCtx.Err(); errCtx != nil {
-				return nil, errCtx
-			}
-			rerr := &Error{Message: errStream.Error()}
-			if se, ok := errors.AsType[cliproxyexecutor.StatusError](errStream); ok && se != nil {
-				rerr.HTTPStatus = se.StatusCode()
-			}
-			result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: false, Error: rerr}
-			result.RetryAfter = retryAfterFromError(errStream)
-			m.MarkResult(execCtx, result)
-			if isRequestInvalidError(errStream) {
-				return nil, errStream
+	var chunks <-chan cliproxyexecutor.StreamChunk
+	err := m.executeWithFallback(ctx, providers, req, opts, func(ctx context.Context, executor ProviderExecutor, auth *Auth, provider, routeModel string) error {
+		return m.executeMixedAttempt(ctx, auth, provider, routeModel, req, opts, func(execCtx context.Context, execReq cliproxyexecutor.Request) error {
+			var errExec error
+			chunks, errExec = executor.ExecuteStream(execCtx, auth, execReq, opts)
+			if errExec != nil {
+				return errExec
 			}
-			lastErr = errStream
-			continue
-		}
-		out := make(chan cliproxyexecutor.StreamChunk)
-		go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) {
-			defer close(out)
-			var failed bool
-			forward := true
-			for chunk := range streamChunks {
-				if chunk.Err != nil && !failed {
-					failed = true
-					rerr := &Error{Message: chunk.Err.Error()}
-					if se, ok := errors.AsType[cliproxyexecutor.StatusError](chunk.Err); ok && se != nil {
-						rerr.HTTPStatus = se.StatusCode()
+
+			out := make(chan cliproxyexecutor.StreamChunk)
+			go func(streamCtx context.Context, streamAuth *Auth, streamProvider string, streamChunks <-chan cliproxyexecutor.StreamChunk) {
+				defer close(out)
+				var failed bool
+				forward := true
+				for chunk := range streamChunks {
+					if chunk.Err != nil && !failed {
+						failed = true
+						rerr := &Error{Message: chunk.Err.Error()}
+						var se cliproxyexecutor.StatusError
+						if errors.As(chunk.Err, &se) && se != nil {
+							rerr.HTTPStatus = se.StatusCode()
+						}
+						m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr})
+					}
+					if !forward {
+						continue
+					}
+					if streamCtx == nil {
+						out <- chunk
+						continue
+					}
+					select {
+					case <-streamCtx.Done():
+						forward = false
+					case out <- chunk:
 					}
-					m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: false, Error: rerr})
-				}
-				if !forward {
-					continue
-				}
-				if streamCtx == nil {
-					out <- chunk
-					continue
 				}
-				select {
-				case <-streamCtx.Done():
-					forward = false
-				case out <- chunk:
+				if !failed {
+					m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true})
 				}
-			}
-			if !failed {
-				m.MarkResult(streamCtx, Result{AuthID: streamAuth.ID, Provider: streamProvider, Model: routeModel, Success: true})
-			}
-		}(execCtx, auth.Clone(), provider, streamResult.Chunks)
-		return &cliproxyexecutor.StreamResult{
-			Headers: streamResult.Headers,
-			Chunks:  out,
-		}, nil
-	}
+			}(execCtx, auth.Clone(), provider, chunks)
+			chunks = out
+			return nil
+		})
+	})
+	return chunks, err
 }
 
 func ensureRequestedModelMetadata(opts cliproxyexecutor.Options, requestedModel string) cliproxyexecutor.Options {
@@ -826,38 +786,6 @@ func hasRequestedModelMetadata(meta map[string]any) bool {
 	}
 }
 
-func pinnedAuthIDFromMetadata(meta map[string]any) string {
-	if len(meta) == 0 {
-		return ""
-	}
-	raw, ok := meta[cliproxyexecutor.PinnedAuthMetadataKey]
-	if !ok || raw == nil {
-		return ""
-	}
-	switch val := raw.(type) {
-	case string:
-		return strings.TrimSpace(val)
-	case []byte:
-		return strings.TrimSpace(string(val))
-	default:
-		return ""
-	}
-}
-
-func publishSelectedAuthMetadata(meta map[string]any, authID string) {
-	if len(meta) == 0 {
-		return
-	}
-	authID = strings.TrimSpace(authID)
-	if authID == "" {
-		return
-	}
-	meta[cliproxyexecutor.SelectedAuthMetadataKey] = authID
-	if callback, ok := meta[cliproxyexecutor.SelectedAuthCallbackMetadataKey].(func(string)); ok && callback != nil {
-		callback(authID)
-	}
-}
-
 func rewriteModelForAuth(model string, auth *Auth) string {
 	if auth == nil || model == "" {
 		return model
@@ -1179,9 +1107,6 @@ func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []stri
 	if status := statusCodeFromError(err); status == http.StatusOK {
 		return 0, false
 	}
-	if isRequestInvalidError(err) {
-		return 0, false
-	}
 	wait, found := m.closestCooldownWait(providers, model, attempt)
 	if !found || wait > maxWait {
 		return 0, false
@@ -1371,7 +1296,7 @@ func updateAggregatedAvailability(auth *Auth, now time.Time) {
 			stateUnavailable = true
 		} else if state.Unavailable {
 			if state.NextRetryAfter.IsZero() {
-				stateUnavailable = false
+				stateUnavailable = true
 			} else if state.NextRetryAfter.After(now) {
 				stateUnavailable = true
 				if earliestRetry.IsZero() || state.NextRetryAfter.Before(earliestRetry) {
@@ -1491,7 +1416,8 @@ func retryAfterFromError(err error) *time.Duration {
 	if retryAfter == nil {
 		return nil
 	}
-	return new(*retryAfter)
+	val := *retryAfter
+	return &val
 }
 
 func statusCodeFromResult(err *Error) int {
@@ -1501,21 +1427,6 @@ func statusCodeFromResult(err *Error) int {
 	return err.StatusCode()
 }
 
-// isRequestInvalidError returns true if the error represents a client request
-// error that should not be retried. Specifically, it checks for 400 Bad Request
-// with "invalid_request_error" in the message, indicating the request itself is
-// malformed and switching to a different auth will not help.
-func isRequestInvalidError(err error) bool {
-	if err == nil {
-		return false
-	}
-	status := statusCodeFromError(err)
-	if status != http.StatusBadRequest {
-		return false
-	}
-	return strings.Contains(err.Error(), "invalid_request_error")
-}
-
 func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Duration, now time.Time) {
 	if auth == nil {
 		return
@@ -1614,56 +1525,7 @@ func (m *Manager) GetByID(id string) (*Auth, bool) {
 	return auth.Clone(), true
 }
 
-// Executor returns the registered provider executor for a provider key.
-func (m *Manager) Executor(provider string) (ProviderExecutor, bool) {
-	if m == nil {
-		return nil, false
-	}
-	provider = strings.TrimSpace(provider)
-	if provider == "" {
-		return nil, false
-	}
-
-	m.mu.RLock()
-	executor, okExecutor := m.executors[provider]
-	if !okExecutor {
-		lowerProvider := strings.ToLower(provider)
-		if lowerProvider != provider {
-			executor, okExecutor = m.executors[lowerProvider]
-		}
-	}
-	m.mu.RUnlock()
-
-	if !okExecutor || executor == nil {
-		return nil, false
-	}
-	return executor, true
-}
-
-// CloseExecutionSession asks all registered executors to release the supplied execution session.
-func (m *Manager) CloseExecutionSession(sessionID string) {
-	sessionID = strings.TrimSpace(sessionID)
-	if m == nil || sessionID == "" {
-		return
-	}
-
-	m.mu.RLock()
-	executors := make([]ProviderExecutor, 0, len(m.executors))
-	for _, exec := range m.executors {
-		executors = append(executors, exec)
-	}
-	m.mu.RUnlock()
-
-	for i := range executors {
-		if closer, ok := executors[i].(ExecutionSessionCloser); ok && closer != nil {
-			closer.CloseExecutionSession(sessionID)
-		}
-	}
-}
-
 func (m *Manager) pickNext(ctx context.Context, provider, model string, opts cliproxyexecutor.Options, tried map[string]struct{}) (*Auth, ProviderExecutor, error) {
-	pinnedAuthID := pinnedAuthIDFromMetadata(opts.Metadata)
-
 	m.mu.RLock()
 	executor, okExecutor := m.executors[provider]
 	if !okExecutor {
@@ -1684,9 +1546,6 @@ func (m *Manager) pickNext(ctx context.Context, provider, model string, opts cli
 		if candidate.Provider != provider || candidate.Disabled {
 			continue
 		}
-		if pinnedAuthID != "" && candidate.ID != pinnedAuthID {
-			continue
-		}
 		if _, used := tried[candidate.ID]; used {
 			continue
 		}
@@ -1722,8 +1581,6 @@ func (m *Manager) pickNext(ctx context.Context, provider, model string, opts cli
 }
 
 func (m *Manager) pickNextMixed(ctx context.Context, providers []string, model string, opts cliproxyexecutor.Options, tried map[string]struct{}) (*Auth, ProviderExecutor, string, error) {
-	pinnedAuthID := pinnedAuthIDFromMetadata(opts.Metadata)
-
 	providerSet := make(map[string]struct{}, len(providers))
 	for _, provider := range providers {
 		p := strings.TrimSpace(strings.ToLower(provider))
@@ -1751,9 +1608,6 @@ func (m *Manager) pickNextMixed(ctx context.Context, providers []string, model s
 		if candidate == nil || candidate.Disabled {
 			continue
 		}
-		if pinnedAuthID != "" && candidate.ID != pinnedAuthID {
-			continue
-		}
 		providerKey := strings.TrimSpace(strings.ToLower(candidate.Provider))
 		if providerKey == "" {
 			continue
@@ -2156,9 +2010,7 @@ func (m *Manager) refreshAuth(ctx context.Context, id string) {
 		updated.Runtime = auth.Runtime
 	}
 	updated.LastRefreshedAt = now
-	// Preserve NextRefreshAfter set by the Authenticator
-	// If the Authenticator set a reasonable refresh time, it should not be overwritten
-	// If the Authenticator did not set it (zero value), shouldRefresh will use default logic
+	updated.NextRefreshAfter = time.Time{}
 	updated.LastError = nil
 	updated.UpdatedAt = now
 	_, _ = m.Update(ctx, updated)
@@ -2238,7 +2090,6 @@ func debugLogAuthSelection(entry *log.Entry, auth *Auth, provider string, model
 	}
 	switch accountType {
 	case "api_key":
-		// nolint:gosec // false positive: model alias, not actual API key
 		entry.Debugf("Use API key %s for model %s%s", util.HideAPIKey(accountInfo), model, suffix)
 	case "oauth":
 		ident := formatOauthIdentity(auth, provider, accountInfo)
diff --git a/sdk/cliproxy/auth/oauth_model_alias.go b/sdk/cliproxy/auth/oauth_model_alias.go
index 8563aac463..4111663e97 100644
--- a/sdk/cliproxy/auth/oauth_model_alias.go
+++ b/sdk/cliproxy/auth/oauth_model_alias.go
@@ -221,7 +221,7 @@ func modelAliasChannel(auth *Auth) string {
 // and auth kind. Returns empty string if the provider/authKind combination doesn't support
 // OAuth model alias (e.g., API key authentication).
 //
-// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot, kimi.
+// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow.
 func OAuthModelAliasChannel(provider, authKind string) string {
 	provider = strings.ToLower(strings.TrimSpace(provider))
 	authKind = strings.ToLower(strings.TrimSpace(authKind))
@@ -245,7 +245,7 @@ func OAuthModelAliasChannel(provider, authKind string) string {
 			return ""
 		}
 		return "codex"
-	case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow", "kiro", "github-copilot", "kimi":
+	case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow":
 		return provider
 	default:
 		return ""
diff --git a/sdk/cliproxy/auth/oauth_model_alias_test.go b/sdk/cliproxy/auth/oauth_model_alias_test.go
index e12b65975f..6956411c97 100644
--- a/sdk/cliproxy/auth/oauth_model_alias_test.go
+++ b/sdk/cliproxy/auth/oauth_model_alias_test.go
@@ -43,15 +43,6 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) {
 			input:   "gemini-2.5-pro",
 			want:    "gemini-2.5-pro-exp-03-25",
 		},
-		{
-			name: "kiro alias resolves",
-			aliases: map[string][]internalconfig.OAuthModelAlias{
-				"kiro": {{Name: "kiro-claude-sonnet-4-5", Alias: "sonnet"}},
-			},
-			channel: "kiro",
-			input:   "sonnet",
-			want:    "kiro-claude-sonnet-4-5",
-		},
 		{
 			name: "config suffix takes priority",
 			aliases: map[string][]internalconfig.OAuthModelAlias{
@@ -79,33 +70,6 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) {
 			input:   "gemini-2.5-pro(none)",
 			want:    "gemini-2.5-pro-exp-03-25(none)",
 		},
-		{
-			name: "github-copilot suffix preserved",
-			aliases: map[string][]internalconfig.OAuthModelAlias{
-				"github-copilot": {{Name: "claude-opus-4.6", Alias: "opus"}},
-			},
-			channel: "github-copilot",
-			input:   "opus(medium)",
-			want:    "claude-opus-4.6(medium)",
-		},
-		{
-			name: "github-copilot no suffix",
-			aliases: map[string][]internalconfig.OAuthModelAlias{
-				"github-copilot": {{Name: "claude-opus-4.6", Alias: "opus"}},
-			},
-			channel: "github-copilot",
-			input:   "opus",
-			want:    "claude-opus-4.6",
-		},
-		{
-			name: "kimi suffix preserved",
-			aliases: map[string][]internalconfig.OAuthModelAlias{
-				"kimi": {{Name: "kimi-k2.5", Alias: "k2.5"}},
-			},
-			channel: "kimi",
-			input:   "k2.5(high)",
-			want:    "kimi-k2.5(high)",
-		},
 		{
 			name: "case insensitive alias lookup with suffix",
 			aliases: map[string][]internalconfig.OAuthModelAlias{
@@ -188,41 +152,11 @@ func createAuthForChannel(channel string) *Auth {
 		return &Auth{Provider: "qwen"}
 	case "iflow":
 		return &Auth{Provider: "iflow"}
-	case "kimi":
-		return &Auth{Provider: "kimi"}
-	case "kiro":
-		return &Auth{Provider: "kiro"}
-	case "github-copilot":
-		return &Auth{Provider: "github-copilot"}
 	default:
 		return &Auth{Provider: channel}
 	}
 }
 
-func TestOAuthModelAliasChannel_Kimi(t *testing.T) {
-	t.Parallel()
-
-	if got := OAuthModelAliasChannel("kimi", "oauth"); got != "kimi" {
-		t.Fatalf("OAuthModelAliasChannel() = %q, want %q", got, "kimi")
-	}
-}
-
-func TestOAuthModelAliasChannel_GitHubCopilot(t *testing.T) {
-	t.Parallel()
-
-	if got := OAuthModelAliasChannel("github-copilot", ""); got != "github-copilot" {
-		t.Fatalf("OAuthModelAliasChannel() = %q, want %q", got, "github-copilot")
-	}
-}
-
-func TestOAuthModelAliasChannel_Kiro(t *testing.T) {
-	t.Parallel()
-
-	if got := OAuthModelAliasChannel("kiro", ""); got != "kiro" {
-		t.Fatalf("OAuthModelAliasChannel() = %q, want %q", got, "kiro")
-	}
-}
-
 func TestApplyOAuthModelAlias_SuffixPreservation(t *testing.T) {
 	t.Parallel()
 
diff --git a/sdk/cliproxy/auth/selector.go b/sdk/cliproxy/auth/selector.go
index a173ed0178..7febf219da 100644
--- a/sdk/cliproxy/auth/selector.go
+++ b/sdk/cliproxy/auth/selector.go
@@ -12,7 +12,6 @@ import (
 	"sync"
 	"time"
 
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 )
 
@@ -20,7 +19,6 @@ import (
 type RoundRobinSelector struct {
 	mu      sync.Mutex
 	cursors map[string]int
-	maxKeys int
 }
 
 // FillFirstSelector selects the first available credential (deterministic ordering).
@@ -121,75 +119,6 @@ func authPriority(auth *Auth) int {
 	return parsed
 }
 
-func canonicalModelKey(model string) string {
-	model = strings.TrimSpace(model)
-	if model == "" {
-		return ""
-	}
-	parsed := thinking.ParseSuffix(model)
-	modelName := strings.TrimSpace(parsed.ModelName)
-	if modelName == "" {
-		return model
-	}
-	return modelName
-}
-
-func authWebsocketsEnabled(auth *Auth) bool {
-	if auth == nil {
-		return false
-	}
-	if len(auth.Attributes) > 0 {
-		if raw := strings.TrimSpace(auth.Attributes["websockets"]); raw != "" {
-			parsed, errParse := strconv.ParseBool(raw)
-			if errParse == nil {
-				return parsed
-			}
-		}
-	}
-	if len(auth.Metadata) == 0 {
-		return false
-	}
-	raw, ok := auth.Metadata["websockets"]
-	if !ok || raw == nil {
-		return false
-	}
-	switch v := raw.(type) {
-	case bool:
-		return v
-	case string:
-		parsed, errParse := strconv.ParseBool(strings.TrimSpace(v))
-		if errParse == nil {
-			return parsed
-		}
-	default:
-	}
-	return false
-}
-
-func preferCodexWebsocketAuths(ctx context.Context, provider string, available []*Auth) []*Auth {
-	if len(available) == 0 {
-		return available
-	}
-	if !cliproxyexecutor.DownstreamWebsocket(ctx) {
-		return available
-	}
-	if !strings.EqualFold(strings.TrimSpace(provider), "codex") {
-		return available
-	}
-
-	wsEnabled := make([]*Auth, 0, len(available))
-	for i := 0; i < len(available); i++ {
-		candidate := available[i]
-		if authWebsocketsEnabled(candidate) {
-			wsEnabled = append(wsEnabled, candidate)
-		}
-	}
-	if len(wsEnabled) > 0 {
-		return wsEnabled
-	}
-	return available
-}
-
 func collectAvailableByPriority(auths []*Auth, model string, now time.Time) (available map[int][]*Auth, cooldownCount int, earliest time.Time) {
 	available = make(map[int][]*Auth)
 	for i := 0; i < len(auths); i++ {
@@ -249,25 +178,18 @@ func getAvailableAuths(auths []*Auth, provider, model string, now time.Time) ([]
 
 // Pick selects the next available auth for the provider in a round-robin manner.
 func (s *RoundRobinSelector) Pick(ctx context.Context, provider, model string, opts cliproxyexecutor.Options, auths []*Auth) (*Auth, error) {
+	_ = ctx
 	_ = opts
 	now := time.Now()
 	available, err := getAvailableAuths(auths, provider, model, now)
 	if err != nil {
 		return nil, err
 	}
-	available = preferCodexWebsocketAuths(ctx, provider, available)
-	key := provider + ":" + canonicalModelKey(model)
+	key := provider + ":" + model
 	s.mu.Lock()
 	if s.cursors == nil {
 		s.cursors = make(map[string]int)
 	}
-	limit := s.maxKeys
-	if limit <= 0 {
-		limit = 4096
-	}
-	if _, ok := s.cursors[key]; !ok && len(s.cursors) >= limit {
-		s.cursors = make(map[string]int)
-	}
 	index := s.cursors[key]
 
 	if index >= 2_147_483_640 {
@@ -282,13 +204,13 @@ func (s *RoundRobinSelector) Pick(ctx context.Context, provider, model string, o
 
 // Pick selects the first available auth for the provider in a deterministic manner.
 func (s *FillFirstSelector) Pick(ctx context.Context, provider, model string, opts cliproxyexecutor.Options, auths []*Auth) (*Auth, error) {
+	_ = ctx
 	_ = opts
 	now := time.Now()
 	available, err := getAvailableAuths(auths, provider, model, now)
 	if err != nil {
 		return nil, err
 	}
-	available = preferCodexWebsocketAuths(ctx, provider, available)
 	return available[0], nil
 }
 
@@ -301,14 +223,7 @@ func isAuthBlockedForModel(auth *Auth, model string, now time.Time) (bool, block
 	}
 	if model != "" {
 		if len(auth.ModelStates) > 0 {
-			state, ok := auth.ModelStates[model]
-			if (!ok || state == nil) && model != "" {
-				baseModel := canonicalModelKey(model)
-				if baseModel != "" && baseModel != model {
-					state, ok = auth.ModelStates[baseModel]
-				}
-			}
-			if ok && state != nil {
+			if state, ok := auth.ModelStates[model]; ok && state != nil {
 				if state.Status == StatusDisabled {
 					return true, blockReasonDisabled, time.Time{}
 				}
diff --git a/sdk/cliproxy/auth/selector_test.go b/sdk/cliproxy/auth/selector_test.go
index fe1cf15eb6..91a7ed14f0 100644
--- a/sdk/cliproxy/auth/selector_test.go
+++ b/sdk/cliproxy/auth/selector_test.go
@@ -2,9 +2,7 @@ package auth
 
 import (
 	"context"
-	"encoding/json"
 	"errors"
-	"net/http"
 	"sync"
 	"testing"
 	"time"
@@ -177,228 +175,3 @@ func TestRoundRobinSelectorPick_Concurrent(t *testing.T) {
 	default:
 	}
 }
-
-func TestSelectorPick_AllCooldownReturnsModelCooldownError(t *testing.T) {
-	t.Parallel()
-
-	model := "test-model"
-	now := time.Now()
-	next := now.Add(60 * time.Second)
-	auths := []*Auth{
-		{
-			ID: "a",
-			ModelStates: map[string]*ModelState{
-				model: {
-					Status:         StatusActive,
-					Unavailable:    true,
-					NextRetryAfter: next,
-					Quota: QuotaState{
-						Exceeded:      true,
-						NextRecoverAt: next,
-					},
-				},
-			},
-		},
-		{
-			ID: "b",
-			ModelStates: map[string]*ModelState{
-				model: {
-					Status:         StatusActive,
-					Unavailable:    true,
-					NextRetryAfter: next,
-					Quota: QuotaState{
-						Exceeded:      true,
-						NextRecoverAt: next,
-					},
-				},
-			},
-		},
-	}
-
-	t.Run("mixed provider redacts provider field", func(t *testing.T) {
-		t.Parallel()
-
-		selector := &FillFirstSelector{}
-		_, err := selector.Pick(context.Background(), "mixed", model, cliproxyexecutor.Options{}, auths)
-		if err == nil {
-			t.Fatalf("Pick() error = nil")
-		}
-
-		var mce *modelCooldownError
-		if !errors.As(err, &mce) {
-			t.Fatalf("Pick() error = %T, want *modelCooldownError", err)
-		}
-		if mce.StatusCode() != http.StatusTooManyRequests {
-			t.Fatalf("StatusCode() = %d, want %d", mce.StatusCode(), http.StatusTooManyRequests)
-		}
-
-		headers := mce.Headers()
-		if got := headers.Get("Retry-After"); got == "" {
-			t.Fatalf("Headers().Get(Retry-After) = empty")
-		}
-
-		var payload map[string]any
-		if err := json.Unmarshal([]byte(mce.Error()), &payload); err != nil {
-			t.Fatalf("json.Unmarshal(Error()) error = %v", err)
-		}
-		rawErr, ok := payload["error"].(map[string]any)
-		if !ok {
-			t.Fatalf("Error() payload missing error object: %v", payload)
-		}
-		if got, _ := rawErr["code"].(string); got != "model_cooldown" {
-			t.Fatalf("Error().error.code = %q, want %q", got, "model_cooldown")
-		}
-		if _, ok := rawErr["provider"]; ok {
-			t.Fatalf("Error().error.provider exists for mixed provider: %v", rawErr["provider"])
-		}
-	})
-
-	t.Run("non-mixed provider includes provider field", func(t *testing.T) {
-		t.Parallel()
-
-		selector := &FillFirstSelector{}
-		_, err := selector.Pick(context.Background(), "gemini", model, cliproxyexecutor.Options{}, auths)
-		if err == nil {
-			t.Fatalf("Pick() error = nil")
-		}
-
-		var mce *modelCooldownError
-		if !errors.As(err, &mce) {
-			t.Fatalf("Pick() error = %T, want *modelCooldownError", err)
-		}
-
-		var payload map[string]any
-		if err := json.Unmarshal([]byte(mce.Error()), &payload); err != nil {
-			t.Fatalf("json.Unmarshal(Error()) error = %v", err)
-		}
-		rawErr, ok := payload["error"].(map[string]any)
-		if !ok {
-			t.Fatalf("Error() payload missing error object: %v", payload)
-		}
-		if got, _ := rawErr["provider"].(string); got != "gemini" {
-			t.Fatalf("Error().error.provider = %q, want %q", got, "gemini")
-		}
-	})
-}
-
-func TestIsAuthBlockedForModel_UnavailableWithoutNextRetryIsNotBlocked(t *testing.T) {
-	t.Parallel()
-
-	now := time.Now()
-	model := "test-model"
-	auth := &Auth{
-		ID: "a",
-		ModelStates: map[string]*ModelState{
-			model: {
-				Status:      StatusActive,
-				Unavailable: true,
-				Quota: QuotaState{
-					Exceeded: true,
-				},
-			},
-		},
-	}
-
-	blocked, reason, next := isAuthBlockedForModel(auth, model, now)
-	if blocked {
-		t.Fatalf("blocked = true, want false")
-	}
-	if reason != blockReasonNone {
-		t.Fatalf("reason = %v, want %v", reason, blockReasonNone)
-	}
-	if !next.IsZero() {
-		t.Fatalf("next = %v, want zero", next)
-	}
-}
-
-func TestFillFirstSelectorPick_ThinkingSuffixFallsBackToBaseModelState(t *testing.T) {
-	t.Parallel()
-
-	selector := &FillFirstSelector{}
-	now := time.Now()
-
-	baseModel := "test-model"
-	requestedModel := "test-model(high)"
-
-	high := &Auth{
-		ID:         "high",
-		Attributes: map[string]string{"priority": "10"},
-		ModelStates: map[string]*ModelState{
-			baseModel: {
-				Status:         StatusActive,
-				Unavailable:    true,
-				NextRetryAfter: now.Add(30 * time.Minute),
-				Quota: QuotaState{
-					Exceeded: true,
-				},
-			},
-		},
-	}
-	low := &Auth{
-		ID:         "low",
-		Attributes: map[string]string{"priority": "0"},
-	}
-
-	got, err := selector.Pick(context.Background(), "mixed", requestedModel, cliproxyexecutor.Options{}, []*Auth{high, low})
-	if err != nil {
-		t.Fatalf("Pick() error = %v", err)
-	}
-	if got == nil {
-		t.Fatalf("Pick() auth = nil")
-	}
-	if got.ID != "low" {
-		t.Fatalf("Pick() auth.ID = %q, want %q", got.ID, "low")
-	}
-}
-
-func TestRoundRobinSelectorPick_ThinkingSuffixSharesCursor(t *testing.T) {
-	t.Parallel()
-
-	selector := &RoundRobinSelector{}
-	auths := []*Auth{
-		{ID: "b"},
-		{ID: "a"},
-	}
-
-	first, err := selector.Pick(context.Background(), "gemini", "test-model(high)", cliproxyexecutor.Options{}, auths)
-	if err != nil {
-		t.Fatalf("Pick() first error = %v", err)
-	}
-	second, err := selector.Pick(context.Background(), "gemini", "test-model(low)", cliproxyexecutor.Options{}, auths)
-	if err != nil {
-		t.Fatalf("Pick() second error = %v", err)
-	}
-	if first == nil || second == nil {
-		t.Fatalf("Pick() returned nil auth")
-	}
-	if first.ID != "a" {
-		t.Fatalf("Pick() first auth.ID = %q, want %q", first.ID, "a")
-	}
-	if second.ID != "b" {
-		t.Fatalf("Pick() second auth.ID = %q, want %q", second.ID, "b")
-	}
-}
-
-func TestRoundRobinSelectorPick_CursorKeyCap(t *testing.T) {
-	t.Parallel()
-
-	selector := &RoundRobinSelector{maxKeys: 2}
-	auths := []*Auth{{ID: "a"}}
-
-	_, _ = selector.Pick(context.Background(), "gemini", "m1", cliproxyexecutor.Options{}, auths)
-	_, _ = selector.Pick(context.Background(), "gemini", "m2", cliproxyexecutor.Options{}, auths)
-	_, _ = selector.Pick(context.Background(), "gemini", "m3", cliproxyexecutor.Options{}, auths)
-
-	selector.mu.Lock()
-	defer selector.mu.Unlock()
-
-	if selector.cursors == nil {
-		t.Fatalf("selector.cursors = nil")
-	}
-	if len(selector.cursors) != 1 {
-		t.Fatalf("len(selector.cursors) = %d, want %d", len(selector.cursors), 1)
-	}
-	if _, ok := selector.cursors["gemini:m3"]; !ok {
-		t.Fatalf("selector.cursors missing key %q", "gemini:m3")
-	}
-}
diff --git a/sdk/cliproxy/auth/types.go b/sdk/cliproxy/auth/types.go
index 88d0ea5241..b2bbe0a2ea 100644
--- a/sdk/cliproxy/auth/types.go
+++ b/sdk/cliproxy/auth/types.go
@@ -213,23 +213,6 @@ func (a *Auth) DisableCoolingOverride() (bool, bool) {
 	return false, false
 }
 
-// ToolPrefixDisabled returns whether the proxy_ tool name prefix should be
-// skipped for this auth. When true, tool names are sent to Anthropic unchanged.
-// The value is read from metadata key "tool_prefix_disabled" (or "tool-prefix-disabled").
-func (a *Auth) ToolPrefixDisabled() bool {
-	if a == nil || a.Metadata == nil {
-		return false
-	}
-	for _, key := range []string{"tool_prefix_disabled", "tool-prefix-disabled"} {
-		if val, ok := a.Metadata[key]; ok {
-			if parsed, okParse := parseBoolAny(val); okParse {
-				return parsed
-			}
-		}
-	}
-	return false
-}
-
 // RequestRetryOverride returns the auth-file scoped request_retry override when present.
 // The value is read from metadata key "request_retry" (or legacy "request-retry").
 func (a *Auth) RequestRetryOverride() (int, bool) {
@@ -346,18 +329,6 @@ func (a *Auth) AccountInfo() (string, string) {
 		}
 	}
 
-	// For GitHub provider (including github-copilot), return username
-	if strings.HasPrefix(strings.ToLower(a.Provider), "github") {
-		if a.Metadata != nil {
-			if username, ok := a.Metadata["username"].(string); ok {
-				username = strings.TrimSpace(username)
-				if username != "" {
-					return "oauth", username
-				}
-			}
-		}
-	}
-
 	// Check metadata for email first (OAuth-style auth)
 	if a.Metadata != nil {
 		if v, ok := a.Metadata["email"].(string); ok {
diff --git a/sdk/cliproxy/builder.go b/sdk/cliproxy/builder.go
index 60ca07f5fe..5eba18a01d 100644
--- a/sdk/cliproxy/builder.go
+++ b/sdk/cliproxy/builder.go
@@ -7,7 +7,6 @@ import (
 	"fmt"
 	"strings"
 
-	configaccess "github.com/router-for-me/CLIProxyAPI/v6/internal/access/config_access"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/api"
 	sdkaccess "github.com/router-for-me/CLIProxyAPI/v6/sdk/access"
 	sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
@@ -187,8 +186,11 @@ func (b *Builder) Build() (*Service, error) {
 		accessManager = sdkaccess.NewManager()
 	}
 
-	configaccess.Register(&b.cfg.SDKConfig)
-	accessManager.SetProviders(sdkaccess.RegisteredProviders())
+	providers, err := sdkaccess.BuildProviders(&b.cfg.SDKConfig)
+	if err != nil {
+		return nil, err
+	}
+	accessManager.SetProviders(providers)
 
 	coreManager := b.coreManager
 	if coreManager == nil {
diff --git a/sdk/cliproxy/executor/types.go b/sdk/cliproxy/executor/types.go
index 4ea8103947..8c11bbc463 100644
--- a/sdk/cliproxy/executor/types.go
+++ b/sdk/cliproxy/executor/types.go
@@ -10,17 +10,6 @@ import (
 // RequestedModelMetadataKey stores the client-requested model name in Options.Metadata.
 const RequestedModelMetadataKey = "requested_model"
 
-const (
-	// PinnedAuthMetadataKey locks execution to a specific auth ID.
-	PinnedAuthMetadataKey = "pinned_auth_id"
-	// SelectedAuthMetadataKey stores the auth ID selected by the scheduler.
-	SelectedAuthMetadataKey = "selected_auth_id"
-	// SelectedAuthCallbackMetadataKey carries an optional callback invoked with the selected auth ID.
-	SelectedAuthCallbackMetadataKey = "selected_auth_callback"
-	// ExecutionSessionMetadataKey identifies a long-lived downstream execution session.
-	ExecutionSessionMetadataKey = "execution_session_id"
-)
-
 // Request encapsulates the translated payload that will be sent to a provider executor.
 type Request struct {
 	// Model is the upstream model identifier after translation.
@@ -57,8 +46,6 @@ type Response struct {
 	Payload []byte
 	// Metadata exposes optional structured data for translators.
 	Metadata map[string]any
-	// Headers carries upstream HTTP response headers for passthrough to clients.
-	Headers http.Header
 }
 
 // StreamChunk represents a single streaming payload unit emitted by provider executors.
@@ -69,15 +56,6 @@ type StreamChunk struct {
 	Err error
 }
 
-// StreamResult wraps the streaming response, providing both the chunk channel
-// and the upstream HTTP response headers captured before streaming begins.
-type StreamResult struct {
-	// Headers carries upstream HTTP response headers from the initial connection.
-	Headers http.Header
-	// Chunks is the channel of streaming payload units.
-	Chunks <-chan StreamChunk
-}
-
 // StatusError represents an error that carries an HTTP-like status code.
 // Provider executors should implement this when possible to enable
 // better auth state updates on failures (e.g., 401/402/429).
diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go
index 2bd12d0ace..63eaf9ebd9 100644
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -13,7 +13,6 @@ import (
 	"time"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/api"
-	kiroauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/kiro"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
@@ -58,9 +57,6 @@ type Service struct {
 	// server is the HTTP API server instance.
 	server *api.Server
 
-	// pprofServer manages the optional pprof HTTP debug server.
-	pprofServer *pprofServer
-
 	// serverErr channel for server startup/shutdown errors.
 	serverErr chan error
 
@@ -101,16 +97,6 @@ func (s *Service) RegisterUsagePlugin(plugin usage.Plugin) {
 	usage.RegisterPlugin(plugin)
 }
 
-// GetWatcher returns the underlying WatcherWrapper instance.
-// This allows external components (e.g., RefreshManager) to interact with the watcher.
-// Returns nil if the service or watcher is not initialized.
-func (s *Service) GetWatcher() *WatcherWrapper {
-	if s == nil {
-		return nil
-	}
-	return s.watcher
-}
-
 // newDefaultAuthManager creates a default authentication manager with all supported providers.
 func newDefaultAuthManager() *sdkAuth.Manager {
 	return sdkAuth.NewManager(
@@ -284,42 +270,27 @@ func (s *Service) wsOnDisconnected(channelID string, reason error) {
 }
 
 func (s *Service) applyCoreAuthAddOrUpdate(ctx context.Context, auth *coreauth.Auth) {
-	if s == nil || s.coreManager == nil || auth == nil || auth.ID == "" {
+	if s == nil || auth == nil || auth.ID == "" {
+		return
+	}
+	if s.coreManager == nil {
 		return
 	}
 	auth = auth.Clone()
 	s.ensureExecutorsForAuth(auth)
-
-	// IMPORTANT: Update coreManager FIRST, before model registration.
-	// This ensures that configuration changes (proxy_url, prefix, etc.) take effect
-	// immediately for API calls, rather than waiting for model registration to complete.
-	// Model registration may involve network calls (e.g., FetchAntigravityModels) that
-	// could timeout if the new proxy_url is unreachable.
-	op := "register"
-	var err error
-	if existing, ok := s.coreManager.GetByID(auth.ID); ok {
+	s.registerModelsForAuth(auth)
+	if existing, ok := s.coreManager.GetByID(auth.ID); ok && existing != nil {
 		auth.CreatedAt = existing.CreatedAt
 		auth.LastRefreshedAt = existing.LastRefreshedAt
 		auth.NextRefreshAfter = existing.NextRefreshAfter
-		op = "update"
-		_, err = s.coreManager.Update(ctx, auth)
-	} else {
-		_, err = s.coreManager.Register(ctx, auth)
-	}
-	if err != nil {
-		log.Errorf("failed to %s auth %s: %v", op, auth.ID, err)
-		current, ok := s.coreManager.GetByID(auth.ID)
-		if !ok || current.Disabled {
-			GlobalModelRegistry().UnregisterClient(auth.ID)
-			return
+		if _, err := s.coreManager.Update(ctx, auth); err != nil {
+			log.Errorf("failed to update auth %s: %v", auth.ID, err)
 		}
-		auth = current
+		return
+	}
+	if _, err := s.coreManager.Register(ctx, auth); err != nil {
+		log.Errorf("failed to register auth %s: %v", auth.ID, err)
 	}
-
-	// Register models after auth is updated in coreManager.
-	// This operation may block on network calls, but the auth configuration
-	// is already effective at this point.
-	s.registerModelsForAuth(auth)
 }
 
 func (s *Service) applyCoreAuthRemoval(ctx context.Context, id string) {
@@ -336,9 +307,6 @@ func (s *Service) applyCoreAuthRemoval(ctx context.Context, id string) {
 		if _, err := s.coreManager.Update(ctx, existing); err != nil {
 			log.Errorf("failed to disable auth %s: %v", id, err)
 		}
-		if strings.EqualFold(strings.TrimSpace(existing.Provider), "codex") {
-			s.ensureExecutorsForAuth(existing)
-		}
 	}
 }
 
@@ -371,24 +339,7 @@ func openAICompatInfoFromAuth(a *coreauth.Auth) (providerKey string, compatName
 }
 
 func (s *Service) ensureExecutorsForAuth(a *coreauth.Auth) {
-	s.ensureExecutorsForAuthWithMode(a, false)
-}
-
-func (s *Service) ensureExecutorsForAuthWithMode(a *coreauth.Auth, forceReplace bool) {
-	if s == nil || s.coreManager == nil || a == nil {
-		return
-	}
-	if strings.EqualFold(strings.TrimSpace(a.Provider), "codex") {
-		if !forceReplace {
-			existingExecutor, hasExecutor := s.coreManager.Executor("codex")
-			if hasExecutor {
-				_, isCodexAutoExecutor := existingExecutor.(*executor.CodexAutoExecutor)
-				if isCodexAutoExecutor {
-					return
-				}
-			}
-		}
-		s.coreManager.RegisterExecutor(executor.NewCodexAutoExecutor(s.cfg))
+	if s == nil || a == nil {
 		return
 	}
 	// Skip disabled auth entries when (re)binding executors.
@@ -423,18 +374,12 @@ func (s *Service) ensureExecutorsForAuthWithMode(a *coreauth.Auth, forceReplace
 		s.coreManager.RegisterExecutor(executor.NewAntigravityExecutor(s.cfg))
 	case "claude":
 		s.coreManager.RegisterExecutor(executor.NewClaudeExecutor(s.cfg))
+	case "codex":
+		s.coreManager.RegisterExecutor(executor.NewCodexExecutor(s.cfg))
 	case "qwen":
 		s.coreManager.RegisterExecutor(executor.NewQwenExecutor(s.cfg))
 	case "iflow":
 		s.coreManager.RegisterExecutor(executor.NewIFlowExecutor(s.cfg))
-	case "kimi":
-		s.coreManager.RegisterExecutor(executor.NewKimiExecutor(s.cfg))
-	case "kiro":
-		s.coreManager.RegisterExecutor(executor.NewKiroExecutor(s.cfg))
-	case "kilo":
-		s.coreManager.RegisterExecutor(executor.NewKiloExecutor(s.cfg))
-	case "github-copilot":
-		s.coreManager.RegisterExecutor(executor.NewGitHubCopilotExecutor(s.cfg))
 	default:
 		providerKey := strings.ToLower(strings.TrimSpace(a.Provider))
 		if providerKey == "" {
@@ -450,15 +395,8 @@ func (s *Service) rebindExecutors() {
 		return
 	}
 	auths := s.coreManager.List()
-	reboundCodex := false
 	for _, auth := range auths {
-		if auth != nil && strings.EqualFold(strings.TrimSpace(auth.Provider), "codex") {
-			if reboundCodex {
-				continue
-			}
-			reboundCodex = true
-		}
-		s.ensureExecutorsForAuthWithMode(auth, true)
+		s.ensureExecutorsForAuth(auth)
 	}
 }
 
@@ -563,8 +501,6 @@ func (s *Service) Run(ctx context.Context) error {
 	time.Sleep(100 * time.Millisecond)
 	fmt.Printf("API server started successfully on: %s:%d\n", s.cfg.Host, s.cfg.Port)
 
-	s.applyPprofConfig(s.cfg)
-
 	if s.hooks.OnAfterStart != nil {
 		s.hooks.OnAfterStart(s)
 	}
@@ -610,7 +546,6 @@ func (s *Service) Run(ctx context.Context) error {
 		}
 
 		s.applyRetryConfig(newCfg)
-		s.applyPprofConfig(newCfg)
 		if s.server != nil {
 			s.server.UpdateClients(newCfg)
 		}
@@ -635,18 +570,6 @@ func (s *Service) Run(ctx context.Context) error {
 	}
 	watcherWrapper.SetConfig(s.cfg)
 
-	// 方案 A: 连接 Kiro 后台刷新器回调到 Watcher
-	// 当后台刷新器成功刷新 token 后，立即通知 Watcher 更新内存中的 Auth 对象
-	// 这解决了后台刷新与内存 Auth 对象之间的时间差问题
-	kiroauth.GetRefreshManager().SetOnTokenRefreshed(func(tokenID string, tokenData *kiroauth.KiroTokenData) {
-		if tokenData == nil || watcherWrapper == nil {
-			return
-		}
-		log.Debugf("kiro refresh callback: notifying watcher for token %s", tokenID)
-		watcherWrapper.NotifyTokenRefreshed(tokenID, tokenData.AccessToken, tokenData.RefreshToken, tokenData.ExpiresAt)
-	})
-	log.Debug("kiro: connected background refresh callback to watcher")
-
 	watcherCtx, watcherCancel := context.WithCancel(context.Background())
 	s.watcherCancel = watcherCancel
 	if err = watcherWrapper.Start(watcherCtx); err != nil {
@@ -716,13 +639,6 @@ func (s *Service) Shutdown(ctx context.Context) error {
 			s.authQueueStop = nil
 		}
 
-		if errShutdownPprof := s.shutdownPprof(ctx); errShutdownPprof != nil {
-			log.Errorf("failed to stop pprof server: %v", errShutdownPprof)
-			if shutdownErr == nil {
-				shutdownErr = errShutdownPprof
-			}
-		}
-
 		// no legacy clients to persist
 
 		if s.server != nil {
@@ -794,13 +710,6 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 		provider = "openai-compatibility"
 	}
 	excluded := s.oauthExcludedModels(provider, authKind)
-	// The synthesizer pre-merges per-account and global exclusions into the "excluded_models" attribute.
-	// If this attribute is present, it represents the complete list of exclusions and overrides the global config.
-	if a.Attributes != nil {
-		if val, ok := a.Attributes["excluded_models"]; ok && strings.TrimSpace(val) != "" {
-			excluded = strings.Split(val, ",")
-		}
-	}
 	var models []*ModelInfo
 	switch provider {
 	case "gemini":
@@ -862,18 +771,6 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 	case "iflow":
 		models = registry.GetIFlowModels()
 		models = applyExcludedModels(models, excluded)
-	case "kimi":
-		models = registry.GetKimiModels()
-    models = applyExcludedModels(models, excluded)
-	case "github-copilot":
-		models = registry.GetGitHubCopilotModels()
-		models = applyExcludedModels(models, excluded)
-	case "kiro":
-		models = s.fetchKiroModels(a)
-		models = applyExcludedModels(models, excluded)
-	case "kilo":
-		models = executor.FetchKiloModels(context.Background(), a, s.cfg)
-		models = applyExcludedModels(models, excluded)
 	default:
 		// Handle OpenAI-compatibility providers by name using config
 		if s.cfg != nil {
@@ -1435,216 +1332,3 @@ func applyOAuthModelAlias(cfg *config.Config, provider, authKind string, models
 	}
 	return out
 }
-
-// fetchKiroModels attempts to dynamically fetch Kiro models from the API.
-// If dynamic fetch fails, it falls back to static registry.GetKiroModels().
-func (s *Service) fetchKiroModels(a *coreauth.Auth) []*ModelInfo {
-	if a == nil {
-		log.Debug("kiro: auth is nil, using static models")
-		return registry.GetKiroModels()
-	}
-
-	// Extract token data from auth attributes
-	tokenData := s.extractKiroTokenData(a)
-	if tokenData == nil || tokenData.AccessToken == "" {
-		log.Debug("kiro: no valid token data in auth, using static models")
-		return registry.GetKiroModels()
-	}
-
-	// Create KiroAuth instance
-	kAuth := kiroauth.NewKiroAuth(s.cfg)
-	if kAuth == nil {
-		log.Warn("kiro: failed to create KiroAuth instance, using static models")
-		return registry.GetKiroModels()
-	}
-
-	// Use timeout context for API call
-	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
-	defer cancel()
-
-	// Attempt to fetch dynamic models
-	apiModels, err := kAuth.ListAvailableModels(ctx, tokenData)
-	if err != nil {
-		log.Warnf("kiro: failed to fetch dynamic models: %v, using static models", err)
-		return registry.GetKiroModels()
-	}
-
-	if len(apiModels) == 0 {
-		log.Debug("kiro: API returned no models, using static models")
-		return registry.GetKiroModels()
-	}
-
-	// Convert API models to ModelInfo
-	models := convertKiroAPIModels(apiModels)
-
-	// Generate agentic variants
-	models = generateKiroAgenticVariants(models)
-
-	log.Infof("kiro: successfully fetched %d models from API (including agentic variants)", len(models))
-	return models
-}
-
-// extractKiroTokenData extracts KiroTokenData from auth attributes and metadata.
-// It supports both config-based tokens (stored in Attributes) and file-based tokens (stored in Metadata).
-func (s *Service) extractKiroTokenData(a *coreauth.Auth) *kiroauth.KiroTokenData {
-	if a == nil {
-		return nil
-	}
-
-	var accessToken, profileArn, refreshToken string
-
-	// Priority 1: Try to get from Attributes (config.yaml source)
-	if a.Attributes != nil {
-		accessToken = strings.TrimSpace(a.Attributes["access_token"])
-		profileArn = strings.TrimSpace(a.Attributes["profile_arn"])
-		refreshToken = strings.TrimSpace(a.Attributes["refresh_token"])
-	}
-
-	// Priority 2: If not found in Attributes, try Metadata (JSON file source)
-	if accessToken == "" && a.Metadata != nil {
-		if at, ok := a.Metadata["access_token"].(string); ok {
-			accessToken = strings.TrimSpace(at)
-		}
-		if pa, ok := a.Metadata["profile_arn"].(string); ok {
-			profileArn = strings.TrimSpace(pa)
-		}
-		if rt, ok := a.Metadata["refresh_token"].(string); ok {
-			refreshToken = strings.TrimSpace(rt)
-		}
-	}
-
-	// access_token is required
-	if accessToken == "" {
-		return nil
-	}
-
-	return &kiroauth.KiroTokenData{
-		AccessToken:  accessToken,
-		ProfileArn:   profileArn,
-		RefreshToken: refreshToken,
-	}
-}
-
-// convertKiroAPIModels converts Kiro API models to ModelInfo slice.
-func convertKiroAPIModels(apiModels []*kiroauth.KiroModel) []*ModelInfo {
-	if len(apiModels) == 0 {
-		return nil
-	}
-
-	now := time.Now().Unix()
-	models := make([]*ModelInfo, 0, len(apiModels))
-
-	for _, m := range apiModels {
-		if m == nil || m.ModelID == "" {
-			continue
-		}
-
-		// Create model ID with kiro- prefix
-		modelID := "kiro-" + normalizeKiroModelID(m.ModelID)
-
-		info := &ModelInfo{
-			ID:                  modelID,
-			Object:              "model",
-			Created:             now,
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         formatKiroDisplayName(m.ModelName, m.RateMultiplier),
-			Description:         m.Description,
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &registry.ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
-		}
-
-		if m.MaxInputTokens > 0 {
-			info.ContextLength = m.MaxInputTokens
-		}
-
-		models = append(models, info)
-	}
-
-	return models
-}
-
-// normalizeKiroModelID normalizes a Kiro model ID by converting dots to dashes
-// and removing common prefixes.
-func normalizeKiroModelID(modelID string) string {
-	// Remove common prefixes
-	modelID = strings.TrimPrefix(modelID, "anthropic.")
-	modelID = strings.TrimPrefix(modelID, "amazon.")
-
-	// Replace dots with dashes for consistency
-	modelID = strings.ReplaceAll(modelID, ".", "-")
-
-	// Replace underscores with dashes
-	modelID = strings.ReplaceAll(modelID, "_", "-")
-
-	return strings.ToLower(modelID)
-}
-
-// formatKiroDisplayName formats the display name with rate multiplier info.
-func formatKiroDisplayName(modelName string, rateMultiplier float64) string {
-	if modelName == "" {
-		return ""
-	}
-
-	displayName := "Kiro " + modelName
-	if rateMultiplier > 0 && rateMultiplier != 1.0 {
-		displayName += fmt.Sprintf(" (%.1fx credit)", rateMultiplier)
-	}
-
-	return displayName
-}
-
-// generateKiroAgenticVariants generates agentic variants for Kiro models.
-// Agentic variants have optimized system prompts for coding agents.
-func generateKiroAgenticVariants(models []*ModelInfo) []*ModelInfo {
-	if len(models) == 0 {
-		return models
-	}
-
-	result := make([]*ModelInfo, 0, len(models)*2)
-	result = append(result, models...)
-
-	for _, m := range models {
-		if m == nil {
-			continue
-		}
-
-		// Skip if already an agentic variant
-		if strings.HasSuffix(m.ID, "-agentic") {
-			continue
-		}
-
-		// Skip auto models from agentic variant generation
-		if strings.Contains(m.ID, "-auto") {
-			continue
-		}
-
-		// Create agentic variant
-		agentic := &ModelInfo{
-			ID:                  m.ID + "-agentic",
-			Object:              m.Object,
-			Created:             m.Created,
-			OwnedBy:             m.OwnedBy,
-			Type:                m.Type,
-			DisplayName:         m.DisplayName + " (Agentic)",
-			Description:         m.Description + " - Optimized for coding agents (chunked writes)",
-			ContextLength:       m.ContextLength,
-			MaxCompletionTokens: m.MaxCompletionTokens,
-		}
-
-		// Copy thinking support if present
-		if m.Thinking != nil {
-			agentic.Thinking = &registry.ThinkingSupport{
-				Min:            m.Thinking.Min,
-				Max:            m.Thinking.Max,
-				ZeroAllowed:    m.Thinking.ZeroAllowed,
-				DynamicAllowed: m.Thinking.DynamicAllowed,
-			}
-		}
-
-		result = append(result, agentic)
-	}
-
-	return result
-}
diff --git a/sdk/cliproxy/service_oauth_model_alias_test.go b/sdk/cliproxy/service_oauth_model_alias_test.go
index 2f90d1dfb0..2caf7a178f 100644
--- a/sdk/cliproxy/service_oauth_model_alias_test.go
+++ b/sdk/cliproxy/service_oauth_model_alias_test.go
@@ -90,26 +90,3 @@ func TestApplyOAuthModelAlias_ForkAddsMultipleAliases(t *testing.T) {
 		t.Fatalf("expected forked model name %q, got %q", "models/g5-2", out[2].Name)
 	}
 }
-
-func TestApplyOAuthModelAlias_DefaultGitHubCopilotAliasViaSanitize(t *testing.T) {
-	cfg := &config.Config{}
-	cfg.SanitizeOAuthModelAlias()
-
-	models := []*ModelInfo{
-		{ID: "claude-opus-4.6", Name: "models/claude-opus-4.6"},
-	}
-
-	out := applyOAuthModelAlias(cfg, "github-copilot", "oauth", models)
-	if len(out) != 2 {
-		t.Fatalf("expected 2 models (original + default alias), got %d", len(out))
-	}
-	if out[0].ID != "claude-opus-4.6" {
-		t.Fatalf("expected first model id %q, got %q", "claude-opus-4.6", out[0].ID)
-	}
-	if out[1].ID != "claude-opus-4-6" {
-		t.Fatalf("expected second model id %q, got %q", "claude-opus-4-6", out[1].ID)
-	}
-	if out[1].Name != "models/claude-opus-4-6" {
-		t.Fatalf("expected aliased model name %q, got %q", "models/claude-opus-4-6", out[1].Name)
-	}
-}
diff --git a/sdk/cliproxy/types.go b/sdk/cliproxy/types.go
index ee8f761d08..1521dffee4 100644
--- a/sdk/cliproxy/types.go
+++ b/sdk/cliproxy/types.go
@@ -89,7 +89,6 @@ type WatcherWrapper struct {
 	snapshotAuths         func() []*coreauth.Auth
 	setUpdateQueue        func(queue chan<- watcher.AuthUpdate)
 	dispatchRuntimeUpdate func(update watcher.AuthUpdate) bool
-	notifyTokenRefreshed  func(tokenID, accessToken, refreshToken, expiresAt string) // 方案 A: 后台刷新通知
 }
 
 // Start proxies to the underlying watcher Start implementation.
@@ -147,16 +146,3 @@ func (w *WatcherWrapper) SetAuthUpdateQueue(queue chan<- watcher.AuthUpdate) {
 	}
 	w.setUpdateQueue(queue)
 }
-
-// NotifyTokenRefreshed 通知 Watcher 后台刷新器已更新 token
-// 这是方案 A 的核心方法，用于解决后台刷新与内存 Auth 对象的时间差问题
-// tokenID: token 文件名（如 kiro-xxx.json）
-// accessToken: 新的 access token
-// refreshToken: 新的 refresh token
-// expiresAt: 新的过期时间（RFC3339 格式）
-func (w *WatcherWrapper) NotifyTokenRefreshed(tokenID, accessToken, refreshToken, expiresAt string) {
-	if w == nil || w.notifyTokenRefreshed == nil {
-		return
-	}
-	w.notifyTokenRefreshed(tokenID, accessToken, refreshToken, expiresAt)
-}
diff --git a/sdk/cliproxy/watcher.go b/sdk/cliproxy/watcher.go
index e6e91bddb7..caeadf19b9 100644
--- a/sdk/cliproxy/watcher.go
+++ b/sdk/cliproxy/watcher.go
@@ -31,8 +31,5 @@ func defaultWatcherFactory(configPath, authDir string, reload func(*config.Confi
 		dispatchRuntimeUpdate: func(update watcher.AuthUpdate) bool {
 			return w.DispatchRuntimeAuthUpdate(update)
 		},
-		notifyTokenRefreshed: func(tokenID, accessToken, refreshToken, expiresAt string) {
-			w.NotifyTokenRefreshed(tokenID, accessToken, refreshToken, expiresAt)
-		},
 	}, nil
 }
diff --git a/sdk/config/config.go b/sdk/config/config.go
index 14163418f7..a9b5c2c3e5 100644
--- a/sdk/config/config.go
+++ b/sdk/config/config.go
@@ -7,6 +7,8 @@ package config
 import internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 
 type SDKConfig = internalconfig.SDKConfig
+type AccessConfig = internalconfig.AccessConfig
+type AccessProvider = internalconfig.AccessProvider
 
 type Config = internalconfig.Config
 
@@ -32,9 +34,15 @@ type OpenAICompatibilityModel = internalconfig.OpenAICompatibilityModel
 type TLS = internalconfig.TLSConfig
 
 const (
-	DefaultPanelGitHubRepository = internalconfig.DefaultPanelGitHubRepository
+	AccessProviderTypeConfigAPIKey = internalconfig.AccessProviderTypeConfigAPIKey
+	DefaultAccessProviderName      = internalconfig.DefaultAccessProviderName
+	DefaultPanelGitHubRepository   = internalconfig.DefaultPanelGitHubRepository
 )
 
+func MakeInlineAPIKeyProvider(keys []string) *AccessProvider {
+	return internalconfig.MakeInlineAPIKeyProvider(keys)
+}
+
 func LoadConfig(configFile string) (*Config, error) { return internalconfig.LoadConfig(configFile) }
 
 func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
diff --git a/test/builtin_tools_translation_test.go b/test/builtin_tools_translation_test.go
index 07d7671544..b4ca7b0da6 100644
--- a/test/builtin_tools_translation_test.go
+++ b/test/builtin_tools_translation_test.go
@@ -33,7 +33,7 @@ func TestOpenAIToCodex_PreservesBuiltinTools(t *testing.T) {
 	}
 }
 
-func TestOpenAIResponsesToOpenAI_IgnoresBuiltinTools(t *testing.T) {
+func TestOpenAIResponsesToOpenAI_PreservesBuiltinTools(t *testing.T) {
 	in := []byte(`{
 		"model":"gpt-5",
 		"input":[{"role":"user","content":[{"type":"input_text","text":"hi"}]}],
@@ -42,7 +42,13 @@ func TestOpenAIResponsesToOpenAI_IgnoresBuiltinTools(t *testing.T) {
 
 	out := sdktranslator.TranslateRequest(sdktranslator.FormatOpenAIResponse, sdktranslator.FormatOpenAI, "gpt-5", in, false)
 
-	if got := gjson.GetBytes(out, "tools.#").Int(); got != 0 {
-		t.Fatalf("expected 0 tools (builtin tools not supported in Chat Completions), got %d: %s", got, string(out))
+	if got := gjson.GetBytes(out, "tools.#").Int(); got != 1 {
+		t.Fatalf("expected 1 tool, got %d: %s", got, string(out))
+	}
+	if got := gjson.GetBytes(out, "tools.0.type").String(); got != "web_search" {
+		t.Fatalf("expected tools[0].type=web_search, got %q: %s", got, string(out))
+	}
+	if got := gjson.GetBytes(out, "tools.0.search_context_size").String(); got != "low" {
+		t.Fatalf("expected tools[0].search_context_size=low, got %q: %s", got, string(out))
 	}
 }
diff --git a/test/config_migration_test.go b/test/config_migration_test.go
new file mode 100644
index 0000000000..2ed8788277
--- /dev/null
+++ b/test/config_migration_test.go
@@ -0,0 +1,195 @@
+package test
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+)
+
+func TestLegacyConfigMigration(t *testing.T) {
+	t.Run("onlyLegacyFields", func(t *testing.T) {
+		path := writeConfig(t, `
+port: 8080
+generative-language-api-key:
+  - "legacy-gemini-1"
+openai-compatibility:
+  - name: "legacy-provider"
+    base-url: "https://example.com"
+    api-keys:
+      - "legacy-openai-1"
+amp-upstream-url: "https://amp.example.com"
+amp-upstream-api-key: "amp-legacy-key"
+amp-restrict-management-to-localhost: false
+amp-model-mappings:
+  - from: "old-model"
+    to: "new-model"
+`)
+		cfg, err := config.LoadConfig(path)
+		if err != nil {
+			t.Fatalf("load legacy config: %v", err)
+		}
+		if got := len(cfg.GeminiKey); got != 1 || cfg.GeminiKey[0].APIKey != "legacy-gemini-1" {
+			t.Fatalf("gemini migration mismatch: %+v", cfg.GeminiKey)
+		}
+		if got := len(cfg.OpenAICompatibility); got != 1 {
+			t.Fatalf("expected 1 openai-compat provider, got %d", got)
+		}
+		if entries := cfg.OpenAICompatibility[0].APIKeyEntries; len(entries) != 1 || entries[0].APIKey != "legacy-openai-1" {
+			t.Fatalf("openai-compat migration mismatch: %+v", entries)
+		}
+		if cfg.AmpCode.UpstreamURL != "https://amp.example.com" || cfg.AmpCode.UpstreamAPIKey != "amp-legacy-key" {
+			t.Fatalf("amp migration failed: %+v", cfg.AmpCode)
+		}
+		if cfg.AmpCode.RestrictManagementToLocalhost {
+			t.Fatalf("expected amp restriction to be false after migration")
+		}
+		if got := len(cfg.AmpCode.ModelMappings); got != 1 || cfg.AmpCode.ModelMappings[0].From != "old-model" {
+			t.Fatalf("amp mappings migration mismatch: %+v", cfg.AmpCode.ModelMappings)
+		}
+		updated := readFile(t, path)
+		if strings.Contains(updated, "generative-language-api-key") {
+			t.Fatalf("legacy gemini key still present:\n%s", updated)
+		}
+		if strings.Contains(updated, "amp-upstream-url") || strings.Contains(updated, "amp-restrict-management-to-localhost") {
+			t.Fatalf("legacy amp keys still present:\n%s", updated)
+		}
+		if strings.Contains(updated, "\n    api-keys:") {
+			t.Fatalf("legacy openai compat keys still present:\n%s", updated)
+		}
+	})
+
+	t.Run("mixedLegacyAndNewFields", func(t *testing.T) {
+		path := writeConfig(t, `
+gemini-api-key:
+  - api-key: "new-gemini"
+generative-language-api-key:
+  - "new-gemini"
+  - "legacy-gemini-only"
+openai-compatibility:
+  - name: "mixed-provider"
+    base-url: "https://mixed.example.com"
+    api-key-entries:
+      - api-key: "new-entry"
+    api-keys:
+      - "legacy-entry"
+      - "new-entry"
+`)
+		cfg, err := config.LoadConfig(path)
+		if err != nil {
+			t.Fatalf("load mixed config: %v", err)
+		}
+		if got := len(cfg.GeminiKey); got != 2 {
+			t.Fatalf("expected 2 gemini entries, got %d: %+v", got, cfg.GeminiKey)
+		}
+		seen := make(map[string]struct{}, len(cfg.GeminiKey))
+		for _, entry := range cfg.GeminiKey {
+			if _, exists := seen[entry.APIKey]; exists {
+				t.Fatalf("duplicate gemini key %q after migration", entry.APIKey)
+			}
+			seen[entry.APIKey] = struct{}{}
+		}
+		provider := cfg.OpenAICompatibility[0]
+		if got := len(provider.APIKeyEntries); got != 2 {
+			t.Fatalf("expected 2 openai entries, got %d: %+v", got, provider.APIKeyEntries)
+		}
+		entrySeen := make(map[string]struct{}, len(provider.APIKeyEntries))
+		for _, entry := range provider.APIKeyEntries {
+			if _, ok := entrySeen[entry.APIKey]; ok {
+				t.Fatalf("duplicate openai key %q after migration", entry.APIKey)
+			}
+			entrySeen[entry.APIKey] = struct{}{}
+		}
+	})
+
+	t.Run("onlyNewFields", func(t *testing.T) {
+		path := writeConfig(t, `
+gemini-api-key:
+  - api-key: "new-only"
+openai-compatibility:
+  - name: "new-only-provider"
+    base-url: "https://new-only.example.com"
+    api-key-entries:
+      - api-key: "new-only-entry"
+ampcode:
+  upstream-url: "https://amp.new"
+  upstream-api-key: "new-amp-key"
+  restrict-management-to-localhost: true
+  model-mappings:
+    - from: "a"
+      to: "b"
+`)
+		cfg, err := config.LoadConfig(path)
+		if err != nil {
+			t.Fatalf("load new config: %v", err)
+		}
+		if len(cfg.GeminiKey) != 1 || cfg.GeminiKey[0].APIKey != "new-only" {
+			t.Fatalf("unexpected gemini entries: %+v", cfg.GeminiKey)
+		}
+		if len(cfg.OpenAICompatibility) != 1 || len(cfg.OpenAICompatibility[0].APIKeyEntries) != 1 {
+			t.Fatalf("unexpected openai compat entries: %+v", cfg.OpenAICompatibility)
+		}
+		if cfg.AmpCode.UpstreamURL != "https://amp.new" || cfg.AmpCode.UpstreamAPIKey != "new-amp-key" {
+			t.Fatalf("unexpected amp config: %+v", cfg.AmpCode)
+		}
+	})
+
+	t.Run("duplicateNamesDifferentBase", func(t *testing.T) {
+		path := writeConfig(t, `
+openai-compatibility:
+  - name: "dup-provider"
+    base-url: "https://provider-a"
+    api-keys:
+      - "key-a"
+  - name: "dup-provider"
+    base-url: "https://provider-b"
+    api-keys:
+      - "key-b"
+`)
+		cfg, err := config.LoadConfig(path)
+		if err != nil {
+			t.Fatalf("load duplicate config: %v", err)
+		}
+		if len(cfg.OpenAICompatibility) != 2 {
+			t.Fatalf("expected 2 providers, got %d", len(cfg.OpenAICompatibility))
+		}
+		for _, entry := range cfg.OpenAICompatibility {
+			if len(entry.APIKeyEntries) != 1 {
+				t.Fatalf("expected 1 key entry per provider: %+v", entry)
+			}
+			switch entry.BaseURL {
+			case "https://provider-a":
+				if entry.APIKeyEntries[0].APIKey != "key-a" {
+					t.Fatalf("provider-a key mismatch: %+v", entry.APIKeyEntries)
+				}
+			case "https://provider-b":
+				if entry.APIKeyEntries[0].APIKey != "key-b" {
+					t.Fatalf("provider-b key mismatch: %+v", entry.APIKeyEntries)
+				}
+			default:
+				t.Fatalf("unexpected provider base url: %s", entry.BaseURL)
+			}
+		}
+	})
+}
+
+func writeConfig(t *testing.T, content string) string {
+	t.Helper()
+	dir := t.TempDir()
+	path := filepath.Join(dir, "config.yaml")
+	if err := os.WriteFile(path, []byte(strings.TrimSpace(content)+"\n"), 0o644); err != nil {
+		t.Fatalf("write temp config: %v", err)
+	}
+	return path
+}
+
+func readFile(t *testing.T, path string) string {
+	t.Helper()
+	data, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read temp config: %v", err)
+	}
+	return string(data)
+}
diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go
index e7beb1a351..fc20199ed4 100644
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -15,7 +15,6 @@ import (
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow"
-	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/kimi"
 	_ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
@@ -1316,122 +1315,6 @@ func TestThinkingE2EMatrix_Suffix(t *testing.T) {
 			includeThoughts: "true",
 			expectErr:       false,
 		},
-
-		// GitHub Copilot tests: gpt-5, gpt-5.1, gpt-5.2 (Levels=low/medium/high, some with none/xhigh)
-		// Testing /chat/completions endpoint (openai format) - with suffix
-
-		// Case 112: OpenAI to gpt-5, level high → high
-		{
-			name:        "112",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5(high)",
-			inputJSON:   `{"model":"gpt-5(high)","messages":[{"role":"user","content":"hi"}]}`,
-			expectField: "reasoning_effort",
-			expectValue: "high",
-			expectErr:   false,
-		},
-		// Case 113: OpenAI to gpt-5, level none → clamped to low (ZeroAllowed=false)
-		{
-			name:        "113",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5(none)",
-			inputJSON:   `{"model":"gpt-5(none)","messages":[{"role":"user","content":"hi"}]}`,
-			expectField: "reasoning_effort",
-			expectValue: "low",
-			expectErr:   false,
-		},
-		// Case 114: OpenAI to gpt-5.1, level none → none (ZeroAllowed=true)
-		{
-			name:        "114",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5.1(none)",
-			inputJSON:   `{"model":"gpt-5.1(none)","messages":[{"role":"user","content":"hi"}]}`,
-			expectField: "reasoning_effort",
-			expectValue: "none",
-			expectErr:   false,
-		},
-		// Case 115: OpenAI to gpt-5.2, level xhigh → xhigh
-		{
-			name:        "115",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5.2(xhigh)",
-			inputJSON:   `{"model":"gpt-5.2(xhigh)","messages":[{"role":"user","content":"hi"}]}`,
-			expectField: "reasoning_effort",
-			expectValue: "xhigh",
-			expectErr:   false,
-		},
-		// Case 116: OpenAI to gpt-5, level xhigh (out of range) → error
-		{
-			name:        "116",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5(xhigh)",
-			inputJSON:   `{"model":"gpt-5(xhigh)","messages":[{"role":"user","content":"hi"}]}`,
-			expectField: "",
-			expectErr:   true,
-		},
-		// Case 117: Claude to gpt-5.1, budget 0 → none (ZeroAllowed=true)
-		{
-			name:        "117",
-			from:        "claude",
-			to:          "github-copilot",
-			model:       "gpt-5.1(0)",
-			inputJSON:   `{"model":"gpt-5.1(0)","messages":[{"role":"user","content":"hi"}]}`,
-			expectField: "reasoning_effort",
-			expectValue: "none",
-			expectErr:   false,
-		},
-
-		// GitHub Copilot tests: /responses endpoint (codex format) - with suffix
-
-		// Case 118: OpenAI-Response to gpt-5-codex, level high → high
-		{
-			name:        "118",
-			from:        "openai-response",
-			to:          "github-copilot",
-			model:       "gpt-5-codex(high)",
-			inputJSON:   `{"model":"gpt-5-codex(high)","input":[{"role":"user","content":"hi"}]}`,
-			expectField: "reasoning.effort",
-			expectValue: "high",
-			expectErr:   false,
-		},
-		// Case 119: OpenAI-Response to gpt-5.2-codex, level xhigh → xhigh
-		{
-			name:        "119",
-			from:        "openai-response",
-			to:          "github-copilot",
-			model:       "gpt-5.2-codex(xhigh)",
-			inputJSON:   `{"model":"gpt-5.2-codex(xhigh)","input":[{"role":"user","content":"hi"}]}`,
-			expectField: "reasoning.effort",
-			expectValue: "xhigh",
-			expectErr:   false,
-		},
-		// Case 120: OpenAI-Response to gpt-5.2-codex, level none → none
-		{
-			name:        "120",
-			from:        "openai-response",
-			to:          "github-copilot",
-			model:       "gpt-5.2-codex(none)",
-			inputJSON:   `{"model":"gpt-5.2-codex(none)","input":[{"role":"user","content":"hi"}]}`,
-			expectField: "reasoning.effort",
-			expectValue: "none",
-			expectErr:   false,
-		},
-		// Case 121: OpenAI-Response to gpt-5-codex, level none → clamped to low (ZeroAllowed=false)
-		{
-			name:        "121",
-			from:        "openai-response",
-			to:          "github-copilot",
-			model:       "gpt-5-codex(none)",
-			inputJSON:   `{"model":"gpt-5-codex(none)","input":[{"role":"user","content":"hi"}]}`,
-			expectField: "reasoning.effort",
-			expectValue: "low",
-			expectErr:   false,
-		},
 	}
 
 	runThinkingTests(t, cases)
@@ -2701,251 +2584,6 @@ func TestThinkingE2EMatrix_Body(t *testing.T) {
 			includeThoughts: "true",
 			expectErr:       false,
 		},
-
-		// GitHub Copilot tests: gpt-5, gpt-5.1, gpt-5.2 (Levels=low/medium/high, some with none/xhigh)
-		// Testing /chat/completions endpoint (openai format) - with body params
-
-		// Case 112: OpenAI to gpt-5, reasoning_effort=high → high
-		{
-			name:        "112",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5",
-			inputJSON:   `{"model":"gpt-5","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"high"}`,
-			expectField: "reasoning_effort",
-			expectValue: "high",
-			expectErr:   false,
-		},
-		// Case 113: OpenAI to gpt-5, reasoning_effort=none → clamped to low (ZeroAllowed=false)
-		{
-			name:        "113",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5",
-			inputJSON:   `{"model":"gpt-5","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`,
-			expectField: "reasoning_effort",
-			expectValue: "low",
-			expectErr:   false,
-		},
-		// Case 114: OpenAI to gpt-5.1, reasoning_effort=none → none (ZeroAllowed=true)
-		{
-			name:        "114",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5.1",
-			inputJSON:   `{"model":"gpt-5.1","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"none"}`,
-			expectField: "reasoning_effort",
-			expectValue: "none",
-			expectErr:   false,
-		},
-		// Case 115: OpenAI to gpt-5.2, reasoning_effort=xhigh → xhigh
-		{
-			name:        "115",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5.2",
-			inputJSON:   `{"model":"gpt-5.2","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`,
-			expectField: "reasoning_effort",
-			expectValue: "xhigh",
-			expectErr:   false,
-		},
-		// Case 116: OpenAI to gpt-5, reasoning_effort=xhigh (out of range) → error
-		{
-			name:        "116",
-			from:        "openai",
-			to:          "github-copilot",
-			model:       "gpt-5",
-			inputJSON:   `{"model":"gpt-5","messages":[{"role":"user","content":"hi"}],"reasoning_effort":"xhigh"}`,
-			expectField: "",
-			expectErr:   true,
-		},
-		// Case 117: Claude to gpt-5.1, thinking.budget_tokens=0 → none (ZeroAllowed=true)
-		{
-			name:        "117",
-			from:        "claude",
-			to:          "github-copilot",
-			model:       "gpt-5.1",
-			inputJSON:   `{"model":"gpt-5.1","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"enabled","budget_tokens":0}}`,
-			expectField: "reasoning_effort",
-			expectValue: "none",
-			expectErr:   false,
-		},
-
-		// GitHub Copilot tests: /responses endpoint (codex format) - with body params
-
-		// Case 118: OpenAI-Response to gpt-5-codex, reasoning.effort=high → high
-		{
-			name:        "118",
-			from:        "openai-response",
-			to:          "github-copilot",
-			model:       "gpt-5-codex",
-			inputJSON:   `{"model":"gpt-5-codex","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"high"}}`,
-			expectField: "reasoning.effort",
-			expectValue: "high",
-			expectErr:   false,
-		},
-		// Case 119: OpenAI-Response to gpt-5.2-codex, reasoning.effort=xhigh → xhigh
-		{
-			name:        "119",
-			from:        "openai-response",
-			to:          "github-copilot",
-			model:       "gpt-5.2-codex",
-			inputJSON:   `{"model":"gpt-5.2-codex","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"xhigh"}}`,
-			expectField: "reasoning.effort",
-			expectValue: "xhigh",
-			expectErr:   false,
-		},
-		// Case 120: OpenAI-Response to gpt-5.2-codex, reasoning.effort=none → none
-		{
-			name:        "120",
-			from:        "openai-response",
-			to:          "github-copilot",
-			model:       "gpt-5.2-codex",
-			inputJSON:   `{"model":"gpt-5.2-codex","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"none"}}`,
-			expectField: "reasoning.effort",
-			expectValue: "none",
-			expectErr:   false,
-		},
-		// Case 121: OpenAI-Response to gpt-5-codex, reasoning.effort=none → clamped to low (ZeroAllowed=false)
-		{
-			name:        "121",
-			from:        "openai-response",
-			to:          "github-copilot",
-			model:       "gpt-5-codex",
-			inputJSON:   `{"model":"gpt-5-codex","input":[{"role":"user","content":"hi"}],"reasoning":{"effort":"none"}}`,
-			expectField: "reasoning.effort",
-			expectValue: "low",
-			expectErr:   false,
-		},
-	}
-
-	runThinkingTests(t, cases)
-}
-
-// TestThinkingE2EClaudeAdaptive_Body tests Claude thinking.type=adaptive extended body-only cases.
-// These cases validate that adaptive means "thinking enabled without explicit budget", and
-// cross-protocol conversion should resolve to target-model maximum thinking capability.
-func TestThinkingE2EClaudeAdaptive_Body(t *testing.T) {
-	reg := registry.GetGlobalRegistry()
-	uid := fmt.Sprintf("thinking-e2e-claude-adaptive-%d", time.Now().UnixNano())
-
-	reg.RegisterClient(uid, "test", getTestModels())
-	defer reg.UnregisterClient(uid)
-
-	cases := []thinkingTestCase{
-		// A1: Claude adaptive to OpenAI level model -> highest supported level
-		{
-			name:        "A1",
-			from:        "claude",
-			to:          "openai",
-			model:       "level-model",
-			inputJSON:   `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField: "reasoning_effort",
-			expectValue: "high",
-			expectErr:   false,
-		},
-		// A2: Claude adaptive to Gemini level subset model -> highest supported level
-		{
-			name:            "A2",
-			from:            "claude",
-			to:              "gemini",
-			model:           "level-subset-model",
-			inputJSON:       `{"model":"level-subset-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField:     "generationConfig.thinkingConfig.thinkingLevel",
-			expectValue:     "high",
-			includeThoughts: "true",
-			expectErr:       false,
-		},
-		// A3: Claude adaptive to Gemini budget model -> max budget
-		{
-			name:            "A3",
-			from:            "claude",
-			to:              "gemini",
-			model:           "gemini-budget-model",
-			inputJSON:       `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField:     "generationConfig.thinkingConfig.thinkingBudget",
-			expectValue:     "20000",
-			includeThoughts: "true",
-			expectErr:       false,
-		},
-		// A4: Claude adaptive to Gemini mixed model -> highest supported level
-		{
-			name:            "A4",
-			from:            "claude",
-			to:              "gemini",
-			model:           "gemini-mixed-model",
-			inputJSON:       `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField:     "generationConfig.thinkingConfig.thinkingLevel",
-			expectValue:     "high",
-			includeThoughts: "true",
-			expectErr:       false,
-		},
-		// A5: Claude adaptive passthrough for same protocol
-		{
-			name:        "A5",
-			from:        "claude",
-			to:          "claude",
-			model:       "claude-budget-model",
-			inputJSON:   `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField: "thinking.type",
-			expectValue: "adaptive",
-			expectErr:   false,
-		},
-		// A6: Claude adaptive to Antigravity budget model -> max budget
-		{
-			name:            "A6",
-			from:            "claude",
-			to:              "antigravity",
-			model:           "antigravity-budget-model",
-			inputJSON:       `{"model":"antigravity-budget-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField:     "request.generationConfig.thinkingConfig.thinkingBudget",
-			expectValue:     "20000",
-			includeThoughts: "true",
-			expectErr:       false,
-		},
-		// A7: Claude adaptive to iFlow GLM -> enabled boolean
-		{
-			name:        "A7",
-			from:        "claude",
-			to:          "iflow",
-			model:       "glm-test",
-			inputJSON:   `{"model":"glm-test","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField: "chat_template_kwargs.enable_thinking",
-			expectValue: "true",
-			expectErr:   false,
-		},
-		// A8: Claude adaptive to iFlow MiniMax -> enabled boolean
-		{
-			name:        "A8",
-			from:        "claude",
-			to:          "iflow",
-			model:       "minimax-test",
-			inputJSON:   `{"model":"minimax-test","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField: "reasoning_split",
-			expectValue: "true",
-			expectErr:   false,
-		},
-		// A9: Claude adaptive to Codex level model -> highest supported level
-		{
-			name:        "A9",
-			from:        "claude",
-			to:          "codex",
-			model:       "level-model",
-			inputJSON:   `{"model":"level-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField: "reasoning.effort",
-			expectValue: "high",
-			expectErr:   false,
-		},
-		// A10: Claude adaptive on non-thinking model should still be stripped
-		{
-			name:        "A10",
-			from:        "claude",
-			to:          "openai",
-			model:       "no-thinking-model",
-			inputJSON:   `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}],"thinking":{"type":"adaptive"}}`,
-			expectField: "",
-			expectErr:   false,
-		},
 	}
 
 	runThinkingTests(t, cases)
@@ -3045,51 +2683,6 @@ func getTestModels() []*registry.ModelInfo {
 			DisplayName: "MiniMax Test Model",
 			Thinking:    &registry.ThinkingSupport{Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}},
 		},
-		{
-			ID:          "gpt-5",
-			Object:      "model",
-			Created:     1700000000,
-			OwnedBy:     "github-copilot",
-			Type:        "github-copilot",
-			DisplayName: "GPT-5",
-			Thinking:    &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:          "gpt-5.1",
-			Object:      "model",
-			Created:     1700000000,
-			OwnedBy:     "github-copilot",
-			Type:        "github-copilot",
-			DisplayName: "GPT-5.1",
-			Thinking:    &registry.ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}, ZeroAllowed: true, DynamicAllowed: false},
-		},
-		{
-			ID:          "gpt-5.2",
-			Object:      "model",
-			Created:     1700000000,
-			OwnedBy:     "github-copilot",
-			Type:        "github-copilot",
-			DisplayName: "GPT-5.2",
-			Thinking:    &registry.ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}, ZeroAllowed: true, DynamicAllowed: false},
-		},
-		{
-			ID:          "gpt-5-codex",
-			Object:      "model",
-			Created:     1700000000,
-			OwnedBy:     "github-copilot",
-			Type:        "github-copilot",
-			DisplayName: "GPT-5 Codex",
-			Thinking:    &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}, ZeroAllowed: false, DynamicAllowed: false},
-		},
-		{
-			ID:          "gpt-5.2-codex",
-			Object:      "model",
-			Created:     1700000000,
-			OwnedBy:     "github-copilot",
-			Type:        "github-copilot",
-			DisplayName: "GPT-5.2 Codex",
-			Thinking:    &registry.ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}, ZeroAllowed: true, DynamicAllowed: false},
-		},
 	}
 }
 
@@ -3108,15 +2701,6 @@ func runThinkingTests(t *testing.T, cases []thinkingTestCase) {
 				translateTo = "openai"
 				applyTo = "iflow"
 			}
-			if tc.to == "github-copilot" {
-				if tc.from == "openai-response" {
-					translateTo = "codex"
-					applyTo = "codex"
-				} else {
-					translateTo = "openai"
-					applyTo = "openai"
-				}
-			}
 
 			body := sdktranslator.TranslateRequest(
 				sdktranslator.FromString(tc.from),