Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# v0.5.4 (2026-06-18)

## Features
- **Ponytail**: inject a lazy-senior-dev (minimal-code) system prompt to curb over-engineering; dashboard toggle next to Caveman, lite/full levels. Caveman and Ponytail now share one format-aware system-prompt injector.

## Fixes
- **Kiro**: honor thinking effort budgets
- **AG/Kiro/Xiaomi**: provider fixes
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ Default URLs:
|---------|--------------|----------------|
| 🚀 **RTK Token Saver** ([RTK](https://github.com/rtk-ai/rtk) ⭐40K) | Compress tool outputs (`git diff`, `grep`, `ls`, `tree`...) before sending to LLM | Save **20-40% input tokens** per request |
| 🪨 **Caveman Mode** ([Caveman](https://github.com/JuliusBrussee/caveman) ⭐52K) | Inject caveman-speak prompt → LLM replies terse, technical substance preserved | Save **up to 65% output tokens** |
| 🐴 **Ponytail Mode** ([Ponytail](https://github.com/DietrichGebert/ponytail) ⭐38K) | Inject lazy-senior-dev prompt → LLM writes the minimum code that works, never cutting validation/security | **Less over-engineering**, fewer files |
| 🎯 **Smart 3-Tier Fallback** | Auto-route: Subscription → Cheap → Free | Never stop coding, zero downtime |
| 📊 **Real-Time Quota Tracking** | Live token count + reset countdown | Maximize subscription value |
| 🔄 **Format Translation** | OpenAI ↔ Claude ↔ Gemini ↔ Cursor ↔ Kiro ↔ Vertex | Works with any CLI tool |
Expand Down
9 changes: 8 additions & 1 deletion open-sse/handlers/chatCore.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { handleStreamingResponse, buildOnStreamComplete } from "./chatCore/strea
import { detectClientTool, isNativePassthrough } from "../utils/clientDetector.js";
import { dedupeTools } from "../utils/toolDeduper.js";
import { injectCaveman } from "../rtk/caveman.js";
import { injectPonytail } from "../rtk/ponytail.js";
import { compressMessages, formatRtkLog } from "../rtk/index.js";
import { getCapabilitiesForModel } from "../providers/capabilities.js";
import { stripUnsupportedModalities } from "../translator/concerns/modality.js";
Expand All @@ -32,7 +33,7 @@ import { prefetchRemoteImages } from "../translator/concerns/prefetch.js";
* @param {object} options.credentials - Provider credentials
* @param {string} options.sourceFormatOverride - Override detected source format (e.g. "openai-responses")
*/
export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, rtkEnabled, cavemanEnabled, cavemanLevel, sourceFormatOverride, providerThinking }) {
export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, rtkEnabled, cavemanEnabled, cavemanLevel, ponytailEnabled, ponytailLevel, sourceFormatOverride, providerThinking }) {
const { provider, model } = modelInfo;
const requestStartTime = Date.now();

Expand Down Expand Up @@ -155,6 +156,12 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred
log?.debug?.("CAVEMAN", `${cavemanLevel} | ${finalFormat}`);
}

// Ponytail: inject minimal-code (lazy senior dev) system prompt
if (ponytailEnabled && ponytailLevel) {
injectPonytail(translatedBody, finalFormat, ponytailLevel);
log?.debug?.("PONYTAIL", `${ponytailLevel} | ${finalFormat}`);
}

const executor = getExecutor(provider);
trackPendingRequest(model, provider, connectionId, true);
appendRequestLog({ model, provider, connectionId, status: "PENDING" }).catch(() => { });
Expand Down
96 changes: 3 additions & 93 deletions open-sse/rtk/caveman.js
Original file line number Diff line number Diff line change
@@ -1,100 +1,10 @@
// Caveman injector: appends a caveman-style instruction into the system message
// of the final request body, just before it is dispatched to the provider executor.
// Dispatches by format so it works for both translated and native-passthrough flows.
// Format-aware injection lives in the shared systemInject helper.

import { FORMATS } from "../translator/formats.js";
import { injectSystemPrompt } from "./systemInject.js";
import { CAVEMAN_PROMPTS } from "./cavemanPrompts.js";

const SEP = "\n\n";

export function injectCaveman(body, format, level) {
const prompt = CAVEMAN_PROMPTS[level];
if (!body || !prompt) return;

switch (format) {
case FORMATS.CLAUDE:
injectClaudeSystem(body, prompt);
return;
case FORMATS.GEMINI:
case FORMATS.GEMINI_CLI:
case FORMATS.VERTEX:
case FORMATS.ANTIGRAVITY:
// Antigravity wraps Gemini shape in body.request → injectGeminiSystem handles it
injectGeminiSystem(body, prompt);
return;
default:
// OpenAI and OpenAI-shaped formats (responses/codex/cursor/kiro/ollama)
injectMessagesSystem(body, prompt);
}
}

// OpenAI-shaped: messages[] (chat) or input[] (responses) or instructions (responses string)
function injectMessagesSystem(body, prompt) {
// OpenAI Responses API: top-level string field
if (typeof body.instructions === "string") {
body.instructions = body.instructions
? `${body.instructions}${SEP}${prompt}`
: prompt;
return;
}

const arr = Array.isArray(body.messages) ? body.messages
: Array.isArray(body.input) ? body.input
: null;
if (!arr) return;

const idx = arr.findIndex(m => m && (m.role === "system" || m.role === "developer"));
if (idx >= 0) {
appendToOpenAIMessage(arr[idx], prompt);
} else {
arr.unshift({ role: "system", content: prompt });
}
}

function appendToOpenAIMessage(msg, prompt) {
if (typeof msg.content === "string") {
msg.content = `${msg.content}${SEP}${prompt}`;
} else if (Array.isArray(msg.content)) {
// Responses-style array of parts {type:"input_text"|"text", text}
msg.content.push({ type: "input_text", text: prompt });
} else {
msg.content = prompt;
}
}

// Claude shape: body.system as string | array of {type:"text", text}
// Insert before the last cache_control block to keep caveman inside the cached prefix.
function injectClaudeSystem(body, prompt) {
if (typeof body.system === "string" && body.system.length > 0) {
body.system = `${body.system}${SEP}${prompt}`;
return;
}
if (Array.isArray(body.system)) {
const block = { type: "text", text: prompt };
let lastCacheIdx = -1;
for (let i = body.system.length - 1; i >= 0; i--) {
if (body.system[i]?.cache_control) { lastCacheIdx = i; break; }
}
if (lastCacheIdx >= 0) {
body.system.splice(lastCacheIdx, 0, block);
} else {
body.system.push(block);
}
return;
}
body.system = prompt;
}

// Gemini shape: body.system_instruction | body.systemInstruction | body.request.systemInstruction
// Each shape: { parts: [{ text }] }
function injectGeminiSystem(body, prompt) {
const target = body.request && typeof body.request === "object" ? body.request : body;
const useSnake = Object.prototype.hasOwnProperty.call(target, "system_instruction");
const key = useSnake ? "system_instruction" : "systemInstruction";
const sys = target[key];
if (sys && Array.isArray(sys.parts)) {
sys.parts.push({ text: prompt });
return;
}
target[key] = { parts: [{ text: prompt }] };
injectSystemPrompt(body, format, CAVEMAN_PROMPTS[level]);
}
11 changes: 11 additions & 0 deletions open-sse/rtk/ponytail.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Ponytail injector: appends a "lazy senior dev / write minimal code" instruction
// into the system message of the final request body, just before it is dispatched
// to the provider executor. Format-aware injection lives in the shared
// systemInject helper (same as caveman).

import { injectSystemPrompt } from "./systemInject.js";
import { PONYTAIL_PROMPTS } from "./ponytailPrompts.js";

/**
 * Inject the Ponytail ("lazy senior dev / minimal code") prompt for `level`
 * into the system instruction of `body`.
 *
 * The prompt is resolved as an OWN property of PONYTAIL_PROMPTS. A plain
 * `PONYTAIL_PROMPTS[level]` lookup would also resolve names inherited from
 * Object.prototype (e.g. "constructor", "__proto__"), which are truthy and
 * would be forwarded as if they were a prompt. Unknown levels forward
 * `undefined`, which injectSystemPrompt treats as "nothing to inject".
 *
 * @param {object} body - Final request body; forwarded to injectSystemPrompt.
 * @param {string} format - Format identifier; forwarded to injectSystemPrompt.
 * @param {string} level - Key into PONYTAIL_PROMPTS.
 * @returns {void}
 */
export function injectPonytail(body, format, level) {
  const isKnownLevel = Object.prototype.hasOwnProperty.call(PONYTAIL_PROMPTS, level);
  const prompt = isKnownLevel ? PONYTAIL_PROMPTS[level] : undefined;
  injectSystemPrompt(body, format, prompt);
}
33 changes: 33 additions & 0 deletions open-sse/rtk/ponytailPrompts.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Ponytail intensity-level prompts injected into the system message to curb
// over-engineering (less code, fewer deps, fewer files) while never cutting
// validation, error handling, security or accessibility.
// Adapted from the ponytail ruleset (https://github.com/DietrichGebert/ponytail, MIT).

// Identifiers of the supported Ponytail intensity levels.
export const PONYTAIL_LEVELS = {
  LITE: "lite",
  FULL: "full",
};

// Fragments reused by more than one level.

// Decision ladder walked before any code is written (full level only).
const LADDER_TEXT = "Before writing code, stop at the first rung that holds: 1. Does this need to exist? (YAGNI) 2. Stdlib does it? Use it. 3. Native platform feature? Use it. 4. Installed dependency? Use it. 5. One line? One line. 6. Only then: the minimum that works.";

// Areas where the "lazy" stance must never apply.
const GUARDS_TEXT = "Never lazy about: input validation at trust boundaries, error handling that prevents data loss, security, accessibility, anything explicitly requested. Lazy means less code, not the flimsier algorithm.";

// Keeps the instruction in force across a long conversation.
const PERSISTENCE_TEXT = "ACTIVE EVERY RESPONSE. No revert after many turns. Still active if unsure.";

// Glue prompt sentences together with a single space between them.
const composePrompt = (...sentences) => sentences.join(" ");

const LITE_PROMPT = composePrompt(
  "Act like a lazy senior dev: lazy means efficient, not careless. Prefer the smallest change that fully solves the task.",
  "No abstractions, dependencies, or boilerplate that were not requested. Deletion over addition. Boring over clever.",
  GUARDS_TEXT,
  PERSISTENCE_TEXT,
);

const FULL_PROMPT = composePrompt(
  "Act like the laziest senior dev in the room. The best code is the code never written.",
  LADDER_TEXT,
  "No abstractions that were not requested. No new dependency if it can be avoided. No boilerplate nobody asked for. Deletion over addition. Boring over clever. Fewest files possible.",
  "Question complex requests: \"Do you actually need X, or does Y cover it?\" Mark intentional simplifications with a `ponytail:` comment naming any known ceiling.",
  GUARDS_TEXT,
  PERSISTENCE_TEXT,
);

// Level id -> full prompt text, keyed by the values of PONYTAIL_LEVELS.
export const PONYTAIL_PROMPTS = {
  [PONYTAIL_LEVELS.LITE]: LITE_PROMPT,
  [PONYTAIL_LEVELS.FULL]: FULL_PROMPT,
};
102 changes: 102 additions & 0 deletions open-sse/rtk/systemInject.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Shared system-prompt injector: appends an instruction string into the system
// message of the final request body, just before it is dispatched to the provider
// executor. Dispatches by format so it works for both translated and
// native-passthrough flows. Used by both caveman (terse output) and ponytail
// (minimal code) token savers so the format-handling logic lives in one place.

import { FORMATS } from "../translator/formats.js";

const SEP = "\n\n";

/**
 * Append `prompt` to the system instruction of `body`, using the layout that
 * belongs to `format`. Does nothing when `body` or `prompt` is falsy.
 *
 * Routing:
 *   - FORMATS.CLAUDE                               -> injectClaudeSystem
 *   - FORMATS.GEMINI / GEMINI_CLI / VERTEX / ANTIGRAVITY -> injectGeminiSystem
 *   - anything else (OpenAI and OpenAI-shaped formats:
 *     responses/codex/cursor/kiro/ollama)          -> injectMessagesSystem
 *
 * @param {object} body - Request body; mutated in place by the chosen injector.
 * @param {string} format - One of the FORMATS identifiers.
 * @param {string} prompt - Instruction text to append.
 * @returns {void}
 */
export function injectSystemPrompt(body, format, prompt) {
  if (!body || !prompt) return;

  if (format === FORMATS.CLAUDE) {
    injectClaudeSystem(body, prompt);
    return;
  }

  // Antigravity wraps the Gemini shape in body.request; injectGeminiSystem handles it.
  const isGeminiShaped =
    format === FORMATS.GEMINI ||
    format === FORMATS.GEMINI_CLI ||
    format === FORMATS.VERTEX ||
    format === FORMATS.ANTIGRAVITY;
  if (isGeminiShaped) {
    injectGeminiSystem(body, prompt);
    return;
  }

  injectMessagesSystem(body, prompt);
}

/**
 * Inject `prompt` into an OpenAI-shaped body. Three layouts are recognised,
 * checked in this order:
 *   1. `body.instructions` is a string (Responses API top-level field):
 *      the prompt is appended after SEP, or replaces an empty string.
 *   2. `body.messages` is an array (chat).
 *   3. `body.input` is an array (responses).
 * For the array layouts the first entry whose role is "system" or "developer"
 * receives the prompt via appendToOpenAIMessage; when no such entry exists a
 * new `{ role: "system" }` message is placed at the front. A body with none of
 * these layouts is left untouched.
 *
 * @param {object} body - Request body; mutated in place.
 * @param {string} prompt - Instruction text to add.
 * @returns {void}
 */
function injectMessagesSystem(body, prompt) {
  if (typeof body.instructions === "string") {
    const hasExisting = Boolean(body.instructions);
    body.instructions = hasExisting ? `${body.instructions}${SEP}${prompt}` : prompt;
    return;
  }

  let list = null;
  if (Array.isArray(body.messages)) {
    list = body.messages;
  } else if (Array.isArray(body.input)) {
    list = body.input;
  }
  if (list === null) return;

  const isSystemLike = (entry) => entry && (entry.role === "system" || entry.role === "developer");
  const position = list.findIndex(isSystemLike);
  if (position < 0) {
    list.unshift({ role: "system", content: prompt });
    return;
  }
  appendToOpenAIMessage(list[position], prompt);
}

/**
 * Append `prompt` to a single OpenAI-shaped system/developer message.
 *
 *   - string content: the prompt is appended after SEP.
 *   - array content: a new text part is pushed. Its `type` copies the most
 *     recent text part already in the array, so chat-style `{ type: "text" }`
 *     arrays stay chat-style and Responses-style `{ type: "input_text" }`
 *     arrays stay Responses-style. Previously "input_text" was always pushed,
 *     which put a Responses part type into chat `messages[]` content.
 *     When the array holds no text part, "input_text" is used (the
 *     original default).
 *   - anything else (missing/null/object content): content becomes the prompt.
 *
 * @param {object} msg - Message object; mutated in place.
 * @param {string} prompt - Instruction text to append.
 * @returns {void}
 */
function appendToOpenAIMessage(msg, prompt) {
  const { content } = msg;

  if (typeof content === "string") {
    msg.content = `${content}${SEP}${prompt}`;
    return;
  }

  if (Array.isArray(content)) {
    let partType = "input_text";
    // Walk backwards so the part closest to the insertion point decides the type.
    for (let i = content.length - 1; i >= 0; i -= 1) {
      const existingType = content[i]?.type;
      if (existingType === "text" || existingType === "input_text") {
        partType = existingType;
        break;
      }
    }
    content.push({ type: partType, text: prompt });
    return;
  }

  msg.content = prompt;
}

/**
 * Inject `prompt` into a Claude-shaped body, where `body.system` is either a
 * string or an array of `{ type: "text", text }` blocks.
 *
 *   - array: a new text block is inserted immediately before the LAST block
 *     carrying `cache_control`, so the prompt stays inside the cached prefix;
 *     when no block carries `cache_control` the new block goes at the end.
 *   - non-empty string: the prompt is appended after SEP.
 *   - anything else (missing, empty string, other types): `body.system`
 *     becomes the prompt.
 *
 * @param {object} body - Request body; mutated in place.
 * @param {string} prompt - Instruction text to add.
 * @returns {void}
 */
function injectClaudeSystem(body, prompt) {
  const current = body.system;

  if (Array.isArray(current)) {
    // Scan from the tail for the last block that has cache_control.
    let cacheAt = current.length - 1;
    while (cacheAt >= 0 && !current[cacheAt]?.cache_control) {
      cacheAt -= 1;
    }
    const insertAt = cacheAt >= 0 ? cacheAt : current.length;
    current.splice(insertAt, 0, { type: "text", text: prompt });
    return;
  }

  const hasText = typeof current === "string" && current.length > 0;
  body.system = hasText ? `${current}${SEP}${prompt}` : prompt;
}

/**
 * Inject `prompt` into a Gemini-shaped body. The system instruction may live at
 * `body.system_instruction`, `body.systemInstruction`, or
 * `body.request.systemInstruction`, each shaped `{ parts: [{ text }] }`.
 *
 * When `body.request` is an object, it is the container that gets modified;
 * otherwise `body` itself is. The snake_case key is used only when the
 * container already owns a `system_instruction` property; in every other case
 * the camelCase key is used. An existing `parts` array gets a new `{ text }`
 * entry; anything else under that key is replaced by a fresh instruction.
 *
 * @param {object} body - Request body; mutated in place.
 * @param {string} prompt - Instruction text to add.
 * @returns {void}
 */
function injectGeminiSystem(body, prompt) {
  const wrapped = body.request;
  const container = wrapped && typeof wrapped === "object" ? wrapped : body;

  let field = "systemInstruction";
  if (Object.prototype.hasOwnProperty.call(container, "system_instruction")) {
    field = "system_instruction";
  }

  const existing = container[field];
  const part = { text: prompt };
  if (Array.isArray(existing?.parts)) {
    existing.parts.push(part);
  } else {
    container[field] = { parts: [part] };
  }
}
62 changes: 62 additions & 0 deletions src/app/(dashboard)/dashboard/endpoint/EndpointPageClient.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
REACHABLE_MISS_THRESHOLD,
CLIENT_PING_FAST_MS,
CAVEMAN_LEVELS,
PONYTAIL_LEVELS,
} from "./endpointConstants";
import { clientPingUrl, clientPingAny } from "./endpointPing";
import EndpointRow from "./components/EndpointRow";
Expand All @@ -35,6 +36,8 @@ export default function APIPageClient({ machineId }) {
const [rtkEnabled, setRtkEnabledState] = useState(true);
const [cavemanEnabled, setCavemanEnabled] = useState(false);
const [cavemanLevel, setCavemanLevel] = useState("full");
const [ponytailEnabled, setPonytailEnabled] = useState(false);
const [ponytailLevel, setPonytailLevel] = useState("full");
const [locale, setLocale] = useState("en");

// Cloudflare Tunnel state
Expand Down Expand Up @@ -234,6 +237,8 @@ export default function APIPageClient({ machineId }) {
setRtkEnabledState(data.rtkEnabled !== false);
setCavemanEnabled(!!data.cavemanEnabled);
setCavemanLevel(data.cavemanLevel || "full");
setPonytailEnabled(!!data.ponytailEnabled);
setPonytailLevel(data.ponytailLevel || "full");
}
if (statusRes.ok) {
const data = await statusRes.json();
Expand Down Expand Up @@ -318,6 +323,16 @@ export default function APIPageClient({ machineId }) {
patchSetting({ cavemanLevel: level });
};

// Ponytail on/off handler: stores the new flag in component state, then hands
// the same flag to patchSetting under the `ponytailEnabled` key.
const handlePonytailEnabled = (value) => {
  const patch = { ponytailEnabled: value };
  setPonytailEnabled(value);
  patchSetting(patch);
};

// Ponytail level handler: stores the chosen level id in component state, then
// hands the same id to patchSetting under the `ponytailLevel` key.
const handlePonytailLevel = (level) => {
  const patch = { ponytailLevel: level };
  setPonytailLevel(level);
  patchSetting(patch);
};

const fetchData = async () => {
try {
const keysRes = await fetch("/api/keys");
Expand Down Expand Up @@ -1090,6 +1105,53 @@ export default function APIPageClient({ machineId }) {
/>
</div>
</div>
<div className="flex items-center justify-between pt-4 gap-4 flex-wrap border-t border-border">
<div className="min-w-0 flex-1">
<p className="font-medium">
Write less code{" "}
<a
href="https://github.com/DietrichGebert/ponytail"
target="_blank"
rel="noreferrer"
className="text-xs font-normal text-primary underline hover:opacity-80"
>
(Ponytail)
</a>
</p>
<p className="text-sm text-text-muted">
Lazy-senior-dev system prompt → less over-engineering, fewer files
</p>
</div>
<div className="flex items-center gap-3 shrink-0">
{ponytailEnabled && (
<div className="flex flex-col items-end gap-1">
<div className="flex items-center gap-1.5">
{PONYTAIL_LEVELS.map((lvl) => (
<button
key={lvl.id}
onClick={() => handlePonytailLevel(lvl.id)}
className={`px-3 py-1.5 rounded text-xs font-medium border transition-colors ${
ponytailLevel === lvl.id
? "bg-primary text-white border-primary"
: "bg-transparent border-border text-text-muted hover:bg-surface-2"
}`}
title={lvl.desc}
>
{lvl.label}
</button>
))}
</div>
<p className="text-xs text-primary">
{PONYTAIL_LEVELS.find((lvl) => lvl.id === ponytailLevel)?.desc}
</p>
</div>
)}
<Toggle
checked={ponytailEnabled}
onChange={() => handlePonytailEnabled(!ponytailEnabled)}
/>
</div>
</div>
</Card>

{/* API Keys */}
Expand Down
Loading