decolua · enjoyer-hub · Jun 19, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # v0.5.4 (2026-06-18)
 
+## Features
+- **Ponytail**: inject a lazy-senior-dev (minimal-code) system prompt to curb over-engineering; dashboard toggle next to Caveman, lite/full levels. Caveman and Ponytail now share one format-aware system-prompt injector.
+
 ## Fixes
 - **Kiro**: honor thinking effort budgets
 - **AG/Kiro/Xiaomi**: provider fixes

diff --git a/README.md b/README.md
@@ -408,6 +408,7 @@ Default URLs:
 |---------|--------------|----------------|
 | 🚀 **RTK Token Saver** ([RTK](https://github.com/rtk-ai/rtk) ⭐40K) | Compress tool outputs (`git diff`, `grep`, `ls`, `tree`...) before sending to LLM | Save **20-40% input tokens** per request |
 | 🪨 **Caveman Mode** ([Caveman](https://github.com/JuliusBrussee/caveman) ⭐52K) | Inject caveman-speak prompt → LLM replies terse, technical substance preserved | Save **up to 65% output tokens** |
+| 🐴 **Ponytail Mode** ([Ponytail](https://github.com/DietrichGebert/ponytail) ⭐38K) | Inject lazy-senior-dev prompt → LLM writes the minimum code that works, never cutting validation/security | **Less over-engineering**, fewer files |
 | 🎯 **Smart 3-Tier Fallback** | Auto-route: Subscription → Cheap → Free | Never stop coding, zero downtime |
 | 📊 **Real-Time Quota Tracking** | Live token count + reset countdown | Maximize subscription value |
 | 🔄 **Format Translation** | OpenAI ↔ Claude ↔ Gemini ↔ Cursor ↔ Kiro ↔ Vertex | Works with any CLI tool |

diff --git a/open-sse/handlers/chatCore.js b/open-sse/handlers/chatCore.js
@@ -20,6 +20,7 @@ import { handleStreamingResponse, buildOnStreamComplete } from "./chatCore/strea
 import { detectClientTool, isNativePassthrough } from "../utils/clientDetector.js";
 import { dedupeTools } from "../utils/toolDeduper.js";
 import { injectCaveman } from "../rtk/caveman.js";
+import { injectPonytail } from "../rtk/ponytail.js";
 import { compressMessages, formatRtkLog } from "../rtk/index.js";
 import { getCapabilitiesForModel } from "../providers/capabilities.js";
 import { stripUnsupportedModalities } from "../translator/concerns/modality.js";
@@ -32,7 +33,7 @@ import { prefetchRemoteImages } from "../translator/concerns/prefetch.js";
  * @param {object} options.credentials - Provider credentials
  * @param {string} options.sourceFormatOverride - Override detected source format (e.g. "openai-responses")
  */
-export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, rtkEnabled, cavemanEnabled, cavemanLevel, sourceFormatOverride, providerThinking }) {
+export async function handleChatCore({ body, modelInfo, credentials, log, onCredentialsRefreshed, onRequestSuccess, onDisconnect, clientRawRequest, connectionId, userAgent, apiKey, ccFilterNaming, rtkEnabled, cavemanEnabled, cavemanLevel, ponytailEnabled, ponytailLevel, sourceFormatOverride, providerThinking }) {
   const { provider, model } = modelInfo;
   const requestStartTime = Date.now();
 
@@ -155,6 +156,12 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred
     log?.debug?.("CAVEMAN", `${cavemanLevel} | ${finalFormat}`);
   }
 
+  // Ponytail: inject minimal-code (lazy senior dev) system prompt
+  if (ponytailEnabled && ponytailLevel) {
+    injectPonytail(translatedBody, finalFormat, ponytailLevel);
+    log?.debug?.("PONYTAIL", `${ponytailLevel} | ${finalFormat}`);
+  }
+
   const executor = getExecutor(provider);
   trackPendingRequest(model, provider, connectionId, true);
   appendRequestLog({ model, provider, connectionId, status: "PENDING" }).catch(() => { });

diff --git a/open-sse/rtk/caveman.js b/open-sse/rtk/caveman.js
@@ -1,100 +1,10 @@
 // Caveman injector: appends a caveman-style instruction into the system message
 // of the final request body, just before it is dispatched to the provider executor.
-// Dispatches by format so it works for both translated and native-passthrough flows.
+// Format-aware injection lives in the shared systemInject helper.
 
-import { FORMATS } from "../translator/formats.js";
+import { injectSystemPrompt } from "./systemInject.js";
 import { CAVEMAN_PROMPTS } from "./cavemanPrompts.js";
 
-const SEP = "\n\n";
-
 export function injectCaveman(body, format, level) {
-  const prompt = CAVEMAN_PROMPTS[level];
-  if (!body || !prompt) return;
-
-  switch (format) {
-    case FORMATS.CLAUDE:
-      injectClaudeSystem(body, prompt);
-      return;
-    case FORMATS.GEMINI:
-    case FORMATS.GEMINI_CLI:
-    case FORMATS.VERTEX:
-    case FORMATS.ANTIGRAVITY:
-      // Antigravity wraps Gemini shape in body.request → injectGeminiSystem handles it
-      injectGeminiSystem(body, prompt);
-      return;
-    default:
-      // OpenAI and OpenAI-shaped formats (responses/codex/cursor/kiro/ollama)
-      injectMessagesSystem(body, prompt);
-  }
-}
-
-// OpenAI-shaped: messages[] (chat) or input[] (responses) or instructions (responses string)
-function injectMessagesSystem(body, prompt) {
-  // OpenAI Responses API: top-level string field
-  if (typeof body.instructions === "string") {
-    body.instructions = body.instructions
-      ? `${body.instructions}${SEP}${prompt}`
-      : prompt;
-    return;
-  }
-
-  const arr = Array.isArray(body.messages) ? body.messages
-    : Array.isArray(body.input) ? body.input
-    : null;
-  if (!arr) return;
-
-  const idx = arr.findIndex(m => m && (m.role === "system" || m.role === "developer"));
-  if (idx >= 0) {
-    appendToOpenAIMessage(arr[idx], prompt);
-  } else {
-    arr.unshift({ role: "system", content: prompt });
-  }
-}
-
-function appendToOpenAIMessage(msg, prompt) {
-  if (typeof msg.content === "string") {
-    msg.content = `${msg.content}${SEP}${prompt}`;
-  } else if (Array.isArray(msg.content)) {
-    // Responses-style array of parts {type:"input_text"|"text", text}
-    msg.content.push({ type: "input_text", text: prompt });
-  } else {
-    msg.content = prompt;
-  }
-}
-
-// Claude shape: body.system as string | array of {type:"text", text}
-// Insert before the last cache_control block to keep caveman inside the cached prefix.
-function injectClaudeSystem(body, prompt) {
-  if (typeof body.system === "string" && body.system.length > 0) {
-    body.system = `${body.system}${SEP}${prompt}`;
-    return;
-  }
-  if (Array.isArray(body.system)) {
-    const block = { type: "text", text: prompt };
-    let lastCacheIdx = -1;
-    for (let i = body.system.length - 1; i >= 0; i--) {
-      if (body.system[i]?.cache_control) { lastCacheIdx = i; break; }
-    }
-    if (lastCacheIdx >= 0) {
-      body.system.splice(lastCacheIdx, 0, block);
-    } else {
-      body.system.push(block);
-    }
-    return;
-  }
-  body.system = prompt;
-}
-
-// Gemini shape: body.system_instruction | body.systemInstruction | body.request.systemInstruction
-// Each shape: { parts: [{ text }] }
-function injectGeminiSystem(body, prompt) {
-  const target = body.request && typeof body.request === "object" ? body.request : body;
-  const useSnake = Object.prototype.hasOwnProperty.call(target, "system_instruction");
-  const key = useSnake ? "system_instruction" : "systemInstruction";
-  const sys = target[key];
-  if (sys && Array.isArray(sys.parts)) {
-    sys.parts.push({ text: prompt });
-    return;
-  }
-  target[key] = { parts: [{ text: prompt }] };
+  injectSystemPrompt(body, format, CAVEMAN_PROMPTS[level]);
 }
diff --git a/open-sse/rtk/ponytail.js b/open-sse/rtk/ponytail.js
@@ -0,0 +1,11 @@
+// Ponytail injector: appends a "lazy senior dev / write minimal code" instruction
+// into the system message of the final request body, just before it is dispatched
+// to the provider executor. Format-aware injection lives in the shared
+// systemInject helper (same as caveman).
+
+import { injectSystemPrompt } from "./systemInject.js";
+import { PONYTAIL_PROMPTS } from "./ponytailPrompts.js";
+
+export function injectPonytail(body, format, level) {
+  injectSystemPrompt(body, format, PONYTAIL_PROMPTS[level]); // level without a PONYTAIL_PROMPTS entry → undefined prompt, which injectSystemPrompt treats as a no-op
+}
diff --git a/open-sse/rtk/ponytailPrompts.js b/open-sse/rtk/ponytailPrompts.js
@@ -0,0 +1,33 @@
+// Ponytail intensity-level prompts injected into the system message to curb
+// over-engineering (less code, fewer deps, fewer files) while never cutting
+// validation, error handling, security or accessibility.
+// Adapted from the ponytail ruleset (https://github.com/DietrichGebert/ponytail, MIT).
+
+export const PONYTAIL_LEVELS = { // level ids; the values double as the keys of PONYTAIL_PROMPTS
+  LITE: "lite",
+  FULL: "full",
+};
+
+const SHARED_LADDER = "Before writing code, stop at the first rung that holds: 1. Does this need to exist? (YAGNI) 2. Stdlib does it? Use it. 3. Native platform feature? Use it. 4. Installed dependency? Use it. 5. One line? One line. 6. Only then: the minimum that works."; // used by the FULL prompt only
+
+const SHARED_GUARDS = "Never lazy about: input validation at trust boundaries, error handling that prevents data loss, security, accessibility, anything explicitly requested. Lazy means less code, not the flimsier algorithm."; // part of both levels
+
+const SHARED_PERSISTENCE = "ACTIVE EVERY RESPONSE. No revert after many turns. Still active if unsure."; // part of both levels
+
+export const PONYTAIL_PROMPTS = { // level id → single prompt string
+  [PONYTAIL_LEVELS.LITE]: [
+    "Act like a lazy senior dev: lazy means efficient, not careless. Prefer the smallest change that fully solves the task.",
+    "No abstractions, dependencies, or boilerplate that were not requested. Deletion over addition. Boring over clever.",
+    SHARED_GUARDS,
+    SHARED_PERSISTENCE,
+  ].join(" "), // fragments are joined with single spaces
+
+  [PONYTAIL_LEVELS.FULL]: [
+    "Act like the laziest senior dev in the room. The best code is the code never written.",
+    SHARED_LADDER,
+    "No abstractions that were not requested. No new dependency if it can be avoided. No boilerplate nobody asked for. Deletion over addition. Boring over clever. Fewest files possible.",
+    "Question complex requests: \"Do you actually need X, or does Y cover it?\" Mark intentional simplifications with a `ponytail:` comment naming any known ceiling.",
+    SHARED_GUARDS,
+    SHARED_PERSISTENCE,
+  ].join(" "),
+};
diff --git a/open-sse/rtk/systemInject.js b/open-sse/rtk/systemInject.js
@@ -0,0 +1,102 @@
+// Shared system-prompt injector: appends an instruction string into the system
+// message of the final request body, just before it is dispatched to the provider
+// executor. Dispatches by format so it works for both translated and
+// native-passthrough flows. Used by both caveman (terse output) and ponytail
+// (minimal code) token savers so the format-handling logic lives in one place.
+
+import { FORMATS } from "../translator/formats.js";
+
+const SEP = "\n\n";
+
+// Adds `prompt` to the system instruction of `body`, dispatching on `format`
+// to the handler for that body shape. Does nothing if body or prompt is falsy.
+export function injectSystemPrompt(body, format, prompt) {
+  if (!(body && prompt)) return;
+
+  // Claude shape
+  if (format === FORMATS.CLAUDE) {
+    injectClaudeSystem(body, prompt);
+    return;
+  }
+
+  // Gemini family. Antigravity wraps Gemini shape in body.request → injectGeminiSystem handles it
+  const geminiShaped = [FORMATS.GEMINI, FORMATS.GEMINI_CLI, FORMATS.VERTEX, FORMATS.ANTIGRAVITY];
+  if (geminiShaped.some(f => f === format)) {
+    injectGeminiSystem(body, prompt);
+    return;
+  }
+
+  // Anything else: OpenAI and OpenAI-shaped formats (responses/codex/cursor/kiro/ollama)
+  injectMessagesSystem(body, prompt);
+}
+
+// OpenAI-shaped: `instructions` string (responses), else messages[] (chat) or input[] (responses)
+function injectMessagesSystem(body, prompt) {
+  const { instructions } = body;
+  if (typeof instructions === "string") {
+    // OpenAI Responses API top-level field: append to it, or take its place when empty
+    body.instructions = instructions === "" ? prompt : `${instructions}${SEP}${prompt}`;
+    return;
+  }
+
+  let list = null;
+  if (Array.isArray(body.messages)) list = body.messages;
+  else if (Array.isArray(body.input)) list = body.input;
+  if (list === null) return;
+
+  // Reuse the first system/developer entry; with none present, prepend a system message
+  const target = list.find(m => m && (m.role === "system" || m.role === "developer"));
+  if (target) {
+    appendToOpenAIMessage(target, prompt);
+    return;
+  }
+  list.unshift({ role: "system", content: prompt });
+}
+
+function appendToOpenAIMessage(msg, prompt) { // appends `prompt` to msg.content, mutating msg in place
+  if (typeof msg.content === "string") {
+    msg.content = `${msg.content}${SEP}${prompt}`;
+  } else if (Array.isArray(msg.content)) {
+    // Parts array: chat parts are {type:"text"}, Responses parts {type:"input_text"}; reuse the kind already present (default input_text)
+    msg.content.push({ type: msg.content.some(p => p?.type === "text") ? "text" : "input_text", text: prompt });
+  } else {
+    msg.content = prompt; // neither string nor array (e.g. missing content): the prompt becomes the content
+  }
+}
+
+// Claude shape: body.system is a string or an array of {type:"text", text} blocks.
+// Array case: insert ahead of the last cache_control block so the prompt stays in the cached prefix.
+function injectClaudeSystem(body, prompt) {
+  const { system } = body;
+  if (typeof system === "string" && system.length > 0) {
+    body.system = `${system}${SEP}${prompt}`;
+    return;
+  }
+  if (!Array.isArray(system)) {
+    // Absent, empty-string or any other non-array system: the prompt becomes the system
+    body.system = prompt;
+    return;
+  }
+
+  // Scan from the end for the last block that carries cache_control
+  let pos = system.length - 1;
+  while (pos >= 0 && !system[pos]?.cache_control) pos--;
+
+  const textBlock = { type: "text", text: prompt };
+  if (pos >= 0) system.splice(pos, 0, textBlock);
+  else system.push(textBlock);
+}
+
+// Gemini shape: system_instruction or systemInstruction on the body, or on body.request when that is an object.
+// Each holds { parts: [{ text }] }; the prompt is appended as one more part, or the field is (re)created with it.
+function injectGeminiSystem(body, prompt) {
+  const target = body.request && typeof body.request === "object" ? body.request : body; // a wrapped request takes precedence over the top level
+  const useSnake = Object.prototype.hasOwnProperty.call(target, "system_instruction"); // snake_case is kept only when it is already an own property
+  const key = useSnake ? "system_instruction" : "systemInstruction";
+  const sys = target[key];
+  if (sys && Array.isArray(sys.parts)) {
+    sys.parts.push({ text: prompt });
+    return;
+  }
+  target[key] = { parts: [{ text: prompt }] }; // field missing or without a parts array: overwrite it
+}
diff --git a/src/app/(dashboard)/dashboard/endpoint/EndpointPageClient.js b/src/app/(dashboard)/dashboard/endpoint/EndpointPageClient.js
@@ -14,6 +14,7 @@ import {
   REACHABLE_MISS_THRESHOLD,
   CLIENT_PING_FAST_MS,
   CAVEMAN_LEVELS,
+  PONYTAIL_LEVELS,
 } from "./endpointConstants";
 import { clientPingUrl, clientPingAny } from "./endpointPing";
 import EndpointRow from "./components/EndpointRow";
@@ -35,6 +36,8 @@ export default function APIPageClient({ machineId }) {
   const [rtkEnabled, setRtkEnabledState] = useState(true);
   const [cavemanEnabled, setCavemanEnabled] = useState(false);
   const [cavemanLevel, setCavemanLevel] = useState("full");
+  const [ponytailEnabled, setPonytailEnabled] = useState(false);
+  const [ponytailLevel, setPonytailLevel] = useState("full");
   const [locale, setLocale] = useState("en");
 
   // Cloudflare Tunnel state
@@ -234,6 +237,8 @@ export default function APIPageClient({ machineId }) {
         setRtkEnabledState(data.rtkEnabled !== false);
         setCavemanEnabled(!!data.cavemanEnabled);
         setCavemanLevel(data.cavemanLevel || "full");
+        setPonytailEnabled(!!data.ponytailEnabled);
+        setPonytailLevel(data.ponytailLevel || "full");
       }
       if (statusRes.ok) {
         const data = await statusRes.json();
@@ -318,6 +323,16 @@ export default function APIPageClient({ machineId }) {
     patchSetting({ cavemanLevel: level });
   };
 
+  const handlePonytailEnabled = (value) => {
+    setPonytailEnabled(value);
+    patchSetting({ ponytailEnabled: value });
+  };
+
+  const handlePonytailLevel = (level) => {
+    setPonytailLevel(level);
+    patchSetting({ ponytailLevel: level });
+  };
+
   const fetchData = async () => {
     try {
       const keysRes = await fetch("/api/keys");
@@ -1090,6 +1105,53 @@ export default function APIPageClient({ machineId }) {
             />
           </div>
         </div>
+        <div className="flex items-center justify-between pt-4 gap-4 flex-wrap border-t border-border">
+          <div className="min-w-0 flex-1">
+            <p className="font-medium">
+              Write less code{" "}
+              <a
+                href="https://github.com/DietrichGebert/ponytail"
+                target="_blank"
+                rel="noreferrer"
+                className="text-xs font-normal text-primary underline hover:opacity-80"
+              >
+                (Ponytail)
+              </a>
+            </p>
+            <p className="text-sm text-text-muted">
+              Lazy-senior-dev system prompt → less over-engineering, fewer files
+            </p>
+          </div>
+          <div className="flex items-center gap-3 shrink-0">
+            {ponytailEnabled && (
+              <div className="flex flex-col items-end gap-1">
+                <div className="flex items-center gap-1.5">
+                  {PONYTAIL_LEVELS.map((lvl) => (
+                    <button
+                      key={lvl.id}
+                      onClick={() => handlePonytailLevel(lvl.id)}
+                      className={`px-3 py-1.5 rounded text-xs font-medium border transition-colors ${
+                        ponytailLevel === lvl.id
+                          ? "bg-primary text-white border-primary"
+                          : "bg-transparent border-border text-text-muted hover:bg-surface-2"
+                      }`}
+                      title={lvl.desc}
+                    >
+                      {lvl.label}
+                    </button>
+                  ))}
+                </div>
+                <p className="text-xs text-primary">
+                  {PONYTAIL_LEVELS.find((lvl) => lvl.id === ponytailLevel)?.desc}
+                </p>
+              </div>
+            )}
+            <Toggle
+              checked={ponytailEnabled}
+              onChange={() => handlePonytailEnabled(!ponytailEnabled)}
+            />
+          </div>
+        </div>
       </Card>
 
       {/* API Keys */}