diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ae9fdd05..5d455a23 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -30,7 +30,7 @@ Thank you for your interest in contributing to Efrit! This guide will help you g
2. **Set up your API key** in `~/.authinfo`:
```
- machine api.anthropic.com login personal password YOUR_API_KEY_HERE
+ machine openrouter.ai login personal password YOUR_API_KEY_HERE
```
3. **Load Efrit for development**:
diff --git a/README.md b/README.md
index 10b0a2f8..f3cd9e5c 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ Efrit provides multiple interfaces for AI-powered Emacs development:
3. **Configure your API key** in `~/.authinfo`:
```
- machine api.anthropic.com login personal password YOUR_API_KEY_HERE
+ machine openrouter.ai login personal password YOUR_API_KEY_HERE
```
4. **Restart Emacs** and test with `M-x efrit-chat`
@@ -218,7 +218,7 @@ Transform Efrit from a user assistant into an **autonomous AI development platfo
```elisp
;; Standard Efrit settings
-(setq efrit-model "claude-3-5-sonnet-20241022")
+(setq efrit-model "anthropic/claude-sonnet-4")
(setq efrit-max-tokens 8192)
;; š Agent communication settings
diff --git a/lisp/efrit-agent.el b/lisp/efrit-agent.el
index 9e4a8fdd..2a95213f 100644
--- a/lisp/efrit-agent.el
+++ b/lisp/efrit-agent.el
@@ -31,10 +31,10 @@
:group 'efrit
:prefix "efrit-agent-")
-(defcustom efrit-agent-backend "claude-3.5-sonnet"
+(defcustom efrit-agent-backend "anthropic/claude-sonnet-4"
"Default model backend for agent mode."
- :type '(choice (const "claude-3.5-sonnet")
- (const "gpt-4")
+ :type '(choice (const "anthropic/claude-sonnet-4")
+ (const "gpt-4")
(const "local-llama")
(string :tag "Custom API endpoint"))
:group 'efrit-agent)
@@ -44,12 +44,12 @@
:type 'integer
:group 'efrit-agent)
-(defcustom efrit-agent-api-url "https://api.anthropic.com/v1/messages"
+(defcustom efrit-agent-api-url "https://openrouter.ai/api/v1/chat/completions"
"API URL for Claude requests."
:type 'string
:group 'efrit-agent)
-(defcustom efrit-agent-model "claude-4-sonnet-20250514"
+(defcustom efrit-agent-model "anthropic/claude-sonnet-4"
"Model to use for agent requests. Updated to latest Claude 4 Sonnet."
:type 'string
:group 'efrit-agent)
@@ -166,7 +166,7 @@
(defun efrit-agent--get-api-key ()
"Get the Anthropic API key from .authinfo file."
(efrit-agent--log "DEBUG" "Looking for API key in .authinfo")
- (let* ((auth-info (car (auth-source-search :host "api.anthropic.com"
+ (let* ((auth-info (car (auth-source-search :host "openrouter.ai"
:user "personal"
:require '(:secret))))
(secret (plist-get auth-info :secret)))
@@ -176,7 +176,7 @@
(if (functionp secret)
(funcall secret)
secret))
- (efrit-agent--log "ERROR" "No API key found in .authinfo for api.anthropic.com")
+ (efrit-agent--log "ERROR" "No API key found in .authinfo for openrouter.ai")
nil)))
(defun efrit-agent--build-system-prompt ()
diff --git a/lisp/efrit-chat-streamlined.el b/lisp/efrit-chat-streamlined.el
index dad81734..d550e0a1 100644
--- a/lisp/efrit-chat-streamlined.el
+++ b/lisp/efrit-chat-streamlined.el
@@ -40,7 +40,7 @@
:group 'tools
:prefix "efrit-")
-(defcustom efrit-model "claude-3-5-sonnet-20241022"
+(defcustom efrit-model "anthropic/claude-sonnet-4"
"Claude model to use for conversations."
:type 'string
:group 'efrit)
@@ -55,7 +55,7 @@
:type 'float
:group 'efrit)
-(defcustom efrit-api-url "https://api.anthropic.com/v1/messages"
+(defcustom efrit-api-url "https://openrouter.ai/api/v1/chat/completions"
"URL for the Anthropic API endpoint."
:type 'string
:group 'efrit)
diff --git a/lisp/efrit-chat.el b/lisp/efrit-chat.el
index b426858a..b1d20f3c 100644
--- a/lisp/efrit-chat.el
+++ b/lisp/efrit-chat.el
@@ -46,7 +46,7 @@
:type 'string
:group 'efrit)
-(defcustom efrit-model "claude-3-5-sonnet-20241022"
+(defcustom efrit-model "anthropic/claude-sonnet-4"
"Claude model to use for conversations."
:type 'string
:group 'efrit)
@@ -63,7 +63,7 @@ or 4096 without. This setting uses the higher limit."
:type 'float
:group 'efrit)
-(defcustom efrit-api-url "https://api.anthropic.com/v1/messages"
+(defcustom efrit-api-url "https://openrouter.ai/api/v1/chat/completions"
"URL for the Anthropic API endpoint."
:type 'string
:group 'efrit)
@@ -126,7 +126,7 @@ or 4096 without. This setting uses the higher limit."
(defun efrit--get-api-key ()
"Get the Anthropic API key from .authinfo file."
- (let* ((auth-info (car (auth-source-search :host "api.anthropic.com"
+ (let* ((auth-info (car (auth-source-search :host "openrouter.ai"
:user "personal"
:require '(:secret))))
(secret (plist-get auth-info :secret)))
@@ -228,9 +228,7 @@ or 4096 without. This setting uses the higher limit."
(let* ((api-key (efrit--get-api-key))
(url-request-method "POST")
(url-request-extra-headers
- `(("x-api-key" . ,api-key)
- ("anthropic-version" . "2023-06-01")
- ("anthropic-beta" . "max-tokens-3-5-sonnet-2024-07-15")
+ `(("Authorization" . ,(concat "Bearer " api-key)) ; api-key is bound above via (efrit--get-api-key)
("content-type" . "application/json")))
(system-prompt (when efrit-enable-tools (efrit-tools-system-prompt)))
(request-data
diff --git a/lisp/efrit-do.el b/lisp/efrit-do.el
index 5b886d36..87dc15e4 100644
--- a/lisp/efrit-do.el
+++ b/lisp/efrit-do.el
@@ -84,7 +84,7 @@ If nil, uses the default location in the efrit data directory."
:type 'boolean
:group 'efrit-do)
-(defcustom efrit-model "claude-3-5-sonnet-20241022"
+(defcustom efrit-model "anthropic/claude-sonnet-4"
"Claude model to use for efrit-do commands."
:type 'string
:group 'efrit-do)
@@ -94,7 +94,7 @@ If nil, uses the default location in the efrit data directory."
:type 'integer
:group 'efrit-do)
-(defcustom efrit-api-url "https://api.anthropic.com/v1/messages"
+(defcustom efrit-api-url "https://openrouter.ai/api/v1/chat/completions"
"URL for the Anthropic API endpoint used by efrit-do."
:type 'string
:group 'efrit-do)
@@ -710,92 +710,102 @@ When `efrit-do-show-errors-only' is non-nil, only show buffer for errors."
display-buffer-below-selected
(window-height . 10)))))))
+
(defun efrit-do--execute-command (command &optional retry-count error-msg previous-code)
"Execute natural language COMMAND and return the result.
-Uses improved error handling. If RETRY-COUNT is provided, this is a retry
+Uses improved error handling. If RETRY-COUNT is provided, this is a retry
attempt with ERROR-MSG and PREVIOUS-CODE from the failed attempt."
(condition-case api-err
(let* ((api-key (efrit--get-api-key))
(url-request-method "POST")
(url-request-extra-headers
- `(("x-api-key" . ,api-key)
- ("anthropic-version" . "2023-06-01")
+ `(("Authorization" . ,(concat "Bearer " api-key)) ; api-key is bound above via (efrit--get-api-key)
("content-type" . "application/json")))
(system-prompt (efrit-do--command-system-prompt retry-count error-msg previous-code))
(request-data
`(("model" . ,efrit-model)
("max_tokens" . ,efrit-max-tokens)
("temperature" . 0.0)
- ("messages" . [(("role" . "user")
+ ("messages" . [(("role" . "system")
+ ("content" . ,system-prompt))
+ (("role" . "user")
("content" . ,command))])
- ("system" . ,system-prompt)
- ("tools" . [(("name" . "eval_sexp")
+ ("tools" . [(("type" . "function")
+ ("function" . (("name" . "eval_sexp")
("description" . "Evaluate a Lisp expression and return the result. This is the primary tool for interacting with Emacs.")
- ("input_schema" . (("type" . "object")
+ ("parameters" . (("type" . "object")
("properties" . (("expr" . (("type" . "string")
("description" . "The Elisp expression to evaluate")))))
- ("required" . ["expr"]))))
- (("name" . "shell_exec")
+ ("required" . ["expr"]))))))
+ (("type" . "function")
+ ("function" . (("name" . "shell_exec")
("description" . "Execute a shell command and return the result.")
- ("input_schema" . (("type" . "object")
+ ("parameters" . (("type" . "object")
("properties" . (("command" . (("type" . "string")
("description" . "The shell command to execute")))))
- ("required" . ["command"]))))
- (("name" . "todo_add")
+ ("required" . ["command"]))))))
+ (("type" . "function")
+ ("function" . (("name" . "todo_add")
("description" . "Add a new TODO item to track progress.")
- ("input_schema" . (("type" . "object")
+ ("parameters" . (("type" . "object")
("properties" . (("content" . (("type" . "string")
("description" . "The TODO item description")))
("priority" . (("type" . "string")
("enum" . ["low" "medium" "high"])
("description" . "Priority level")))))
- ("required" . ["content"]))))
- (("name" . "todo_update")
+ ("required" . ["content"]))))))
+ (("type" . "function")
+ ("function" . (("name" . "todo_update")
("description" . "Update the status of a TODO item.")
- ("input_schema" . (("type" . "object")
+ ("parameters" . (("type" . "object")
("properties" . (("id" . (("type" . "string")
("description" . "The TODO item ID")))
("status" . (("type" . "string")
("enum" . ["todo" "in-progress" "completed"])
("description" . "New status")))))
- ("required" . ["id" "status"]))))
- (("name" . "todo_show")
+ ("required" . ["id" "status"]))))))
+ (("type" . "function")
+ ("function" . (("name" . "todo_show")
("description" . "Show all current TODO items.")
- ("input_schema" . (("type" . "object")
- ("properties" . ()))))
- (("name" . "buffer_create")
+ ("parameters" . (("type" . "object")
+ ("properties" . ()))))))
+ (("type" . "function")
+ ("function" . (("name" . "buffer_create")
("description" . "Create a new buffer with content and optional mode. Use this for reports, lists, and formatted output.")
- ("input_schema" . (("type" . "object")
+ ("parameters" . (("type" . "object")
("properties" . (("name" . (("type" . "string")
("description" . "Buffer name (e.g. '*efrit-report: Files*')")))
("content" . (("type" . "string")
("description" . "Buffer content")))
("mode" . (("type" . "string")
("description" . "Optional major mode (e.g. 'markdown-mode', 'org-mode')")))))
- ("required" . ["name" "content"]))))
- (("name" . "format_file_list")
+ ("required" . ["name" "content"]))))))
+ (("type" . "function")
+ ("function" . (("name" . "format_file_list")
("description" . "Format content as a markdown file list with bullet points.")
- ("input_schema" . (("type" . "object")
+ ("parameters" . (("type" . "object")
("properties" . (("content" . (("type" . "string")
("description" . "Raw content to format as file list")))))
- ("required" . ["content"]))))
- (("name" . "format_todo_list")
+ ("required" . ["content"]))))))
+ (("type" . "function")
+ ("function" . (("name" . "format_todo_list")
("description" . "Format TODO list with optional sorting.")
- ("input_schema" . (("type" . "object")
+ ("parameters" . (("type" . "object")
("properties" . (("sort_by" . (("type" . "string")
("enum" . ["status" "priority"])
- ("description" . "Optional sorting criteria")))))
- ("required" . []))))
- (("name" . "display_in_buffer")
+ ("description" . "Sorting criteria")))))
+ ("required" . []))))))
+ (("type" . "function")
+ ("function" . (("name" . "display_in_buffer")
("description" . "Display content in a specific buffer.")
- ("input_schema" . (("type" . "object")
+ ("parameters" . (("type" . "object")
("properties" . (("buffer_name" . (("type" . "string")
("description" . "Buffer name")))
("content" . (("type" . "string")
("description" . "Content to display")))
("window_height" . (("type" . "number")
("description" . "Optional window height")))))
- ("required" . ["buffer_name" "content"]))))])))
+ ("required" . ["buffer_name" "content"]))))))])))
(url-request-data
(encode-coding-string (json-encode request-data) 'utf-8)))
@@ -825,26 +835,49 @@ attempt with ERROR-MSG and PREVIOUS-CODE from the failed attempt."
(error-message (gethash "message" error-obj)))
(format "API Error (%s): %s" error-type error-message))
- ;; Process successful response
- (let ((content (gethash "content" response)))
+ ;; Process successful response - OpenRouter format
+ (let* ((choices (gethash "choices" response))
+ (first-choice (when (and choices (> (length choices) 0))
+ (aref choices 0)))
+ (message-obj (when first-choice
+ (gethash "message" first-choice)))
+ (content (when message-obj
+ (gethash "content" message-obj)))
+ (tool-calls (when message-obj
+ (gethash "tool_calls" message-obj))))
+
(when efrit-do-debug
- (message "API Response content: %S" content))
+ (message "API Response content: %S" content)
+ (message "Tool calls: %S" tool-calls))
(when content
- (dotimes (i (length content))
- (let* ((item (aref content i))
- (type (gethash "type" item)))
- (cond
- ;; Handle text content
- ((string= type "text")
- (when-let* ((text (gethash "text" item)))
- (setq message-text (concat message-text text))))
+ (setq message-text (concat message-text content)))
+
+ (when tool-calls
+ (dotimes (i (length tool-calls))
+ (let* ((tool-call (aref tool-calls i))
+ (function-obj (gethash "function" tool-call))
+ (tool-name (gethash "name" function-obj))
+ (arguments-str (gethash "arguments" function-obj))
+ (arguments (when arguments-str
+ (condition-case err
+ (json-read-from-string arguments-str)
+ (error
+ (when efrit-do-debug
+ (message "Failed to parse tool arguments: %s" (error-message-string err)))
+ nil))))
+ (tool-item (make-hash-table :test 'equal)))
+
+ ;; Create tool item in expected format
+ (puthash "name" tool-name tool-item)
+ (puthash "input" arguments tool-item)
+
+ (when efrit-do-debug
+ (message "Processing tool call: %s with args: %S" tool-name arguments))
- ;; Handle tool use
- ((string= type "tool_use")
(setq message-text
(concat message-text
- (efrit-do--execute-tool item))))))))
+ (efrit-do--execute-tool tool-item))))))
(or message-text "Command executed"))))
(error
diff --git a/lisp/efrit-do.el.backup b/lisp/efrit-do.el.backup
index 3f3bb2dd..cf79c348 100644
--- a/lisp/efrit-do.el.backup
+++ b/lisp/efrit-do.el.backup
@@ -84,7 +84,7 @@ If nil, uses the default location in the efrit data directory."
:type 'boolean
:group 'efrit-do)
-(defcustom efrit-model "claude-3-5-sonnet-20241022"
+(defcustom efrit-model "anthropic/claude-sonnet-4"
"Claude model to use for efrit-do commands."
:type 'string
:group 'efrit-do)
@@ -94,7 +94,7 @@ If nil, uses the default location in the efrit data directory."
:type 'integer
:group 'efrit-do)
-(defcustom efrit-api-url "https://api.anthropic.com/v1/messages"
+(defcustom efrit-api-url "https://openrouter.ai/api/v1/chat/completions"
"URL for the Anthropic API endpoint used by efrit-do."
:type 'string
:group 'efrit-do)
diff --git a/lisp/efrit-multi-turn.el b/lisp/efrit-multi-turn.el
index e3a38000..3eba4960 100644
--- a/lisp/efrit-multi-turn.el
+++ b/lisp/efrit-multi-turn.el
@@ -43,7 +43,7 @@
:type 'boolean
:group 'efrit)
-(defcustom efrit-multi-turn-completion-model "claude-3-5-haiku-20241022"
+(defcustom efrit-multi-turn-completion-model "anthropic/claude-3.5-haiku"
"Claude model to use for completion assessment.
This should be a fast, lightweight model for efficiency."
:type 'string
@@ -196,7 +196,7 @@ REASON: [brief explanation]"
(efrit-debug-log "Sending completion check to Claude...")
(condition-case err
(with-current-buffer
- (url-retrieve-synchronously "https://api.anthropic.com/v1/messages" t nil efrit-multi-turn-api-timeout)
+ (url-retrieve-synchronously "https://openrouter.ai/api/v1/chat/completions" t nil efrit-multi-turn-api-timeout)
(goto-char (point-min))
(re-search-forward "\n\n" nil t) ; Skip headers
(let* ((response-json (buffer-substring (point) (point-max)))
diff --git a/lisp/efrit-tools.el b/lisp/efrit-tools.el
index ff488d5b..573e5227 100644
--- a/lisp/efrit-tools.el
+++ b/lisp/efrit-tools.el
@@ -108,7 +108,7 @@ LEVEL is one of: debug info warn error."
(defun efrit--get-api-key ()
"Get the Anthropic API key from .authinfo file."
- (let* ((auth-info (car (auth-source-search :host "api.anthropic.com"
+ (let* ((auth-info (car (auth-source-search :host "openrouter.ai"
:user "personal"
:require '(:secret))))
(secret (plist-get auth-info :secret)))
@@ -499,7 +499,7 @@ Arguments:
(let ((results nil)
(processed-text (or text ""))
- (elisp-regex "\\([\\s\\S]+?\\)"))
+ (elisp-regex "<elisp>\\(\\(?:.\\|\n\\)*?\\)</elisp>")) ; non-greedy, spans newlines, allows `<' inside the code
(condition-case-unless-debug extraction-err
(progn
diff --git a/plans/AUTONOMOUS_MODE_DESIGN.md b/plans/AUTONOMOUS_MODE_DESIGN.md
index 630beb9f..7781ea17 100644
--- a/plans/AUTONOMOUS_MODE_DESIGN.md
+++ b/plans/AUTONOMOUS_MODE_DESIGN.md
@@ -25,7 +25,7 @@ efrit-agent ā Aggressive problem-solving until complete (NEW)
"context": "It's several versions behind, managed by straight.el",
"session_id": "upgrade-20250814",
"max_iterations": 50,
- "model_backend": "claude-3.5-sonnet"
+ "model_backend": "anthropic/claude-sonnet-4"
}
```
@@ -112,9 +112,9 @@ Respond with your next action as JSON...
### 4. **Model Backend Abstraction**
```elisp
-(defcustom efrit-agent-backend "claude-3.5-sonnet"
+(defcustom efrit-agent-backend "anthropic/claude-sonnet-4"
"Default model backend for agent mode."
- :type '(choice (const "claude-3.5-sonnet")
+ :type '(choice (const "anthropic/claude-sonnet-4")
(const "gpt-4")
(const "local-llama")
(string :tag "Custom API endpoint")))
diff --git a/qa/qa-final-comprehensive-report.el b/qa/qa-final-comprehensive-report.el
index e8f7e5dd..1794f546 100644
--- a/qa/qa-final-comprehensive-report.el
+++ b/qa/qa-final-comprehensive-report.el
@@ -132,7 +132,7 @@
(insert ";; Recommended production settings\n")
(insert "(setq efrit-work-buffer-max-size 100000) ; 100KB limit\n")
(insert "(setq efrit-show-work-buffer nil) ; Don't auto-show\n")
- (insert "(setq efrit-model \"claude-3-5-sonnet-20241022\") ; Latest model\n")
+ (insert "(setq efrit-model \"anthropic/claude-sonnet-4\") ; Latest model\n")
(insert "(setq efrit-max-tokens 8192) ; Maximum context\n")
(insert "(setq efrit-temperature 0.1) ; Focused responses\n")
(insert "```\n\n")
diff --git a/qa/qa-test-integration.el b/qa/qa-test-integration.el
index e11c3b30..1f312689 100644
--- a/qa/qa-test-integration.el
+++ b/qa/qa-test-integration.el
@@ -89,7 +89,7 @@
"{\"type\":\"text\",\"text\":\"I'll create a haiku about Vim for you.\"},"
"{\"type\":\"tool_use\",\"id\":\"toolu_test\",\"name\":\"eval_sexp\","
"\"input\":{\"expr\":\"(get-buffer-create \\\"*vim-haiku*\\\")\"}}"
- "],\"model\":\"claude-3-5-sonnet-20241022\"}"))
+ "],\"model\":\"anthropic/claude-sonnet-4\"}"))
(mock-buffer (get-buffer-create "*mock-api-response*")))
;; Set up mock response buffer
@@ -225,7 +225,7 @@
;; Check for hardcoded values that should be configurable
(let ((system-prompt (efrit-streamlined--system-prompt)))
(cond
- ((string-match-p "claude-3-5-sonnet" system-prompt)
+ ((string-match-p "anthropic/claude-sonnet-4" system-prompt)
(message "ā System prompt contains hardcoded model name"))
((string-match-p "anthropic\\.com" system-prompt)
(message "ā System prompt contains hardcoded API URL"))
diff --git a/qa/qa-test-real-integration.el b/qa/qa-test-real-integration.el
index 21c3c2c2..c6775395 100644
--- a/qa/qa-test-real-integration.el
+++ b/qa/qa-test-real-integration.el
@@ -248,7 +248,7 @@
;; Test 4: Different models
(let ((original-model efrit-model))
- (setq efrit-model "claude-3-haiku-20240307")
+ (setq efrit-model "anthropic/claude-3.5-haiku") ; must differ from the default model for this test to mean anything
(unwind-protect
(condition-case err
(progn
diff --git a/qa/qa-test-streamlined.el b/qa/qa-test-streamlined.el
index cc1ebd0e..62776b37 100644
--- a/qa/qa-test-streamlined.el
+++ b/qa/qa-test-streamlined.el
@@ -83,7 +83,7 @@
;; Test 1: Valid response with text content
(let ((mock-response-text-only
- "{\"content\":[{\"type\":\"text\",\"text\":\"Hello, this is a test response.\"}],\"model\":\"claude-3-5-sonnet-20241022\"}"))
+ "{\"content\":[{\"type\":\"text\",\"text\":\"Hello, this is a test response.\"}],\"model\":\"anthropic/claude-sonnet-4\"}"))
(condition-case err
(let ((parsed (json-read-from-string mock-response-text-only)))
(if parsed
diff --git a/test/run-integration-tests.sh b/test/run-integration-tests.sh
index a4a89bbd..7d95b24c 100755
--- a/test/run-integration-tests.sh
+++ b/test/run-integration-tests.sh
@@ -28,7 +28,8 @@ run_test() {
echo -e "${BLUE}Running $test_name...${NC}"
TESTS_RUN=$((TESTS_RUN + 1))
- if emacs --batch --load "$test_file" 2>&1; then
+ # Use the correct load pattern with explicit file loading in dependency order
+ if emacs --batch --load ../lisp/efrit-debug.el --load ../lisp/efrit-config.el --load ../lisp/efrit.el --load ../lisp/efrit-agent.el --load ../lisp/efrit-tools.el --load ../lisp/efrit-do.el --load ../lisp/efrit-multi-turn.el --load ../lisp/efrit-command.el --load ../lisp/efrit-chat.el --load "$test_file" 2>&1; then
echo -e "${GREEN}ā
$test_name PASSED${NC}"
TESTS_PASSED=$((TESTS_PASSED + 1))
else
diff --git a/test/test-history-functionality.el b/test/test-history-functionality.el
index 4bbf0d99..125ecbd4 100644
--- a/test/test-history-functionality.el
+++ b/test/test-history-functionality.el
@@ -201,7 +201,8 @@
(setq efrit-do-history original-history))
;; Test 6: Context persistence after clearing
-(let ((temp-file efrit-do-context-file))
+(let ((temp-file (or efrit-do-context-file
+ (efrit-config-context-file "efrit-do-context.el"))))
;; Create some context
(efrit-do--capture-context "persistent-cmd" "persistent-result")
diff --git a/test/test-openrouter.el b/test/test-openrouter.el
new file mode 100644
index 00000000..2888601e
--- /dev/null
+++ b/test/test-openrouter.el
@@ -0,0 +1,413 @@
+;;; test-openrouter.el --- -*- lexical-binding: t; -*-
+(message "=== Response Processing Debug Test ===")
+
+;; Set up environment
+(add-to-list 'load-path "../lisp")
+(require 'efrit-tools)
+(require 'efrit-do)
+(setq efrit-do-debug t)
+
+;; Sample API response from the actual call
+(defvar test-api-response
+ "{\"id\":\"gen-1755874447-77pPQOiXzJ9E4p2wUGQg\",\"provider\":\"Google\",\"model\":\"anthropic/claude-sonnet-4\",\"object\":\"chat.completion\",\"created\":1755874447,\"choices\":[{\"logprobs\":null,\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"tool_calls\",\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"I'll calculate 15 * 23 and show the result.\",\"refusal\":null,\"reasoning\":null,\"tool_calls\":[{\"id\":\"toolu_vrtx_013wcQgbLbMbaKG1vXH4Hjq9\",\"index\":0,\"type\":\"function\",\"function\":{\"name\":\"eval_sexp\",\"arguments\":\"{\\\"expr\\\": \\\"(* 15 23)\\\"}\"}}]}}],\"usage\":{\"prompt_tokens\":2042,\"completion_tokens\":75,\"total_tokens\":2117}}")
+
+(message "\nš Testing response processing...")
+(message "Raw response: %s" test-api-response)
+
+;; Test the response processing function directly
+(let ((result (efrit-do--process-api-response test-api-response)))
+ (message "\nProcessed result: '%s'" result)
+ (message "Result length: %d" (length result))
+ (message "Contains 345? %s" (if (string-match-p "345" result) "YES" "NO")))
+
+;; Let's also examine the parsed JSON structure
+(message "\nš Examining JSON structure...")
+(let* ((json-object-type 'hash-table)
+ (json-array-type 'vector)
+ (json-key-type 'string)
+ (response (json-read-from-string test-api-response)))
+
+ (message "Response keys: %S" (hash-table-keys response))
+
+ (let ((choices (gethash "choices" response)))
+ (message "Choices length: %d" (length choices))
+
+ (when (> (length choices) 0)
+ (let* ((choice (aref choices 0))
+ (message-obj (gethash "message" choice))
+ (content (gethash "content" message-obj))
+ (tool-calls (gethash "tool_calls" message-obj)))
+
+ (message "Message content: '%s'" content)
+ (message "Tool calls: %S" tool-calls)
+
+ (when tool-calls
+ (message "Tool calls length: %d" (length tool-calls))
+ (when (> (length tool-calls) 0)
+ (let* ((tool-call (aref tool-calls 0))
+ (function-obj (gethash "function" tool-call))
+ (name (gethash "name" function-obj))
+ (arguments (gethash "arguments" function-obj)))
+ (message "Tool name: %s" name)
+ (message "Tool arguments: %s" arguments))))))))
+
+(message "\n=== Test Complete ===")
+
+
+(message "=== Unit Test: efrit-tools-extract-tools-from-response ===")
+
+;; Set up environment
+(add-to-list 'load-path "../lisp")
+(require 'efrit-tools)
+
+;; Test function to run a single test case
+(defun test-extract-tools (test-name input expected-blocks)
+  "Report under TEST-NAME whether INPUT yields EXPECTED-BLOCKS tool results."
+  (message "\n--- Test: %s ---" test-name)
+  (message "Input length: %d chars" (length input))
+  (message "Expected blocks: %d" expected-blocks)
+
+  (let ((result (efrit-tools-extract-tools-from-response input)))
+    (let ((processed-text (car result))
+          (results-list (cdr result))
+          (actual-blocks (length (cdr result))))
+
+      (message "Actual blocks processed: %d" actual-blocks)
+      (message "Results list: %S" results-list)
+
+      ;; Informational only: did the extractor rewrite the text at all?
+      (let ((text-changed (not (string= processed-text input))))
+        (message "Text changed: %s" (if text-changed "YES" "NO")))
+
+      ;; Verdict: number of result entries must equal EXPECTED-BLOCKS
+      (let ((blocks-correct (= actual-blocks expected-blocks)))
+        (message "Blocks correct: %s" (if blocks-correct "YES" "NO"))
+        (if blocks-correct
+            (message "✅ %s: PASSED" test-name)
+          (message "❌ %s: FAILED (expected %d, got %d)" test-name expected-blocks actual-blocks))))))
+
+;; Test Case 1: Original failing case (multi-line with newlines between blocks)
+(let ((test1 "I'll help you with that. First, let me create a buffer:
+(with-current-buffer (get-buffer-create \"*test-output*\")
+ (erase-buffer)
+ (insert \"Hello World\\n\")
+ (buffer-name))
+
+Now let me add more content:
+(with-current-buffer \"*test-output*\"
+ (goto-char (point-max))
+ (insert \"Additional line\\n\")
+ (buffer-size))
+
+Done!"))
+ (test-extract-tools "Multi-line with newlines between blocks" test1 2))
+
+;; Test Case 2: Multi-line with no newlines between blocks
+(let ((test2 "I'll help you with that. First, let me create a buffer:
+(with-current-buffer (get-buffer-create \"*test-output*\")
+ (erase-buffer)
+ (insert \"Hello World\\n\")
+ (buffer-name))
+Now let me add more content:
+(with-current-buffer \"*test-output*\"
+ (goto-char (point-max))
+ (insert \"Additional line\\n\")
+ (buffer-size))
+Done!"))
+ (test-extract-tools "Multi-line with no newlines between blocks" test2 2))
+
+;; Test Case 3: Single-line elisp blocks
+(let ((test3 "Simple test: (+ 2 3) and (* 4 5) Done!"))
+ (test-extract-tools "Single-line elisp blocks" test3 2))
+
+;; Test Case 4: Mixed single and multi-line blocks
+(let ((test4 "First: (+ 1 2)
+Then complex:
+(with-current-buffer (get-buffer-create \"*test*\")
+ (insert \"Hello\"))
+Finally: (buffer-name)"))
+ (test-extract-tools "Mixed single and multi-line blocks" test4 3))
+
+;; Test Case 5: Multi-line with extra whitespace and indentation
+(let ((test5 "Let's test this:
+
+ (with-current-buffer
+ (get-buffer-create \"*test*\")
+ (insert \"Hello World\"))
+
+
+And another:
+
+ (buffer-name)
+
+
+Done!"))
+ (test-extract-tools "Multi-line with extra whitespace and indentation" test5 2))
+
+(message "\n=== Hypothesis Testing ===")
+
+;; Test 1: Implementation Logic Bug in Capture Groups Approach
+(message "\n--- Testing Hypothesis 1: Implementation Logic Bug ---")
+(let ((test-input "Single: (+ 1 2)
+Multi: (with-current-buffer (get-buffer-create \"*test*\")
+ (insert \"Hello\"))
+Single: (buffer-name)"))
+
+ ;; Test if the manual tag finding logic is being executed at all
+ (let ((pos 0)
+ (found-tags 0))
+ (while (string-match "" test-input pos)
+ (setq found-tags (1+ found-tags))
+ (let* ((start-tag-pos (match-beginning 0))
+ (start-tag-end (match-end 0))
+ (end-tag-pos (string-match "" test-input start-tag-end)))
+ (if end-tag-pos
+ (setq pos (+ end-tag-pos 8))
+ (setq pos (1+ start-tag-pos)))))
+
+ (message "Manual tag search found: %d tags" found-tags)
+ (if (= found-tags 3)
+ (message "ā
HYPOTHESIS 1 REJECTED: Manual tag finding works correctly")
+ (message "ā HYPOTHESIS 1 CONFIRMED: Manual tag finding has bugs"))))
+
+;; Test 2: Variable Scope or State Management Issue
+(message "\n--- Testing Hypothesis 2: Variable Scope/State Management ---")
+(let ((test-input "Test: (+ 1 2)"))
+ ;; Test if the function properly updates its internal state
+ (let* ((result1 (efrit-tools-extract-tools-from-response test-input))
+ (result2 (efrit-tools-extract-tools-from-response test-input)))
+
+ (message "First call results: %S" (cdr result1))
+ (message "Second call results: %S" (cdr result2))
+ (message "Results identical: %s" (if (equal (cdr result1) (cdr result2)) "YES" "NO"))
+
+ (if (and (cdr result1) (equal (cdr result1) (cdr result2)))
+ (message "ā
HYPOTHESIS 2 REJECTED: Function state management is consistent")
+ (message "ā HYPOTHESIS 2 CONFIRMED: Function has state management issues"))))
+
+;; Test 3: Conditional Logic or Feature Flag Issue
+(message "\n--- Testing Hypothesis 3: Conditional Logic/Feature Flag ---")
+(let ((single-line-input "Test: (+ 1 2)")
+ (multi-line-input "Test: (with-current-buffer (get-buffer-create \"*test*\")
+ (insert \"Hello\"))"))
+
+ ;; Test if the function behaves differently based on input characteristics
+ (let* ((single-result (efrit-tools-extract-tools-from-response single-line-input))
+ (multi-result (efrit-tools-extract-tools-from-response multi-line-input))
+ (single-blocks (length (cdr single-result)))
+ (multi-blocks (length (cdr multi-result)))
+ (single-changed (not (string= (car single-result) single-line-input)))
+ (multi-changed (not (string= (car multi-result) multi-line-input))))
+
+ (message "Single-line: %d blocks, text changed: %s" single-blocks single-changed)
+ (message "Multi-line: %d blocks, text changed: %s" multi-blocks multi-changed)
+
+ (if (and (= single-blocks 1) (= multi-blocks 0) single-changed (not multi-changed))
+ (message "ā HYPOTHESIS 3 CONFIRMED: Function has different code paths for different input types")
+ (message "ā
HYPOTHESIS 3 REJECTED: Function behavior is consistent across input types"))))
+
+(message "\n=== Code Path Investigation ===")
+
+;; Add debug instrumentation to the function by temporarily redefining it
+(message "\n--- Setting up debug instrumentation ---")
+
+;; Save the original function
+(fset 'efrit-tools-extract-tools-from-response-original
+ (symbol-function 'efrit-tools-extract-tools-from-response))
+
+;; Create a debug version with extensive logging
+(defun efrit-tools-extract-tools-from-response (text)
+  "Debug copy: log each step while evaluating <elisp> blocks in TEXT."
+  ;; Validate first: the logging below applies string functions to TEXT.
+  (unless (stringp text)
+    (message "š DEBUG: ERROR - Text is not a string!")
+    (error "Response text must be a string"))
+
+  (message "š DEBUG: Function called with text length: %d" (length text))
+  (message "š DEBUG: Text contains newlines: %s" (if (string-match-p "\n" text) "YES" "NO"))
+  (message "š DEBUG: First 50 chars: %S" (substring text 0 (min 50 (length text))))
+  (let ((results nil)
+        (processed-text (or text ""))
+        (elisp-regex "<elisp>\\(.*?\\)</elisp>")) ; `.' stops at newlines; delimiters keep the match non-empty
+
+    (message "š DEBUG: Initialized variables")
+    (message "š DEBUG: - results: %S" results)
+    (message "š DEBUG: - processed-text length: %d" (length processed-text))
+    (message "š DEBUG: - elisp-regex: %S" elisp-regex)
+
+    (condition-case-unless-debug extraction-err
+        (progn
+          (message "š DEBUG: Entering main processing block")
+
+          ;; Probe the regex once so the log shows whether anything matches
+          (message "š DEBUG: Testing regex match...")
+          (let ((regex-test-pos (string-match elisp-regex processed-text)))
+            (message "š DEBUG: Regex match result: %S" regex-test-pos))
+
+          ;; Each iteration replaces the first remaining match with its result
+          (message "š DEBUG: Starting while loop for regex matches")
+          (let ((loop-count 0))
+            (while (string-match elisp-regex processed-text)
+              (setq loop-count (1+ loop-count))
+              (message "š DEBUG: Loop iteration %d" loop-count)
+
+              (let* ((elisp-code (match-string 1 processed-text))
+                     (call-start (match-beginning 0))
+                     (call-end (match-end 0)))
+
+                (message "š DEBUG: - elisp-code: %S" elisp-code)
+                (message "š DEBUG: - call-start: %d" call-start)
+                (message "š DEBUG: - call-end: %d" call-end)
+
+                (let ((result (condition-case eval-err
+                                  (efrit-tools-eval-sexp elisp-code)
+                                (error
+                                 (format "Error in Elisp evaluation: %s"
+                                         (error-message-string eval-err))))))
+
+                  (message "š DEBUG: - evaluation result: %S" result)
+
+                  ;; Results accumulate newest-first; reversed on return
+                  (push result results)
+                  (message "š DEBUG: - results list now: %S" results)
+
+                  ;; Splice the result text over the whole matched span
+                  (setq processed-text
+                        (concat (substring processed-text 0 call-start)
+                                (format "[Result: %s]" result)
+                                (substring processed-text call-end)))
+
+                  (message "š DEBUG: - processed-text length after replacement: %d" (length processed-text))
+                  (message "š DEBUG: - processed-text first 100 chars: %S"
+                           (substring processed-text 0 (min 100 (length processed-text)))))))
+
+            (message "š DEBUG: While loop completed after %d iterations" loop-count)))
+
+      ;; On any extraction error, append a note to the text instead of failing
+      (error
+       (message "š DEBUG: ERROR in extraction: %s" (error-message-string extraction-err))
+       (setq processed-text (concat processed-text
+                                    "\n[Error processing tool calls: "
+                                    (error-message-string extraction-err) "]"))))
+
+    (message "š DEBUG: Final results:")
+    (message "š DEBUG: - processed-text length: %d" (length processed-text))
+    (message "š DEBUG: - results list: %S" results)
+    (message "š DEBUG: - returning cons: %S" (cons processed-text (reverse results)))
+
+    ;; Return (PROCESSED-TEXT . RESULTS); log above uses non-destructive `reverse'
+    (cons processed-text (nreverse results))))
+
+;; Test both single-line and multi-line inputs with debug version.
+;; The calls run inside `unwind-protect' so the original definition is put
+;; back even when one of them signals an error.
+(unwind-protect
+    (progn
+      (message "\n--- Testing Single-line input with debug ---")
+      (let* ((single-input "Test: <elisp>(+ 1 2)</elisp>")
+             (result (efrit-tools-extract-tools-from-response single-input)))
+        (message "Single-line result: %S" result))
+
+      (message "\n--- Testing Multi-line input with debug ---")
+      ;; The literal newline inside the string is intentional: it is the
+      ;; multi-line case under investigation.
+      (let* ((multi-input "Test: <elisp>(with-current-buffer (get-buffer-create \"*test*\")
+ (insert \"Hello\"))</elisp>")
+             (result (efrit-tools-extract-tools-from-response multi-input)))
+        (message "Multi-line result: %S" result)))
+
+  ;; Restore the original function
+  (message "\n--- Restoring original function ---")
+  (fset 'efrit-tools-extract-tools-from-response
+        (symbol-function 'efrit-tools-extract-tools-from-response-original)))
+
+(message "\n=== string-match Behavior Analysis ===")
+
+;; Probe how `string-match' treats the tool-call pattern on single-line and
+;; multi-line inputs.  Everything here only reports via `message'.
+(let* ((open-tag "<elisp>")
+       (close-tag "</elisp>")
+       ;; Test the exact string-match behavior with our regex and inputs
+       (elisp-regex (concat open-tag "\\(.*?\\)" close-tag))
+       (single-input "Test: <elisp>(+ 1 2)</elisp>")
+       (multi-input "Test: <elisp>(with-current-buffer (get-buffer-create \"*test*\")
+ (insert \"Hello\"))</elisp>")
+       (multi-simple "Test: <elisp>(message
+\"hello\")</elisp>"))
+
+  (message "\n--- Testing string-match with different inputs ---")
+  (message "Regex pattern: %S" elisp-regex)
+
+  ;; Tests 1-3: run the same pattern over each input.  The third element
+  ;; says whether to also report the presence of newlines.
+  (dolist (probe (list (list "\nš Test 1: Single-line input" single-input nil)
+                       (list "\nš Test 2: Multi-line input (complex)" multi-input t)
+                       (list "\nš Test 3: Multi-line input (simple)" multi-simple t)))
+    (let ((heading (nth 0 probe))
+          (input (nth 1 probe))
+          (report-newlines (nth 2 probe)))
+      (message "%s" heading)
+      (message "Input: %S" input)
+      (message "Input length: %d" (length input))
+      (when report-newlines
+        (message "Input contains newlines: %s" (if (string-match-p "\n" input) "YES" "NO")))
+      (let ((match-pos (string-match elisp-regex input)))
+        (message "string-match result: %S" match-pos)
+        (when match-pos
+          (message "match-beginning 0: %d" (match-beginning 0))
+          (message "match-end 0: %d" (match-end 0))
+          (message "match-string 0: %S" (match-string 0 input))
+          (message "match-string 1: %S" (match-string 1 input))))))
+
+  ;; Test 4: Check if the issue is with the non-greedy matching
+  (message "\nš Test 4: Testing greedy vs non-greedy matching")
+  (let ((greedy-regex (concat open-tag "\\(.*\\)" close-tag))
+        (non-greedy-regex (concat open-tag "\\(.*?\\)" close-tag)))
+
+    (message "Testing greedy regex: %S" greedy-regex)
+    (message "Greedy match result: %S" (string-match greedy-regex multi-simple))
+
+    (message "Testing non-greedy regex: %S" non-greedy-regex)
+    (message "Non-greedy match result: %S" (string-match non-greedy-regex multi-simple)))
+
+  ;; Test 5: Test with explicit newline in regex.  `[^<]' is a complemented
+  ;; character set, which (unlike `.') does match a newline.
+  (message "\nš Test 5: Testing regex with explicit newline handling")
+  (let ((newline-regex (concat open-tag "\\([^<]*\\(?:\n[^<]*\\)*\\)" close-tag)))
+    (message "Newline-aware regex: %S" newline-regex)
+    (let ((newline-match (string-match newline-regex multi-simple)))
+      (message "Newline-aware match result: %S" newline-match)
+      (when newline-match
+        (message "match-string 1: %S" (match-string 1 multi-simple)))))
+
+  ;; Test 6: Test the exact character positions in multi-line input
+  (message "\nš Test 6: Character-by-character analysis of multi-line input")
+  (message "Multi-simple input character analysis:")
+  (dotimes (i (min 50 (length multi-simple)))
+    (let ((char (aref multi-simple i)))
+      (message "Position %d: %c (ASCII %d) %s"
+               i char char
+               (if (= char ?\n) "ā NEWLINE" ""))))
+
+  ;; Test 7: Manual search for opening and closing tags
+  (message "\nš Test 7: Manual tag search in multi-line input")
+  (let ((open-pos (string-match (regexp-quote open-tag) multi-simple))
+        (close-pos (string-match (regexp-quote close-tag) multi-simple)))
+    (message "Manual <elisp> search: %S" open-pos)
+    (message "Manual </elisp> search: %S" close-pos)
+    (when (and open-pos close-pos)
+      ;; Skip over the opening tag using its real length rather than a
+      ;; hard-coded offset, and only slice when the closing tag comes after
+      ;; it, since `substring' signals when start exceeds end.
+      (let ((start-content (+ open-pos (length open-tag))))
+        (when (<= start-content close-pos)
+          (let ((content (substring multi-simple start-content close-pos)))
+            (message "Content between tags: %S" content)
+            (message "Content length: %d" (length content))
+            (message "Content contains newlines: %s" (if (string-match-p "\n" content) "YES" "NO"))))))))
+
+(message "\n=== string-match Behavior Analysis Complete ===")