diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ae9fdd05..5d455a23 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -30,7 +30,7 @@ Thank you for your interest in contributing to Efrit! This guide will help you g 2. **Set up your API key** in `~/.authinfo`: ``` - machine api.anthropic.com login personal password YOUR_API_KEY_HERE + machine openrouter.ai login personal password YOUR_API_KEY_HERE ``` 3. **Load Efrit for development**: diff --git a/README.md b/README.md index 10b0a2f8..f3cd9e5c 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ Efrit provides multiple interfaces for AI-powered Emacs development: 3. **Configure your API key** in `~/.authinfo`: ``` - machine api.anthropic.com login personal password YOUR_API_KEY_HERE + machine openrouter.ai login personal password YOUR_API_KEY_HERE ``` 4. **Restart Emacs** and test with `M-x efrit-chat` @@ -218,7 +218,7 @@ Transform Efrit from a user assistant into an **autonomous AI development platfo ```elisp ;; Standard Efrit settings -(setq efrit-model "claude-3-5-sonnet-20241022") +(setq efrit-model "anthropic/claude-sonnet-4") (setq efrit-max-tokens 8192) ;; šŸ†• Agent communication settings diff --git a/lisp/efrit-agent.el b/lisp/efrit-agent.el index 9e4a8fdd..2a95213f 100644 --- a/lisp/efrit-agent.el +++ b/lisp/efrit-agent.el @@ -31,10 +31,10 @@ :group 'efrit :prefix "efrit-agent-") -(defcustom efrit-agent-backend "claude-3.5-sonnet" +(defcustom efrit-agent-backend "anthropic/claude-sonnet-4" "Default model backend for agent mode." - :type '(choice (const "claude-3.5-sonnet") - (const "gpt-4") + :type '(choice (const "anthropic/claude-sonnet-4") + (const "gpt-4") (const "local-llama") (string :tag "Custom API endpoint")) :group 'efrit-agent) @@ -44,12 +44,12 @@ :type 'integer :group 'efrit-agent) -(defcustom efrit-agent-api-url "https://api.anthropic.com/v1/messages" +(defcustom efrit-agent-api-url "https://openrouter.ai/api/v1/chat/completions" "API URL for Claude requests." 
:type 'string :group 'efrit-agent) -(defcustom efrit-agent-model "claude-4-sonnet-20250514" +(defcustom efrit-agent-model "anthropic/claude-sonnet-4" "Model to use for agent requests. Updated to latest Claude 4 Sonnet." :type 'string :group 'efrit-agent) @@ -166,7 +166,7 @@ (defun efrit-agent--get-api-key () "Get the Anthropic API key from .authinfo file." (efrit-agent--log "DEBUG" "Looking for API key in .authinfo") - (let* ((auth-info (car (auth-source-search :host "api.anthropic.com" + (let* ((auth-info (car (auth-source-search :host "openrouter.ai" :user "personal" :require '(:secret)))) (secret (plist-get auth-info :secret))) @@ -176,7 +176,7 @@ (if (functionp secret) (funcall secret) secret)) - (efrit-agent--log "ERROR" "No API key found in .authinfo for api.anthropic.com") + (efrit-agent--log "ERROR" "No API key found in .authinfo for openrouter.ai") nil))) (defun efrit-agent--build-system-prompt () diff --git a/lisp/efrit-chat-streamlined.el b/lisp/efrit-chat-streamlined.el index dad81734..d550e0a1 100644 --- a/lisp/efrit-chat-streamlined.el +++ b/lisp/efrit-chat-streamlined.el @@ -40,7 +40,7 @@ :group 'tools :prefix "efrit-") -(defcustom efrit-model "claude-3-5-sonnet-20241022" +(defcustom efrit-model "anthropic/claude-sonnet-4" "Claude model to use for conversations." :type 'string :group 'efrit) @@ -55,7 +55,7 @@ :type 'float :group 'efrit) -(defcustom efrit-api-url "https://api.anthropic.com/v1/messages" +(defcustom efrit-api-url "https://openrouter.ai/api/v1/chat/completions" "URL for the Anthropic API endpoint." :type 'string :group 'efrit) diff --git a/lisp/efrit-chat.el b/lisp/efrit-chat.el index b426858a..b1d20f3c 100644 --- a/lisp/efrit-chat.el +++ b/lisp/efrit-chat.el @@ -46,7 +46,7 @@ :type 'string :group 'efrit) -(defcustom efrit-model "claude-3-5-sonnet-20241022" +(defcustom efrit-model "anthropic/claude-sonnet-4" "Claude model to use for conversations." :type 'string :group 'efrit) @@ -63,7 +63,7 @@ or 4096 without. 
This setting uses the higher limit." :type 'float :group 'efrit) -(defcustom efrit-api-url "https://api.anthropic.com/v1/messages" +(defcustom efrit-api-url "https://openrouter.ai/api/v1/chat/completions" "URL for the Anthropic API endpoint." :type 'string :group 'efrit) @@ -126,7 +126,7 @@ or 4096 without. This setting uses the higher limit." (defun efrit--get-api-key () "Get the Anthropic API key from .authinfo file." - (let* ((auth-info (car (auth-source-search :host "api.anthropic.com" + (let* ((auth-info (car (auth-source-search :host "openrouter.ai" :user "personal" :require '(:secret)))) (secret (plist-get auth-info :secret))) @@ -228,9 +228,7 @@ or 4096 without. This setting uses the higher limit." (let* ((api-key (efrit--get-api-key)) (url-request-method "POST") (url-request-extra-headers - `(("x-api-key" . ,api-key) - ("anthropic-version" . "2023-06-01") - ("anthropic-beta" . "max-tokens-3-5-sonnet-2024-07-15") + `(("Authorization" . ,(concat "Bearer " api-key)) ("content-type" . "application/json"))) (system-prompt (when efrit-enable-tools (efrit-tools-system-prompt))) (request-data diff --git a/lisp/efrit-do.el b/lisp/efrit-do.el index 5b886d36..87dc15e4 100644 --- a/lisp/efrit-do.el +++ b/lisp/efrit-do.el @@ -84,7 +84,7 @@ If nil, uses the default location in the efrit data directory." :type 'boolean :group 'efrit-do) -(defcustom efrit-model "claude-3-5-sonnet-20241022" +(defcustom efrit-model "anthropic/claude-sonnet-4" "Claude model to use for efrit-do commands." :type 'string :group 'efrit-do) @@ -94,7 +94,7 @@ If nil, uses the default location in the efrit data directory." :type 'integer :group 'efrit-do) -(defcustom efrit-api-url "https://api.anthropic.com/v1/messages" +(defcustom efrit-api-url "https://openrouter.ai/api/v1/chat/completions" "URL for the Anthropic API endpoint used by efrit-do." :type 'string :group 'efrit-do) @@ -710,92 +710,102 @@ When `efrit-do-show-errors-only' is non-nil, only show buffer for errors." 
display-buffer-below-selected (window-height . 10))))))) + (defun efrit-do--execute-command (command &optional retry-count error-msg previous-code) "Execute natural language COMMAND and return the result. -Uses improved error handling. If RETRY-COUNT is provided, this is a retry +Uses improved error handling. If RETRY-COUNT is provided, this is a retry attempt with ERROR-MSG and PREVIOUS-CODE from the failed attempt." (condition-case api-err (let* ((api-key (efrit--get-api-key)) (url-request-method "POST") (url-request-extra-headers - `(("x-api-key" . ,api-key) - ("anthropic-version" . "2023-06-01") + `(("Authorization" . ,(concat "Bearer " api-key)) ("content-type" . "application/json"))) (system-prompt (efrit-do--command-system-prompt retry-count error-msg previous-code)) (request-data `(("model" . ,efrit-model) ("max_tokens" . ,efrit-max-tokens) ("temperature" . 0.0) - ("messages" . [(("role" . "user") + ("messages" . [(("role" . "system") + ("content" . ,system-prompt)) + (("role" . "user") ("content" . ,command))]) - ("system" . ,system-prompt) - ("tools" . [(("name" . "eval_sexp") + ("tools" . [(("type" . "function") + ("function" . (("name" . "eval_sexp") ("description" . "Evaluate a Lisp expression and return the result. This is the primary tool for interacting with Emacs.") - ("input_schema" . (("type" . "object") + ("parameters" . (("type" . "object") ("properties" . (("expr" . (("type" . "string") ("description" . "The Elisp expression to evaluate"))))) - ("required" . ["expr"])))) - (("name" . "shell_exec") + ("required" . ["expr"])))))) + (("type" . "function") + ("function" . (("name" . "shell_exec") ("description" . "Execute a shell command and return the result.") - ("input_schema" . (("type" . "object") + ("parameters" . (("type" . "object") ("properties" . (("command" . (("type" . "string") ("description" . "The shell command to execute"))))) - ("required" . 
["command"])))))) + (("type" . "function") + ("function" . (("name" . "todo_add") ("description" . "Add a new TODO item to track progress.") - ("input_schema" . (("type" . "object") + ("parameters" . (("type" . "object") ("properties" . (("content" . (("type" . "string") ("description" . "The TODO item description"))) ("priority" . (("type" . "string") ("enum" . ["low" "medium" "high"]) ("description" . "Priority level"))))) - ("required" . ["content"])))) - (("name" . "todo_update") + ("required" . ["content"])))))) + (("type" . "function") + ("function" . (("name" . "todo_update") ("description" . "Update the status of a TODO item.") - ("input_schema" . (("type" . "object") + ("parameters" . (("type" . "object") ("properties" . (("id" . (("type" . "string") ("description" . "The TODO item ID"))) ("status" . (("type" . "string") ("enum" . ["todo" "in-progress" "completed"]) ("description" . "New status"))))) - ("required" . ["id" "status"])))) - (("name" . "todo_show") + ("required" . ["id" "status"])))))) + (("type" . "function") + ("function" . (("name" . "todo_show") ("description" . "Show all current TODO items.") - ("input_schema" . (("type" . "object") - ("properties" . ())))) - (("name" . "buffer_create") + ("parameters" . (("type" . "object") + ("properties" . ())))))) + (("type" . "function") + ("function" . (("name" . "buffer_create") ("description" . "Create a new buffer with content and optional mode. Use this for reports, lists, and formatted output.") - ("input_schema" . (("type" . "object") + ("parameters" . (("type" . "object") ("properties" . (("name" . (("type" . "string") ("description" . "Buffer name (e.g. '*efrit-report: Files*')"))) ("content" . (("type" . "string") ("description" . "Buffer content"))) ("mode" . (("type" . "string") ("description" . "Optional major mode (e.g. 'markdown-mode', 'org-mode')"))))) - ("required" . ["name" "content"])))) - (("name" . "format_file_list") + ("required" . ["name" "content"])))))) + (("type" . 
"function") + ("function" . (("name" . "format_file_list") ("description" . "Format content as a markdown file list with bullet points.") - ("input_schema" . (("type" . "object") + ("parameters" . (("type" . "object") ("properties" . (("content" . (("type" . "string") ("description" . "Raw content to format as file list"))))) - ("required" . ["content"])))) - (("name" . "format_todo_list") + ("required" . ["content"])))))) + (("type" . "function") + ("function" . (("name" . "format_todo_list") ("description" . "Format TODO list with optional sorting.") - ("input_schema" . (("type" . "object") + ("parameters" . (("type" . "object") ("properties" . (("sort_by" . (("type" . "string") ("enum" . ["status" "priority"]) - ("description" . "Optional sorting criteria"))))) - ("required" . [])))) - (("name" . "display_in_buffer") + ("description" . "Sorting criteria"))))) + ("required" . [])))))) + (("type" . "function") + ("function" . (("name" . "display_in_buffer") ("description" . "Display content in a specific buffer.") - ("input_schema" . (("type" . "object") + ("parameters" . (("type" . "object") ("properties" . (("buffer_name" . (("type" . "string") ("description" . "Buffer name"))) ("content" . (("type" . "string") ("description" . "Content to display"))) ("window_height" . (("type" . "number") ("description" . "Optional window height"))))) - ("required" . ["buffer_name" "content"]))))]))) + ("required" . ["buffer_name" "content"]))))))]))) (url-request-data (encode-coding-string (json-encode request-data) 'utf-8))) @@ -825,26 +835,49 @@ attempt with ERROR-MSG and PREVIOUS-CODE from the failed attempt." 
(error-message (gethash "message" error-obj))) (format "API Error (%s): %s" error-type error-message)) - ;; Process successful response - (let ((content (gethash "content" response))) + ;; Process successful response - OpenRouter format + (let* ((choices (gethash "choices" response)) + (first-choice (when (and choices (> (length choices) 0)) + (aref choices 0))) + (message-obj (when first-choice + (gethash "message" first-choice))) + (content (when message-obj + (gethash "content" message-obj))) + (tool-calls (when message-obj + (gethash "tool_calls" message-obj)))) + (when efrit-do-debug - (message "API Response content: %S" content)) + (message "API Response content: %S" content) + (message "Tool calls: %S" tool-calls)) (when content - (dotimes (i (length content)) - (let* ((item (aref content i)) - (type (gethash "type" item))) - (cond - ;; Handle text content - ((string= type "text") - (when-let* ((text (gethash "text" item))) - (setq message-text (concat message-text text)))) + (setq message-text (concat message-text content))) + + (when tool-calls + (dotimes (i (length tool-calls)) + (let* ((tool-call (aref tool-calls i)) + (function-obj (gethash "function" tool-call)) + (tool-name (gethash "name" function-obj)) + (arguments-str (gethash "arguments" function-obj)) + (arguments (when arguments-str + (condition-case err + (json-read-from-string arguments-str) + (error + (when efrit-do-debug + (message "Failed to parse tool arguments: %s" (error-message-string err))) + nil)))) + (tool-item (make-hash-table :test 'equal))) + + ;; Create tool item in expected format + (puthash "name" tool-name tool-item) + (puthash "input" arguments tool-item) + + (when efrit-do-debug + (message "Processing tool call: %s with args: %S" tool-name arguments)) - ;; Handle tool use - ((string= type "tool_use") (setq message-text (concat message-text - (efrit-do--execute-tool item)))))))) + (efrit-do--execute-tool tool-item)))))) (or message-text "Command executed")))) (error diff 
--git a/lisp/efrit-do.el.backup b/lisp/efrit-do.el.backup index 3f3bb2dd..cf79c348 100644 --- a/lisp/efrit-do.el.backup +++ b/lisp/efrit-do.el.backup @@ -84,7 +84,7 @@ If nil, uses the default location in the efrit data directory." :type 'boolean :group 'efrit-do) -(defcustom efrit-model "claude-3-5-sonnet-20241022" +(defcustom efrit-model "anthropic/claude-sonnet-4" "Claude model to use for efrit-do commands." :type 'string :group 'efrit-do) @@ -94,7 +94,7 @@ If nil, uses the default location in the efrit data directory." :type 'integer :group 'efrit-do) -(defcustom efrit-api-url "https://api.anthropic.com/v1/messages" +(defcustom efrit-api-url "https://openrouter.ai/api/v1/chat/completions" "URL for the Anthropic API endpoint used by efrit-do." :type 'string :group 'efrit-do) diff --git a/lisp/efrit-multi-turn.el b/lisp/efrit-multi-turn.el index e3a38000..3eba4960 100644 --- a/lisp/efrit-multi-turn.el +++ b/lisp/efrit-multi-turn.el @@ -43,7 +43,7 @@ :type 'boolean :group 'efrit) -(defcustom efrit-multi-turn-completion-model "claude-3-5-haiku-20241022" +(defcustom efrit-multi-turn-completion-model "anthropic/claude-sonnet-4" "Claude model to use for completion assessment. This should be a fast, lightweight model for efficiency." :type 'string @@ -196,7 +196,7 @@ REASON: [brief explanation]" (efrit-debug-log "Sending completion check to Claude...") (condition-case err (with-current-buffer - (url-retrieve-synchronously "https://api.anthropic.com/v1/messages" t nil efrit-multi-turn-api-timeout) + (url-retrieve-synchronously "https://openrouter.ai/api/v1/chat/completions" t nil efrit-multi-turn-api-timeout) (goto-char (point-min)) (re-search-forward "\n\n" nil t) ; Skip headers (let* ((response-json (buffer-substring (point) (point-max))) diff --git a/lisp/efrit-tools.el b/lisp/efrit-tools.el index ff488d5b..573e5227 100644 --- a/lisp/efrit-tools.el +++ b/lisp/efrit-tools.el @@ -108,7 +108,7 @@ LEVEL is one of: debug info warn error." 
(defun efrit--get-api-key () "Get the Anthropic API key from .authinfo file." - (let* ((auth-info (car (auth-source-search :host "api.anthropic.com" + (let* ((auth-info (car (auth-source-search :host "openrouter.ai" :user "personal" :require '(:secret)))) (secret (plist-get auth-info :secret))) @@ -499,7 +499,7 @@ Arguments: (let ((results nil) (processed-text (or text "")) - (elisp-regex "\\([\\s\\S]+?\\)")) + (elisp-regex "\\([^<]*\\(?:\n[^<]*\\)*\\)")) (condition-case-unless-debug extraction-err (progn diff --git a/plans/AUTONOMOUS_MODE_DESIGN.md b/plans/AUTONOMOUS_MODE_DESIGN.md index 630beb9f..7781ea17 100644 --- a/plans/AUTONOMOUS_MODE_DESIGN.md +++ b/plans/AUTONOMOUS_MODE_DESIGN.md @@ -25,7 +25,7 @@ efrit-agent → Aggressive problem-solving until complete (NEW) "context": "It's several versions behind, managed by straight.el", "session_id": "upgrade-20250814", "max_iterations": 50, - "model_backend": "claude-3.5-sonnet" + "model_backend": "anthropic/claude-sonnet-4" } ``` @@ -112,9 +112,9 @@ Respond with your next action as JSON... ### 4. **Model Backend Abstraction** ```elisp -(defcustom efrit-agent-backend "claude-3.5-sonnet" +(defcustom efrit-agent-backend "anthropic/claude-sonnet-4" "Default model backend for agent mode." 
- :type '(choice (const "claude-3.5-sonnet") + :type '(choice (const "anthropic/claude-sonnet-4") (const "gpt-4") (const "local-llama") (string :tag "Custom API endpoint"))) diff --git a/qa/qa-final-comprehensive-report.el b/qa/qa-final-comprehensive-report.el index e8f7e5dd..1794f546 100644 --- a/qa/qa-final-comprehensive-report.el +++ b/qa/qa-final-comprehensive-report.el @@ -132,7 +132,7 @@ (insert ";; Recommended production settings\n") (insert "(setq efrit-work-buffer-max-size 100000) ; 100KB limit\n") (insert "(setq efrit-show-work-buffer nil) ; Don't auto-show\n") - (insert "(setq efrit-model \"claude-3-5-sonnet-20241022\") ; Latest model\n") + (insert "(setq efrit-model \"anthropic/claude-sonnet-4\") ; Latest model\n") (insert "(setq efrit-max-tokens 8192) ; Maximum context\n") (insert "(setq efrit-temperature 0.1) ; Focused responses\n") (insert "```\n\n") diff --git a/qa/qa-test-integration.el b/qa/qa-test-integration.el index e11c3b30..1f312689 100644 --- a/qa/qa-test-integration.el +++ b/qa/qa-test-integration.el @@ -89,7 +89,7 @@ "{\"type\":\"text\",\"text\":\"I'll create a haiku about Vim for you.\"}," "{\"type\":\"tool_use\",\"id\":\"toolu_test\",\"name\":\"eval_sexp\"," "\"input\":{\"expr\":\"(get-buffer-create \\\"*vim-haiku*\\\")\"}}" - "],\"model\":\"claude-3-5-sonnet-20241022\"}")) + "],\"model\":\"anthropic/claude-sonnet-4\"}")) (mock-buffer (get-buffer-create "*mock-api-response*"))) ;; Set up mock response buffer @@ -225,7 +225,7 @@ ;; Check for hardcoded values that should be configurable (let ((system-prompt (efrit-streamlined--system-prompt))) (cond - ((string-match-p "claude-3-5-sonnet" system-prompt) + ((string-match-p "anthropic/claude-sonnet-4" system-prompt) (message "⚠ System prompt contains hardcoded model name")) ((string-match-p "anthropic\\.com" system-prompt) (message "⚠ System prompt contains hardcoded API URL")) diff --git a/qa/qa-test-real-integration.el b/qa/qa-test-real-integration.el index 21c3c2c2..c6775395 100644 --- 
a/qa/qa-test-real-integration.el +++ b/qa/qa-test-real-integration.el @@ -248,7 +248,7 @@ ;; Test 4: Different models (let ((original-model efrit-model)) - (setq efrit-model "claude-3-haiku-20240307") + (setq efrit-model "anthropic/claude-sonnet-4") (unwind-protect (condition-case err (progn diff --git a/qa/qa-test-streamlined.el b/qa/qa-test-streamlined.el index cc1ebd0e..62776b37 100644 --- a/qa/qa-test-streamlined.el +++ b/qa/qa-test-streamlined.el @@ -83,7 +83,7 @@ ;; Test 1: Valid response with text content (let ((mock-response-text-only - "{\"content\":[{\"type\":\"text\",\"text\":\"Hello, this is a test response.\"}],\"model\":\"claude-3-5-sonnet-20241022\"}")) + "{\"content\":[{\"type\":\"text\",\"text\":\"Hello, this is a test response.\"}],\"model\":\"anthropic/claude-sonnet-4\"}")) (condition-case err (let ((parsed (json-read-from-string mock-response-text-only))) (if parsed diff --git a/test/run-integration-tests.sh b/test/run-integration-tests.sh index a4a89bbd..7d95b24c 100755 --- a/test/run-integration-tests.sh +++ b/test/run-integration-tests.sh @@ -28,7 +28,8 @@ run_test() { echo -e "${BLUE}Running $test_name...${NC}" TESTS_RUN=$((TESTS_RUN + 1)) - if emacs --batch --load "$test_file" 2>&1; then + # Use the correct load pattern with explicit file loading in dependency order + if emacs --batch --load ../lisp/efrit-debug.el --load ../lisp/efrit-config.el --load ../lisp/efrit.el --load ../lisp/efrit-agent.el --load ../lisp/efrit-tools.el --load ../lisp/efrit-do.el --load ../lisp/efrit-multi-turn.el --load ../lisp/efrit-command.el --load ../lisp/efrit-chat.el --load "$test_file" 2>&1; then echo -e "${GREEN}āœ… $test_name PASSED${NC}" TESTS_PASSED=$((TESTS_PASSED + 1)) else diff --git a/test/test-history-functionality.el b/test/test-history-functionality.el index 4bbf0d99..125ecbd4 100644 --- a/test/test-history-functionality.el +++ b/test/test-history-functionality.el @@ -201,7 +201,8 @@ (setq efrit-do-history original-history)) ;; Test 6: Context 
persistence after clearing -(let ((temp-file efrit-do-context-file)) +(let ((temp-file (or efrit-do-context-file + (efrit-config-context-file "efrit-do-context.el")))) ;; Create some context (efrit-do--capture-context "persistent-cmd" "persistent-result") diff --git a/test/test-openrouter.el b/test/test-openrouter.el new file mode 100644 index 00000000..2888601e --- /dev/null +++ b/test/test-openrouter.el @@ -0,0 +1,413 @@ +;;; test-openrouter.el --- -*- lexical-binding: t; -*- +(message "=== Response Processing Debug Test ===") + +;; Set up environment +(add-to-list 'load-path "../lisp") +(require 'efrit-tools) +(require 'efrit-do) +(setq efrit-do-debug t) + +;; Sample API response from the actual call +(defvar test-api-response + "{\"id\":\"gen-1755874447-77pPQOiXzJ9E4p2wUGQg\",\"provider\":\"Google\",\"model\":\"anthropic/claude-sonnet-4\",\"object\":\"chat.completion\",\"created\":1755874447,\"choices\":[{\"logprobs\":null,\"finish_reason\":\"tool_calls\",\"native_finish_reason\":\"tool_calls\",\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"I'll calculate 15 * 23 and show the result.\",\"refusal\":null,\"reasoning\":null,\"tool_calls\":[{\"id\":\"toolu_vrtx_013wcQgbLbMbaKG1vXH4Hjq9\",\"index\":0,\"type\":\"function\",\"function\":{\"name\":\"eval_sexp\",\"arguments\":\"{\\\"expr\\\": \\\"(* 15 23)\\\"}\"}}]}}],\"usage\":{\"prompt_tokens\":2042,\"completion_tokens\":75,\"total_tokens\":2117}}") + +(message "\nšŸ” Testing response processing...") +(message "Raw response: %s" test-api-response) + +;; Test the response processing function directly +(let ((result (efrit-do--process-api-response test-api-response))) + (message "\nProcessed result: '%s'" result) + (message "Result length: %d" (length result)) + (message "Contains 345? 
%s" (if (string-match-p "345" result) "YES" "NO"))) + +;; Let's also examine the parsed JSON structure +(message "\nšŸ” Examining JSON structure...") +(let* ((json-object-type 'hash-table) + (json-array-type 'vector) + (json-key-type 'string) + (response (json-read-from-string test-api-response))) + + (message "Response keys: %S" (hash-table-keys response)) + + (let ((choices (gethash "choices" response))) + (message "Choices length: %d" (length choices)) + + (when (> (length choices) 0) + (let* ((choice (aref choices 0)) + (message-obj (gethash "message" choice)) + (content (gethash "content" message-obj)) + (tool-calls (gethash "tool_calls" message-obj))) + + (message "Message content: '%s'" content) + (message "Tool calls: %S" tool-calls) + + (when tool-calls + (message "Tool calls length: %d" (length tool-calls)) + (when (> (length tool-calls) 0) + (let* ((tool-call (aref tool-calls 0)) + (function-obj (gethash "function" tool-call)) + (name (gethash "name" function-obj)) + (arguments (gethash "arguments" function-obj))) + (message "Tool name: %s" name) + (message "Tool arguments: %s" arguments)))))))) + +(message "\n=== Test Complete ===") + + +(message "=== Unit Test: efrit-tools-extract-tools-from-response ===") + +;; Set up environment +(add-to-list 'load-path "../lisp") +(require 'efrit-tools) + +;; Test function to run a single test case +(defun test-extract-tools (test-name input expected-blocks) + "Test efrit-tools-extract-tools-from-response with given input." 
+ (message "\n--- Test: %s ---" test-name) + (message "Input length: %d chars" (length input)) + (message "Expected blocks: %d" expected-blocks) + + (let ((result (efrit-tools-extract-tools-from-response input))) + (let ((processed-text (car result)) + (results-list (cdr result)) + (actual-blocks (length (cdr result)))) + + (message "Actual blocks processed: %d" actual-blocks) + (message "Results list: %S" results-list) + + ;; Check if text was processed + (let ((text-changed (not (string= processed-text input)))) + (message "Text changed: %s" (if text-changed "YES" "NO"))) + + ;; Check if we got the expected number of blocks + (let ((blocks-correct (= actual-blocks expected-blocks))) + (message "Blocks correct: %s" (if blocks-correct "YES" "NO")) + (if blocks-correct + (message "āœ… %s: PASSED" test-name) + (message "āŒ %s: FAILED (expected %d, got %d)" test-name expected-blocks actual-blocks)))))) + +;; Test Case 1: Original failing case (multi-line with newlines between blocks) +(let ((test1 "I'll help you with that. First, let me create a buffer: +(with-current-buffer (get-buffer-create \"*test-output*\") + (erase-buffer) + (insert \"Hello World\\n\") + (buffer-name)) + +Now let me add more content: +(with-current-buffer \"*test-output*\" + (goto-char (point-max)) + (insert \"Additional line\\n\") + (buffer-size)) + +Done!")) + (test-extract-tools "Multi-line with newlines between blocks" test1 2)) + +;; Test Case 2: Multi-line with no newlines between blocks +(let ((test2 "I'll help you with that. 
First, let me create a buffer: +(with-current-buffer (get-buffer-create \"*test-output*\") + (erase-buffer) + (insert \"Hello World\\n\") + (buffer-name)) +Now let me add more content: +(with-current-buffer \"*test-output*\" + (goto-char (point-max)) + (insert \"Additional line\\n\") + (buffer-size)) +Done!")) + (test-extract-tools "Multi-line with no newlines between blocks" test2 2)) + +;; Test Case 3: Single-line elisp blocks +(let ((test3 "Simple test: (+ 2 3) and (* 4 5) Done!")) + (test-extract-tools "Single-line elisp blocks" test3 2)) + +;; Test Case 4: Mixed single and multi-line blocks +(let ((test4 "First: (+ 1 2) +Then complex: +(with-current-buffer (get-buffer-create \"*test*\") + (insert \"Hello\")) +Finally: (buffer-name)")) + (test-extract-tools "Mixed single and multi-line blocks" test4 3)) + +;; Test Case 5: Multi-line with extra whitespace and indentation +(let ((test5 "Let's test this: + + (with-current-buffer + (get-buffer-create \"*test*\") + (insert \"Hello World\")) + + +And another: + + (buffer-name) + + +Done!")) + (test-extract-tools "Multi-line with extra whitespace and indentation" test5 2)) + +(message "\n=== Hypothesis Testing ===") + +;; Test 1: Implementation Logic Bug in Capture Groups Approach +(message "\n--- Testing Hypothesis 1: Implementation Logic Bug ---") +(let ((test-input "Single: (+ 1 2) +Multi: (with-current-buffer (get-buffer-create \"*test*\") + (insert \"Hello\")) +Single: (buffer-name)")) + + ;; Test if the manual tag finding logic is being executed at all + (let ((pos 0) + (found-tags 0)) + (while (string-match "" test-input pos) + (setq found-tags (1+ found-tags)) + (let* ((start-tag-pos (match-beginning 0)) + (start-tag-end (match-end 0)) + (end-tag-pos (string-match "" test-input start-tag-end))) + (if end-tag-pos + (setq pos (+ end-tag-pos 8)) + (setq pos (1+ start-tag-pos))))) + + (message "Manual tag search found: %d tags" found-tags) + (if (= found-tags 3) + (message "āœ… HYPOTHESIS 1 REJECTED: Manual tag 
finding works correctly") + (message "āŒ HYPOTHESIS 1 CONFIRMED: Manual tag finding has bugs")))) + +;; Test 2: Variable Scope or State Management Issue +(message "\n--- Testing Hypothesis 2: Variable Scope/State Management ---") +(let ((test-input "Test: (+ 1 2)")) + ;; Test if the function properly updates its internal state + (let* ((result1 (efrit-tools-extract-tools-from-response test-input)) + (result2 (efrit-tools-extract-tools-from-response test-input))) + + (message "First call results: %S" (cdr result1)) + (message "Second call results: %S" (cdr result2)) + (message "Results identical: %s" (if (equal (cdr result1) (cdr result2)) "YES" "NO")) + + (if (and (cdr result1) (equal (cdr result1) (cdr result2))) + (message "āœ… HYPOTHESIS 2 REJECTED: Function state management is consistent") + (message "āŒ HYPOTHESIS 2 CONFIRMED: Function has state management issues")))) + +;; Test 3: Conditional Logic or Feature Flag Issue +(message "\n--- Testing Hypothesis 3: Conditional Logic/Feature Flag ---") +(let ((single-line-input "Test: (+ 1 2)") + (multi-line-input "Test: (with-current-buffer (get-buffer-create \"*test*\") + (insert \"Hello\"))")) + + ;; Test if the function behaves differently based on input characteristics + (let* ((single-result (efrit-tools-extract-tools-from-response single-line-input)) + (multi-result (efrit-tools-extract-tools-from-response multi-line-input)) + (single-blocks (length (cdr single-result))) + (multi-blocks (length (cdr multi-result))) + (single-changed (not (string= (car single-result) single-line-input))) + (multi-changed (not (string= (car multi-result) multi-line-input)))) + + (message "Single-line: %d blocks, text changed: %s" single-blocks single-changed) + (message "Multi-line: %d blocks, text changed: %s" multi-blocks multi-changed) + + (if (and (= single-blocks 1) (= multi-blocks 0) single-changed (not multi-changed)) + (message "āŒ HYPOTHESIS 3 CONFIRMED: Function has different code paths for different input types") + 
(message "āœ… HYPOTHESIS 3 REJECTED: Function behavior is consistent across input types")))) + +(message "\n=== Code Path Investigation ===") + +;; Add debug instrumentation to the function by temporarily redefining it +(message "\n--- Setting up debug instrumentation ---") + +;; Save the original function +(fset 'efrit-tools-extract-tools-from-response-original + (symbol-function 'efrit-tools-extract-tools-from-response)) + +;; Create a debug version with extensive logging +(defun efrit-tools-extract-tools-from-response (text) + "Debug version with extensive logging." + (message "šŸ” DEBUG: Function called with text length: %d" (length text)) + (message "šŸ” DEBUG: Text contains newlines: %s" (if (string-match-p "\n" text) "YES" "NO")) + (message "šŸ” DEBUG: First 50 chars: %S" (substring text 0 (min 50 (length text)))) + + (unless (stringp text) + (message "šŸ” DEBUG: ERROR - Text is not a string!") + (error "Response text must be a string")) + + (let ((results nil) + (processed-text (or text "")) + (elisp-regex "\\(.*?\\)")) + + (message "šŸ” DEBUG: Initialized variables") + (message "šŸ” DEBUG: - results: %S" results) + (message "šŸ” DEBUG: - processed-text length: %d" (length processed-text)) + (message "šŸ” DEBUG: - elisp-regex: %S" elisp-regex) + + (condition-case-unless-debug extraction-err + (progn + (message "šŸ” DEBUG: Entering main processing block") + + ;; Test the regex first + (message "šŸ” DEBUG: Testing regex match...") + (let ((regex-test-pos (string-match elisp-regex processed-text))) + (message "šŸ” DEBUG: Regex match result: %S" regex-test-pos)) + + ;; Process Elisp evaluation requests using the while loop + (message "šŸ” DEBUG: Starting while loop for regex matches") + (let ((loop-count 0)) + (while (string-match elisp-regex processed-text) + (setq loop-count (1+ loop-count)) + (message "šŸ” DEBUG: Loop iteration %d" loop-count) + + (let* ((elisp-code (match-string 1 processed-text)) + (call-start (match-beginning 0)) + (call-end 
(match-end 0))) + + (message "šŸ” DEBUG: - elisp-code: %S" elisp-code) + (message "šŸ” DEBUG: - call-start: %d" call-start) + (message "šŸ” DEBUG: - call-end: %d" call-end) + + (let ((result (condition-case eval-err + (efrit-tools-eval-sexp elisp-code) + (error + (format "Error in Elisp evaluation: %s" + (error-message-string eval-err)))))) + + (message "šŸ” DEBUG: - evaluation result: %S" result) + + ;; Add result to the list + (push result results) + (message "šŸ” DEBUG: - results list now: %S" results) + + ;; Replace the Elisp call with its result in the text + (setq processed-text + (concat (substring processed-text 0 call-start) + (format "[Result: %s]" result) + (substring processed-text call-end))) + + (message "šŸ” DEBUG: - processed-text length after replacement: %d" (length processed-text)) + (message "šŸ” DEBUG: - processed-text first 100 chars: %S" + (substring processed-text 0 (min 100 (length processed-text))))))) + + (message "šŸ” DEBUG: While loop completed after %d iterations" loop-count))) + + ;; Handle extraction errors + (error + (message "šŸ” DEBUG: ERROR in extraction: %s" (error-message-string extraction-err)) + (setq processed-text (concat processed-text + "\n[Error processing tool calls: " + (error-message-string extraction-err) "]")))) + + (message "šŸ” DEBUG: Final results:") + (message "šŸ” DEBUG: - processed-text length: %d" (length processed-text)) + (message "šŸ” DEBUG: - results list: %S" results) + (message "šŸ” DEBUG: - returning cons: %S" (cons processed-text (nreverse results))) + + ;; Return both the processed text and results + (cons processed-text (nreverse results)))) + +;; Test both single-line and multi-line inputs with debug version +(message "\n--- Testing Single-line input with debug ---") +(let ((single-input "Test: (+ 1 2)")) + (let ((result (efrit-tools-extract-tools-from-response single-input))) + (message "Single-line result: %S" result))) + +(message "\n--- Testing Multi-line input with debug ---") 
+(let ((multi-input "Test: <elisp>(with-current-buffer (get-buffer-create \"*test*\")\n  (insert \"Hello\"))</elisp>"))
+  ;; Run the restore as a cleanup form so the debug redefinition cannot stay
+  ;; installed if the multi-line test signals an error.
+  (unwind-protect
+      (let ((result (efrit-tools-extract-tools-from-response multi-input)))
+        (message "Multi-line result: %S" result))
+    ;; Restore the original function
+    (message "\n--- Restoring original function ---")
+    (fset 'efrit-tools-extract-tools-from-response
+          (symbol-function 'efrit-tools-extract-tools-from-response-original))))
+
+(message "\n=== string-match Behavior Analysis ===")
+
+;; Test the exact string-match behavior with our regex and inputs.
+;; Newlines inside the inputs are written as \n escapes so the literals do
+;; not depend on how the source file is wrapped.
+(let ((elisp-regex "<elisp>\\(.*?\\)</elisp>")
+      (single-input "Test: <elisp>(+ 1 2)</elisp>")
+      (multi-input "Test: <elisp>(with-current-buffer (get-buffer-create \"*test*\")\n  (insert \"Hello\"))</elisp>")
+      (multi-simple "Test: <elisp>(message\n\"hello\")</elisp>"))
+
+  (message "\n--- Testing string-match with different inputs ---")
+  (message "Regex pattern: %S" elisp-regex)
+
+  ;; Tests 1-3: run the same match report over each input.
+  ;; Each entry is (TITLE INPUT REPORT-NEWLINES-P).
+  (dolist (test-case (list (list "\n🔍 Test 1: Single-line input" single-input nil)
+                           (list "\n🔍 Test 2: Multi-line input (complex)" multi-input t)
+                           (list "\n🔍 Test 3: Multi-line input (simple)" multi-simple t)))
+    (let ((title (nth 0 test-case))
+          (input (nth 1 test-case))
+          (report-newlines (nth 2 test-case)))
+      (message "%s" title)
+      (message "Input: %S" input)
+      (message "Input length: %d" (length input))
+      (when report-newlines
+        (message "Input contains newlines: %s" (if (string-match-p "\n" input) "YES" "NO")))
+      ;; `string-match-p' above leaves the match data alone, so the data read
+      ;; below always belongs to this `string-match' call.
+      (let ((match-pos (string-match elisp-regex input)))
+        (message "string-match result: %S" match-pos)
+        (when match-pos
+          (message "match-beginning 0: %d" (match-beginning 0))
+          (message "match-end 0: %d" (match-end 0))
+          (message "match-string 0: %S" (match-string 0 input))
+          (message "match-string 1: %S" (match-string 1 input))))))
+
+  ;; Test 4: Check if the issue is with the non-greedy matching
+  (message "\n🔍 Test 4: Testing greedy vs non-greedy matching")
+  (let ((greedy-regex "<elisp>\\(.*\\)</elisp>")
+        (non-greedy-regex "<elisp>\\(.*?\\)</elisp>"))
+
+    (message "Testing greedy regex: %S" greedy-regex)
+    (let ((greedy-match (string-match greedy-regex multi-simple)))
+      (message "Greedy match result: %S" greedy-match))
+
+    (message "Testing non-greedy regex: %S" non-greedy-regex)
+    (let ((non-greedy-match (string-match non-greedy-regex multi-simple)))
+      (message "Non-greedy match result: %S" non-greedy-match)))
+
+  ;; Test 5: Test with explicit newline in regex.
+  ;; A negated character class such as [^<] does match newlines, unlike `.'.
+  (message "\n🔍 Test 5: Testing regex with explicit newline handling")
+  (let ((newline-regex "<elisp>\\([^<]*\\(?:\n[^<]*\\)*\\)</elisp>"))
+    (message "Newline-aware regex: %S" newline-regex)
+    (let ((newline-match (string-match newline-regex multi-simple)))
+      (message "Newline-aware match result: %S" newline-match)
+      (when newline-match
+        (message "match-string 1: %S" (match-string 1 multi-simple)))))
+
+  ;; Test 6: Test the exact character positions in multi-line input
+  (message "\n🔍 Test 6: Character-by-character analysis of multi-line input")
+  (message "Multi-simple input character analysis:")
+  (dotimes (i (min 50 (length multi-simple)))
+    (let ((char (aref multi-simple i)))
+      (message "Position %d: %c (ASCII %d) %s"
+               i char char
+               (if (= char ?\n) "← NEWLINE" ""))))
+
+  ;; Test 7: Manual search for opening and closing tags.
+  ;; The content start is derived from the tag length rather than a literal 7,
+  ;; and the closing tag is searched for after the opening tag so the
+  ;; extracted range can never be inverted.
+  (message "\n🔍 Test 7: Manual tag search in multi-line input")
+  (let* ((open-tag "<elisp>")
+         (close-tag "</elisp>")
+         (open-pos (string-match (regexp-quote open-tag) multi-simple))
+         (close-pos (string-match (regexp-quote close-tag) multi-simple
+                                  (if open-pos (+ open-pos (length open-tag)) 0))))
+    (message "Manual <elisp> search: %S" open-pos)
+    (message "Manual </elisp> search: %S" close-pos)
+    (when (and open-pos close-pos)
+      (let* ((start-content (+ open-pos (length open-tag)))
+             (content (substring multi-simple start-content close-pos)))
+        (message "Content between tags: %S" content)
+        (message "Content length: %d" (length content))
+        (message "Content contains newlines: %s" (if (string-match-p "\n" content) "YES" "NO"))))))
+
+(message "\n=== string-match Behavior Analysis Complete ===")