2 changes: 1 addition & 1 deletion cecli/__init__.py
@@ -1,6 +1,6 @@
from packaging import version

__version__ = "0.96.1.dev"
__version__ = "0.96.2.dev"
safe_version = __version__

try:
5 changes: 2 additions & 3 deletions cecli/coders/agent_coder.py
@@ -12,8 +12,6 @@
from datetime import datetime
from pathlib import Path

from litellm import experimental_mcp_client

from cecli import urls, utils
from cecli.change_tracker import ChangeTracker
from cecli.helpers import nested
@@ -29,6 +27,7 @@
normalize_vector,
)
from cecli.helpers.skills import SkillsManager
from cecli.llm import litellm
from cecli.mcp import LocalServer, McpServerManager
from cecli.repo import ANY_GIT_ERROR
from cecli.tools.utils.registry import ToolRegistry
@@ -306,7 +305,7 @@ async def _exec_async():
}
try:
session = await server.connect()
call_result = await experimental_mcp_client.call_openai_tool(
call_result = await litellm.experimental_mcp_client.call_openai_tool(
session=session, openai_tool=tool_call_dict
)
content_parts = []
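For context on the agent_coder.py change: the hard top-level import of litellm's experimental_mcp_client is dropped, and the call site goes through an object imported from cecli.llm instead. That module is not part of this diff, so the following is only a minimal sketch of the kind of indirection it could provide, assuming a lazy proxy that defers the relatively slow litellm import until first use; the class name and implementation details are hypothetical.

import importlib


class _LazyLiteLLM:
    # Hypothetical stand-in for whatever cecli.llm actually exports; not shown in this PR.
    _module = None

    def __getattr__(self, name):
        # Import litellm only when an attribute is first requested,
        # so importing cecli.llm stays cheap at startup.
        if _LazyLiteLLM._module is None:
            _LazyLiteLLM._module = importlib.import_module("litellm")
        return getattr(_LazyLiteLLM._module, name)


litellm = _LazyLiteLLM()

With a proxy like this, litellm.experimental_mcp_client.call_openai_tool(...) resolves through __getattr__ on first use, provided the litellm package exposes experimental_mcp_client as an attribute once imported.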
211 changes: 119 additions & 92 deletions cecli/commands/tokens.py
@@ -2,8 +2,7 @@

from cecli.commands.utils.base_command import BaseCommand
from cecli.commands.utils.helpers import format_command_result
from cecli.helpers.conversation import ConversationManager
from cecli.utils import is_image_file
from cecli.helpers.conversation import ConversationManager, MessageTag


class TokensCommand(BaseCommand):
@@ -15,39 +14,29 @@ async def execute(cls, io, coder, args, **kwargs):
res = []

coder.choose_fence()
coder.format_chat_chunks()

# Show progress indicator
total_files = len(coder.abs_fnames) + len(coder.abs_read_only_fnames)
if total_files > 20:
io.tool_output(f"Calculating tokens for {total_files} files...")

# system messages
main_sys = coder.fmt_system_prompt(coder.gpt_prompts.main_system)
main_sys += "\n" + coder.fmt_system_prompt(coder.gpt_prompts.system_reminder)
msgs = [
dict(role="system", content=main_sys),
dict(
role="system",
content=coder.fmt_system_prompt(coder.gpt_prompts.system_reminder),
),
# system messages - sum of SYSTEM, STATIC, EXAMPLES, and REMINDER tags
system_tags = [
MessageTag.SYSTEM,
MessageTag.STATIC,
MessageTag.EXAMPLES,
MessageTag.REMINDER,
]
system_tokens = 0

tokens = coder.main_model.token_count(msgs)
res.append((tokens, "system messages", ""))
for tag in system_tags:
msgs = ConversationManager.get_messages_dict(tag=tag)
if msgs:
system_tokens += coder.main_model.token_count(msgs)

# chat history
msgs = ConversationManager.get_messages_dict()
if msgs:
tokens = coder.main_model.token_count(msgs)
res.append((tokens, "chat history", "use /clear to clear"))

# repo map
other_files = set(coder.get_all_abs_files()) - set(coder.abs_fnames)
if coder.repo_map:
repo_content = coder.repo_map.get_repo_map(coder.abs_fnames, other_files)
if repo_content:
tokens = coder.main_model.token_count(repo_content)
res.append((tokens, "repository map", "use --map-tokens to resize"))
# Calculate context block tokens (they are part of STATIC messages)
context_block_total = 0

# Enhanced context blocks (only for agent mode)
if hasattr(coder, "use_enhanced_context") and coder.use_enhanced_context:
@@ -56,86 +45,124 @@ async def execute(cls, io, coder, args, **kwargs):
if not hasattr(coder, "tokens_calculated") or not coder.tokens_calculated:
coder._calculate_context_block_tokens()

# Add enhanced context blocks to the display
# Calculate total context block tokens
if hasattr(coder, "context_block_tokens") and coder.context_block_tokens:
for block_name, tokens in coder.context_block_tokens.items():
# Format the block name more nicely
display_name = block_name.replace("_", " ").title()
res.append(
(tokens, f"{display_name} context block", "/context-blocks to toggle")
)
context_block_total = sum(coder.context_block_tokens.values())

fence = "`" * 3
# Subtract context block tokens from system token count
# Context blocks are part of STATIC messages, so we need to subtract them
system_tokens = max(0, system_tokens - context_block_total)

file_res = []
# Process files with progress indication
total_editable_files = len(coder.abs_fnames)
total_readonly_files = len(coder.abs_read_only_fnames)
res.append((system_tokens, "system messages", ""))

# Display progress for editable files
if total_editable_files > 0:
if total_editable_files > 20:
io.tool_output(f"Calculating tokens for {total_editable_files} editable files...")
# chat history
msgs_done = ConversationManager.get_messages_dict(tag=MessageTag.DONE)
msgs_cur = ConversationManager.get_messages_dict(tag=MessageTag.CUR)
tokens_done = 0
tokens_cur = 0

# Calculate tokens for editable files
for i, fname in enumerate(coder.abs_fnames):
if i > 0 and i % 20 == 0 and total_editable_files > 20:
io.tool_output(f"Processed {i}/{total_editable_files} editable files...")
if msgs_done:
tokens_done = coder.main_model.token_count(msgs_done)

relative_fname = coder.get_rel_fname(fname)
content = io.read_text(fname)
if msgs_cur:
tokens_cur = coder.main_model.token_count(msgs_cur)

if not content:
continue
if tokens_cur + tokens_done:
res.append((tokens_cur + tokens_done, "chat history", "use /clear to clear"))

if is_image_file(relative_fname):
tokens = coder.main_model.token_count_for_image(fname)
else:
# approximate
content = f"{relative_fname}\n{fence}\n" + content + f"{fence}\n"
tokens = coder.main_model.token_count(content)
file_res.append((tokens, f"{relative_fname}", "/drop to remove"))
# repo map
if coder.repo_map:
tokens = coder.main_model.token_count(
ConversationManager.get_messages_dict(tag=MessageTag.REPO)
)
res.append((tokens, "repository map", "use --map-tokens to resize"))

# Display progress for read-only files
if total_readonly_files > 0:
if total_readonly_files > 20:
io.tool_output(f"Calculating tokens for {total_readonly_files} read-only files...")
# Display enhanced context blocks (only for agent mode)
# Note: Context block tokens were already calculated and subtracted from system messages
if hasattr(coder, "use_enhanced_context") and coder.use_enhanced_context:
if hasattr(coder, "context_block_tokens") and coder.context_block_tokens:
for block_name, tokens in coder.context_block_tokens.items():
# Format the block name more nicely
display_name = block_name.replace("_", " ").title()
res.append(
(tokens, f"{display_name} context block", "/context-blocks to toggle")
)

# Calculate tokens for read-only files
for i, fname in enumerate(coder.abs_read_only_fnames):
if i > 0 and i % 20 == 0 and total_readonly_files > 20:
io.tool_output(f"Processed {i}/{total_readonly_files} read-only files...")
file_res = []

# Calculate tokens for read-only files using READONLY_FILES tag
readonly_msgs = ConversationManager.get_messages_dict(tag=MessageTag.READONLY_FILES)
if readonly_msgs:
# Group messages by file (each file has user and assistant messages)
file_tokens = {}
for msg in readonly_msgs:
# Extract file name from message content
content = msg.get("content", "")
if content.startswith("File Contents"):
# Extract file path from "File Contents {path}:"
lines = content.split("\n", 1)
if lines:
file_line = lines[0]
if file_line.startswith("File Contents"):
fname = file_line[13:].rstrip(":")
# Calculate tokens for this message
tokens = coder.main_model.token_count([msg])
if fname not in file_tokens:
file_tokens[fname] = 0
file_tokens[fname] += tokens
elif "image_file" in msg:
# Handle image files
fname = msg.get("image_file")
if fname:
tokens = coder.main_model.token_count([msg])
if fname not in file_tokens:
file_tokens[fname] = 0
file_tokens[fname] += tokens

# Add to results
for fname, tokens in file_tokens.items():
relative_fname = coder.get_rel_fname(fname)
content = io.read_text(fname)

if not content:
continue

if not is_image_file(relative_fname):
# approximate
content = f"{relative_fname}\n{fence}\n" + content + f"{fence}\n"
tokens = coder.main_model.token_count(content)
file_res.append((tokens, f"{relative_fname} (read-only)", "/drop to remove"))

if total_files > 20:
io.tool_output("Token calculation complete. Generating report...")

file_res.sort()
res.extend(file_res)

# stub files
for fname in coder.abs_read_only_stubs_fnames:
file_res.append((tokens, f"{relative_fname} (read-only)", "/drop to remove"))

# Calculate tokens for editable files using CHAT_FILES and EDIT_FILES tags
editable_tags = [MessageTag.CHAT_FILES, MessageTag.EDIT_FILES]
editable_file_tokens = {}

for tag in editable_tags:
msgs = ConversationManager.get_messages_dict(tag=tag)
if msgs:
for msg in msgs:
# Extract file name from message content
content = msg.get("content", "")
if content.startswith("File Contents"):
# Extract file path from "File Contents {path}:"
lines = content.split("\n", 1)
if lines:
file_line = lines[0]
if file_line.startswith("File Contents"):
fname = file_line[13:].rstrip(":")
# Calculate tokens for this message
tokens = coder.main_model.token_count([msg])
if fname not in editable_file_tokens:
editable_file_tokens[fname] = 0
editable_file_tokens[fname] += tokens
elif "image_file" in msg:
# Handle image files
fname = msg.get("image_file")
if fname:
tokens = coder.main_model.token_count([msg])
if fname not in editable_file_tokens:
editable_file_tokens[fname] = 0
editable_file_tokens[fname] += tokens

# Add editable files to results
for fname, tokens in editable_file_tokens.items():
relative_fname = coder.get_rel_fname(fname)
if not is_image_file(relative_fname):
stub = coder.get_file_stub(fname)

if not stub:
continue
file_res.append((tokens, f"{relative_fname}", "/drop to remove"))

content = f"{relative_fname} (stub)\n{fence}\n" + stub + "{fence}\n"
tokens = coder.main_model.token_count(content)
res.append((tokens, f"{relative_fname} (read-only stub)", "/drop to remove"))
if file_res:
file_res.sort()
res.extend(file_res)

io.tool_output(f"Approximate context window usage for {coder.main_model.name}, in tokens:")
io.tool_output()
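The core of the reworked /tokens report is that per-file usage is derived from the conversation messages themselves: messages fetched by tag are grouped back to the file they describe by parsing their leading "File Contents {path}:" line, and image messages are matched via their image_file key. A standalone sketch of that grouping, with token_count standing in for coder.main_model.token_count and the header format assumed from the comments in the diff:

from collections import defaultdict

FILE_HEADER = "File Contents "  # assumed prefix, per the comments in the diff


def group_file_tokens(messages, token_count):
    # Attribute each message's token count to the file it carries.
    per_file = defaultdict(int)
    for msg in messages:
        content = msg.get("content", "")
        if content.startswith("File Contents"):
            header = content.split("\n", 1)[0]             # e.g. "File Contents foo/bar.py:"
            fname = header[len(FILE_HEADER):].rstrip(":")  # -> "foo/bar.py"
            per_file[fname] += token_count([msg])
        elif "image_file" in msg:
            per_file[msg["image_file"]] += token_count([msg])
    return dict(per_file)

Fed the messages returned by ConversationManager.get_messages_dict(tag=MessageTag.READONLY_FILES), and likewise for CHAT_FILES and EDIT_FILES, this produces the same kind of per-file totals the command accumulates inline above; the helper itself is illustrative and not part of the PR.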
6 changes: 5 additions & 1 deletion cecli/helpers/conversation/base_message.py
@@ -1,3 +1,4 @@
import json
import time
import uuid
from dataclasses import dataclass, field
@@ -54,6 +55,9 @@ def _transform_message(self, tool_calls):
tool_calls_list.append(tool_call)
return tool_calls_list

def _serialize_default(self, content):
return "<not serializable>"

def generate_id(self) -> str:
"""
Creates deterministic hash from hash_key or (role, content).
@@ -81,7 +85,7 @@ def generate_id(self) -> str:
if tool_calls:
# For tool calls, include them in the hash
transformed_tool_calls = self._transform_message(tool_calls)
tool_calls_str = str(transformed_tool_calls)
tool_calls_str = json.dumps(transformed_tool_calls, default=self._serialize_default)
key_data = f"{role}:{content}:{tool_calls_str}"
else:
key_data = f"{role}:{content}"
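The base_message.py hunk swaps str(transformed_tool_calls) for json.dumps(..., default=self._serialize_default) when building the hash key. str() on nested tool-call data can embed object reprs, memory addresses included, that differ between runs, which undercuts the deterministic-hash intent documented in generate_id; json.dumps with a default fallback yields a stable string for anything serializable and a fixed placeholder otherwise. A minimal sketch of the effect, with the hash function assumed since the actual hashing step sits outside the shown hunk:

import hashlib
import json


def stable_id(role, content, tool_calls):
    # Same key construction as generate_id above; sha256 is an assumption.
    tool_calls_str = json.dumps(tool_calls, default=lambda o: "<not serializable>")
    key_data = f"{role}:{content}:{tool_calls_str}"
    return hashlib.sha256(key_data.encode("utf-8")).hexdigest()


class Opaque:
    # Default repr includes a memory address, so a str()-based key would change run to run.
    pass


calls = [{"id": "call_1", "function": {"name": "ls", "arguments": "{}"}, "meta": Opaque()}]
print(stable_id("assistant", "listing files", calls))  # stable across runs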