From dea729636c8737a5651755a54cd9bad55af6a414 Mon Sep 17 00:00:00 2001
From: scosman <scosman@users.noreply.github.com>
Date: Wed, 3 Dec 2025 13:34:00 -0500
Subject: [PATCH 1/3] New timing logger, for use with the timing viz tool

---
 app/desktop/dev_server.py                     |   2 +-
 .../adapters/model_adapters/base_adapter.py   |  12 ++
 .../model_adapters/litellm_adapter.py         |   7 +-
 libs/core/kiln_ai/utils/test_timing_logger.py | 124 ++++++++++++++++++
 libs/core/kiln_ai/utils/timing_logger.py      |  69 ++++++++++
 5 files changed, 212 insertions(+), 2 deletions(-)
 create mode 100644 libs/core/kiln_ai/utils/test_timing_logger.py
 create mode 100644 libs/core/kiln_ai/utils/timing_logger.py

diff --git a/app/desktop/dev_server.py b/app/desktop/dev_server.py
index 88dc5047b..5763e8a81 100644
--- a/app/desktop/dev_server.py
+++ b/app/desktop/dev_server.py
@@ -15,7 +15,7 @@
 dev_app = make_app()
 
 os.environ["DEBUG_EVENT_LOOP"] = "true"
-
+os.environ["KILN_SHOW_TIMING"] = "true"
 
 if __name__ == "__main__":
     setup_resource_limits()
diff --git a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
index cce9b4a60..10949fdbb 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
@@ -30,6 +30,7 @@
 from kiln_ai.tools.tool_registry import tool_from_id
 from kiln_ai.utils.config import Config
 from kiln_ai.utils.open_ai_types import ChatCompletionMessageParam
+from kiln_ai.utils.timing_logger import time_operation
 
 
 @dataclass
@@ -99,6 +100,17 @@ async def invoke_returning_run_output(
         input: InputType,
         input_source: DataSource | None = None,
     ) -> Tuple[TaskRun, RunOutput]:
+        with time_operation("invoke_task", self.task.name):
+            return await self.invoke_returning_run_output_core(input, input_source)
+
+    async def invoke_returning_run_output_core(
+        self,
+        input: InputType,
+        input_source: DataSource | None = None,
+    ) -> Tuple[TaskRun, RunOutput]:
+        """
+        Core implementation of invoke_returning_run_output. Wrapped by time_operation to log the time it takes.
+        """
         # validate input, allowing arrays
         if self.input_schema is not None:
             validate_schema_with_value_error(
diff --git a/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py
index 33828ac18..9ef617976 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py
@@ -46,6 +46,7 @@
     ChatCompletionMessageParam,
     ChatCompletionToolMessageParamWrapper,
 )
+from kiln_ai.utils.timing_logger import time_operation
 
 MAX_CALLS_PER_TURN = 10
 MAX_TOOL_CALLS_PER_TURN = 30
@@ -641,7 +642,11 @@ async def process_tool_calls(
             async def run_tool_and_format(
                 t=tool, c=context, args=parsed_args, tc_id=tool_call.id
             ):
-                result = await t.run(c, **args)
+                tool_name = await t.name()
+
+                with time_operation("tool_run", f"{tool_name}"):
+                    result = await t.run(c, **args)
+
                 return ChatCompletionToolMessageParamWrapper(
                     role="tool",
                     tool_call_id=tc_id,
diff --git a/libs/core/kiln_ai/utils/test_timing_logger.py b/libs/core/kiln_ai/utils/test_timing_logger.py
new file mode 100644
index 000000000..2d77478d7
--- /dev/null
+++ b/libs/core/kiln_ai/utils/test_timing_logger.py
@@ -0,0 +1,124 @@
+import logging
+import os
+import time
+from unittest.mock import patch
+
+import pytest
+
+from kiln_ai.utils.timing_logger import TimingLogger, time_operation
+
+
+class TestTimingLogger:
+    """Test cases for TimingLogger class."""
+
+    def test_timing_logger_basic_usage(self, caplog):
+        """Test basic timing logger functionality."""
+        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
+            with caplog.at_level(logging.WARNING):
+                with TimingLogger("test", "test_operation"):
+                    time.sleep(0.1)
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][test_operation]" in caplog.records[0].message
+        assert caplog.records[0].levelno == logging.WARNING
+
+    def test_timing_logger_logs_at_warning_level(self, caplog):
+        """Test that timing logger uses WARNING level when enabled."""
+        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
+            with caplog.at_level(logging.WARNING):
+                with TimingLogger("test", "level_test"):
+                    time.sleep(0.05)
+
+        assert len(caplog.records) == 1
+        assert caplog.records[0].levelname == "WARNING"
+        assert caplog.records[0].levelno == logging.WARNING
+
+    def test_timing_logger_not_logged_when_disabled(self, caplog):
+        """Test that timing messages don't appear when KILN_SHOW_TIMING is false."""
+        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "false"}):
+            with caplog.at_level(logging.WARNING):
+                with TimingLogger("test", "filtered_test"):
+                    time.sleep(0.05)
+
+        assert len(caplog.records) == 0
+
+    @patch("kiln_ai.utils.timing_logger.time.time")
+    def test_timing_calculation(self, mock_time, caplog):
+        """Test that timing calculation is accurate."""
+        # Mock time.time() to return predictable values
+        mock_time.side_effect = [1000.0, 1002.5]  # 2.5 second difference
+
+        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
+            with caplog.at_level(logging.WARNING):
+                with TimingLogger("test", "precise_test"):
+                    pass
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][precise_test][2.50s]" in caplog.records[0].message
+
+    def test_timestamp_format(self):
+        """Test that timestamp format is ISO format."""
+        timing_logger = TimingLogger("test", "timestamp_test")
+        timestamp = timing_logger.timestamp()
+
+        # Should be in ISO format (contains T and colons)
+        assert "T" in timestamp
+        assert ":" in timestamp
+
+
+class TestTimeOperationContextManager:
+    """Test cases for time_operation context manager function."""
+
+    def test_time_operation_basic(self, caplog):
+        """Test basic time_operation context manager."""
+        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
+            with caplog.at_level(logging.WARNING):
+                with time_operation("test", "context_test"):
+                    time.sleep(0.1)
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][context_test]" in caplog.records[0].message
+
+    def test_time_operation_exception_handling(self, caplog):
+        """Test that timing is still logged even if an exception occurs."""
+        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
+            with caplog.at_level(logging.WARNING):
+                with pytest.raises(ValueError):
+                    with time_operation("test", "exception_test"):
+                        raise ValueError("Test exception")
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][exception_test]" in caplog.records[0].message
+
+
+class TestEnvironmentVariableControl:
+    """Test cases for environment variable control of timing logging."""
+
+    def test_timing_disabled_by_default(self, caplog):
+        """Test that timing is disabled when KILN_SHOW_TIMING is not set."""
+        # Ensure the environment variable is not set
+        with patch.dict(os.environ, {}, clear=True):
+            with caplog.at_level(logging.WARNING):
+                with TimingLogger("test", "default_test"):
+                    time.sleep(0.05)
+
+        assert len(caplog.records) == 0
+
+    def test_timing_disabled_when_false(self, caplog):
+        """Test that timing is disabled when KILN_SHOW_TIMING is 'false'."""
+        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "false"}):
+            with caplog.at_level(logging.WARNING):
+                with TimingLogger("test", "false_test"):
+                    time.sleep(0.05)
+
+        assert len(caplog.records) == 0
+
+    def test_timing_enabled_when_true(self, caplog):
+        """Test that timing is enabled when KILN_SHOW_TIMING is 'true'."""
+        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
+            with caplog.at_level(logging.WARNING):
+                with TimingLogger("test", "true_test"):
+                    time.sleep(0.05)
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][true_test]" in caplog.records[0].message
diff --git a/libs/core/kiln_ai/utils/timing_logger.py b/libs/core/kiln_ai/utils/timing_logger.py
new file mode 100644
index 000000000..e7f4808aa
--- /dev/null
+++ b/libs/core/kiln_ai/utils/timing_logger.py
@@ -0,0 +1,69 @@
+import logging
+import os
+import time
+from contextlib import contextmanager
+from datetime import datetime
+from typing import Generator, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class TimingLogger:
+    """A utility for timing code execution with context manager support."""
+
+    def __init__(self, operation: str, name: str):
+        """
+        Initialize the timing logger.
+
+        Args:
+            name: Name/description of the operation being timed
+            print_start: Whether to print when the operation starts
+        """
+        self.operation = operation
+        self.name = name
+        self.start_time: Optional[float] = None
+
+    def timestamp(self) -> str:
+        """Generate a timestamp string in ISO format."""
+        return datetime.now().isoformat()
+
+    def __enter__(self):
+        """Enter the context manager and start timing."""
+        self.start_time = time.time()
+
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager and print timing results."""
+        if self.start_time is not None:
+            show_time = os.getenv("KILN_SHOW_TIMING", "false")
+            if show_time != "true":
+                return
+
+            duration = time.time() - self.start_time
+            printable_operation = self.operation.replace("[", "_").replace("]", "_")
+            printable_name = self.name.replace("[", "_").replace("]", "_")
+            logger.warning(
+                f"{self.timestamp()} timing_logger [{printable_operation}][{printable_name}][{duration:.2f}s]"
+            )
+
+
+@contextmanager
+def time_operation(
+    operation: str, name: str = "unknown"
+) -> Generator[None, None, None]:
+    """
+    Context manager for timing operations.
+
+    Args:
+        type: Type/category of the operation being timed
+        name: Name/description of the operation being timed
+
+    Example:
+        with time_operation("api", "my_task"):
+            # Your code here
+            time.sleep(1)
+    """
+    timing_logger = TimingLogger(operation, name)
+    with timing_logger:
+        yield

From eeff917040d031a90b4eafb7a72283793ee9b4d0 Mon Sep 17 00:00:00 2001
From: scosman <scosman@users.noreply.github.com>
Date: Fri, 5 Dec 2025 13:26:19 -0500
Subject: [PATCH 2/3] test fix

---
 conftest.py                                   | 8 ++++++++
 libs/core/kiln_ai/utils/test_timing_logger.py | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/conftest.py b/conftest.py
index 90dad2896..835433648 100644
--- a/conftest.py
+++ b/conftest.py
@@ -33,6 +33,14 @@ def load_env():
     load_dotenv()
 
 
+# Reset Config singleton between tests to prevent state leakage
+@pytest.fixture(autouse=True)
+def reset_config():
+    Config._shared_instance = None
+    yield
+    Config._shared_instance = None
+
+
 # mock out the settings path so we don't clobber the user's actual settings during tests
 @pytest.fixture(autouse=True)
 def use_temp_settings_dir(tmp_path):
diff --git a/libs/core/kiln_ai/utils/test_timing_logger.py b/libs/core/kiln_ai/utils/test_timing_logger.py
index 2d77478d7..59ba0fd82 100644
--- a/libs/core/kiln_ai/utils/test_timing_logger.py
+++ b/libs/core/kiln_ai/utils/test_timing_logger.py
@@ -46,7 +46,8 @@ def test_timing_logger_not_logged_when_disabled(self, caplog):
     def test_timing_calculation(self, mock_time, caplog):
         """Test that timing calculation is accurate."""
         # Mock time.time() to return predictable values
-        mock_time.side_effect = [1000.0, 1002.5]  # 2.5 second difference
+        # Need 3 values: __enter__, __exit__, and logging internal call
+        mock_time.side_effect = [1000.0, 1002.5, 1003.0]  # 2.5 second difference
 
         with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
             with caplog.at_level(logging.WARNING):

From dd78b9ed52b29b33bb3155dec1514a62f45008ea Mon Sep 17 00:00:00 2001
From: scosman <scosman@users.noreply.github.com>
Date: Fri, 5 Dec 2025 13:40:48 -0500
Subject: [PATCH 3/3] Perf: only read env var once

---
 libs/core/kiln_ai/utils/test_timing_logger.py | 96 ++++++++++---------
 libs/core/kiln_ai/utils/timing_logger.py      |  5 +-
 2 files changed, 54 insertions(+), 47 deletions(-)

diff --git a/libs/core/kiln_ai/utils/test_timing_logger.py b/libs/core/kiln_ai/utils/test_timing_logger.py
index 59ba0fd82..1aeb3f4e3 100644
--- a/libs/core/kiln_ai/utils/test_timing_logger.py
+++ b/libs/core/kiln_ai/utils/test_timing_logger.py
@@ -1,22 +1,29 @@
 import logging
-import os
 import time
 from unittest.mock import patch
 
 import pytest
 
+import kiln_ai.utils.timing_logger as timing_logger
 from kiln_ai.utils.timing_logger import TimingLogger, time_operation
 
 
+@pytest.fixture(autouse=True)
+def reset_show_timing():
+    old_show_timing = timing_logger._show_timing
+    yield
+    timing_logger._show_timing = old_show_timing
+
+
 class TestTimingLogger:
     """Test cases for TimingLogger class."""
 
     def test_timing_logger_basic_usage(self, caplog):
         """Test basic timing logger functionality."""
-        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
-            with caplog.at_level(logging.WARNING):
-                with TimingLogger("test", "test_operation"):
-                    time.sleep(0.1)
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "test_operation"):
+                time.sleep(0.1)
 
         assert len(caplog.records) == 1
         assert "timing_logger [test][test_operation]" in caplog.records[0].message
@@ -24,35 +31,35 @@ def test_timing_logger_basic_usage(self, caplog):
 
     def test_timing_logger_logs_at_warning_level(self, caplog):
         """Test that timing logger uses WARNING level when enabled."""
-        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
-            with caplog.at_level(logging.WARNING):
-                with TimingLogger("test", "level_test"):
-                    time.sleep(0.05)
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "level_test"):
+                time.sleep(0.05)
 
         assert len(caplog.records) == 1
         assert caplog.records[0].levelname == "WARNING"
         assert caplog.records[0].levelno == logging.WARNING
 
     def test_timing_logger_not_logged_when_disabled(self, caplog):
-        """Test that timing messages don't appear when KILN_SHOW_TIMING is false."""
-        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "false"}):
-            with caplog.at_level(logging.WARNING):
-                with TimingLogger("test", "filtered_test"):
-                    time.sleep(0.05)
+        """Test that timing messages don't appear when timing is disabled."""
+        timing_logger._show_timing = False
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "filtered_test"):
+                time.sleep(0.05)
 
         assert len(caplog.records) == 0
 
     @patch("kiln_ai.utils.timing_logger.time.time")
     def test_timing_calculation(self, mock_time, caplog):
         """Test that timing calculation is accurate."""
+        timing_logger._show_timing = True
         # Mock time.time() to return predictable values
         # Need 3 values: __enter__, __exit__, and logging internal call
         mock_time.side_effect = [1000.0, 1002.5, 1003.0]  # 2.5 second difference
 
-        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
-            with caplog.at_level(logging.WARNING):
-                with TimingLogger("test", "precise_test"):
-                    pass
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "precise_test"):
+                pass
 
         assert len(caplog.records) == 1
         assert "timing_logger [test][precise_test][2.50s]" in caplog.records[0].message
@@ -72,54 +79,53 @@ class TestTimeOperationContextManager:
 
     def test_time_operation_basic(self, caplog):
         """Test basic time_operation context manager."""
-        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
-            with caplog.at_level(logging.WARNING):
-                with time_operation("test", "context_test"):
-                    time.sleep(0.1)
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with time_operation("test", "context_test"):
+                time.sleep(0.1)
 
         assert len(caplog.records) == 1
         assert "timing_logger [test][context_test]" in caplog.records[0].message
 
     def test_time_operation_exception_handling(self, caplog):
         """Test that timing is still logged even if an exception occurs."""
-        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
-            with caplog.at_level(logging.WARNING):
-                with pytest.raises(ValueError):
-                    with time_operation("test", "exception_test"):
-                        raise ValueError("Test exception")
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with pytest.raises(ValueError):
+                with time_operation("test", "exception_test"):
+                    raise ValueError("Test exception")
 
         assert len(caplog.records) == 1
         assert "timing_logger [test][exception_test]" in caplog.records[0].message
 
 
-class TestEnvironmentVariableControl:
-    """Test cases for environment variable control of timing logging."""
+class TestShowTimingControl:
+    """Test cases for _show_timing flag control of timing logging."""
 
     def test_timing_disabled_by_default(self, caplog):
-        """Test that timing is disabled when KILN_SHOW_TIMING is not set."""
-        # Ensure the environment variable is not set
-        with patch.dict(os.environ, {}, clear=True):
-            with caplog.at_level(logging.WARNING):
-                with TimingLogger("test", "default_test"):
-                    time.sleep(0.05)
+        """Test that timing is disabled when _show_timing is False."""
+        timing_logger._show_timing = False
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "default_test"):
+                time.sleep(0.05)
 
         assert len(caplog.records) == 0
 
     def test_timing_disabled_when_false(self, caplog):
-        """Test that timing is disabled when KILN_SHOW_TIMING is 'false'."""
-        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "false"}):
-            with caplog.at_level(logging.WARNING):
-                with TimingLogger("test", "false_test"):
-                    time.sleep(0.05)
+        """Test that timing is disabled when _show_timing is False."""
+        timing_logger._show_timing = False
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "false_test"):
+                time.sleep(0.05)
 
         assert len(caplog.records) == 0
 
     def test_timing_enabled_when_true(self, caplog):
-        """Test that timing is enabled when KILN_SHOW_TIMING is 'true'."""
-        with patch.dict(os.environ, {"KILN_SHOW_TIMING": "true"}):
-            with caplog.at_level(logging.WARNING):
-                with TimingLogger("test", "true_test"):
-                    time.sleep(0.05)
+        """Test that timing is enabled when _show_timing is True."""
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "true_test"):
+                time.sleep(0.05)
 
         assert len(caplog.records) == 1
         assert "timing_logger [test][true_test]" in caplog.records[0].message
diff --git a/libs/core/kiln_ai/utils/timing_logger.py b/libs/core/kiln_ai/utils/timing_logger.py
index e7f4808aa..117993e0d 100644
--- a/libs/core/kiln_ai/utils/timing_logger.py
+++ b/libs/core/kiln_ai/utils/timing_logger.py
@@ -7,6 +7,8 @@
 
 logger = logging.getLogger(__name__)
 
+_show_timing = os.getenv("KILN_SHOW_TIMING", "false") == "true"
+
 
 class TimingLogger:
     """A utility for timing code execution with context manager support."""
@@ -36,8 +38,7 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_val, exc_tb):
         """Exit the context manager and print timing results."""
         if self.start_time is not None:
-            show_time = os.getenv("KILN_SHOW_TIMING", "false")
-            if show_time != "true":
+            if not _show_timing:
                 return
 
             duration = time.time() - self.start_time