diff --git a/app/desktop/dev_server.py b/app/desktop/dev_server.py
index 88dc5047b..5763e8a81 100644
--- a/app/desktop/dev_server.py
+++ b/app/desktop/dev_server.py
@@ -15,7 +15,7 @@
 dev_app = make_app()
 
 os.environ["DEBUG_EVENT_LOOP"] = "true"
-
+os.environ["KILN_SHOW_TIMING"] = "true"
 
 if __name__ == "__main__":
     setup_resource_limits()
diff --git a/conftest.py b/conftest.py
index 90dad2896..835433648 100644
--- a/conftest.py
+++ b/conftest.py
@@ -33,6 +33,14 @@ def load_env():
     load_dotenv()
 
 
+# Reset Config singleton between tests to prevent state leakage
+@pytest.fixture(autouse=True)
+def reset_config():
+    Config._shared_instance = None
+    yield
+    Config._shared_instance = None
+
+
 # mock out the settings path so we don't clobber the user's actual settings during tests
 @pytest.fixture(autouse=True)
 def use_temp_settings_dir(tmp_path):
diff --git a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
index cce9b4a60..10949fdbb 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py
@@ -30,6 +30,7 @@
 from kiln_ai.tools.tool_registry import tool_from_id
 from kiln_ai.utils.config import Config
 from kiln_ai.utils.open_ai_types import ChatCompletionMessageParam
+from kiln_ai.utils.timing_logger import time_operation
 
 
 @dataclass
@@ -99,6 +100,17 @@ async def invoke_returning_run_output(
         input: InputType,
         input_source: DataSource | None = None,
     ) -> Tuple[TaskRun, RunOutput]:
+        with time_operation("invoke_task", self.task.name):
+            return await self.invoke_returning_run_output_core(input, input_source)
+
+    async def invoke_returning_run_output_core(
+        self,
+        input: InputType,
+        input_source: DataSource | None = None,
+    ) -> Tuple[TaskRun, RunOutput]:
+        """
+        Core implementation of invoke_returning_run_output. Wrapped by time_operation to log the time it takes.
+        """
         # validate input, allowing arrays
         if self.input_schema is not None:
             validate_schema_with_value_error(
diff --git a/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py
index 33828ac18..9ef617976 100644
--- a/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py
+++ b/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py
@@ -46,6 +46,7 @@
     ChatCompletionMessageParam,
     ChatCompletionToolMessageParamWrapper,
 )
+from kiln_ai.utils.timing_logger import time_operation
 
 MAX_CALLS_PER_TURN = 10
 MAX_TOOL_CALLS_PER_TURN = 30
@@ -641,7 +642,11 @@ async def process_tool_calls(
             async def run_tool_and_format(
                 t=tool, c=context, args=parsed_args, tc_id=tool_call.id
             ):
-                result = await t.run(c, **args)
+                tool_name = await t.name()
+
+                with time_operation("tool_run", f"{tool_name}"):
+                    result = await t.run(c, **args)
+
                 return ChatCompletionToolMessageParamWrapper(
                     role="tool",
                     tool_call_id=tc_id,
diff --git a/libs/core/kiln_ai/utils/test_timing_logger.py b/libs/core/kiln_ai/utils/test_timing_logger.py
new file mode 100644
index 000000000..1aeb3f4e3
--- /dev/null
+++ b/libs/core/kiln_ai/utils/test_timing_logger.py
@@ -0,0 +1,131 @@
+import logging
+import time
+from unittest.mock import patch
+
+import pytest
+
+import kiln_ai.utils.timing_logger as timing_logger
+from kiln_ai.utils.timing_logger import TimingLogger, time_operation
+
+
+@pytest.fixture(autouse=True)
+def reset_show_timing():
+    old_show_timing = timing_logger._show_timing
+    yield
+    timing_logger._show_timing = old_show_timing
+
+
+class TestTimingLogger:
+    """Test cases for TimingLogger class."""
+
+    def test_timing_logger_basic_usage(self, caplog):
+        """Test basic timing logger functionality."""
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "test_operation"):
+                time.sleep(0.1)
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][test_operation]" in caplog.records[0].message
+        assert caplog.records[0].levelno == logging.WARNING
+
+    def test_timing_logger_logs_at_warning_level(self, caplog):
+        """Test that timing logger uses WARNING level when enabled."""
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "level_test"):
+                time.sleep(0.05)
+
+        assert len(caplog.records) == 1
+        assert caplog.records[0].levelname == "WARNING"
+        assert caplog.records[0].levelno == logging.WARNING
+
+    def test_timing_logger_not_logged_when_disabled(self, caplog):
+        """Test that timing messages don't appear when timing is disabled."""
+        timing_logger._show_timing = False
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "filtered_test"):
+                time.sleep(0.05)
+
+        assert len(caplog.records) == 0
+
+    @patch("kiln_ai.utils.timing_logger.time.time")
+    def test_timing_calculation(self, mock_time, caplog):
+        """Test that timing calculation is accurate."""
+        timing_logger._show_timing = True
+        # Mock time.time() to return predictable values
+        # Need 3 values: __enter__, __exit__, and logging internal call
+        mock_time.side_effect = [1000.0, 1002.5, 1003.0]  # 2.5 second difference
+
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "precise_test"):
+                pass
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][precise_test][2.50s]" in caplog.records[0].message
+
+    def test_timestamp_format(self):
+        """Test that timestamp format is ISO format."""
+        timing_logger = TimingLogger("test", "timestamp_test")
+        timestamp = timing_logger.timestamp()
+
+        # Should be in ISO format (contains T and colons)
+        assert "T" in timestamp
+        assert ":" in timestamp
+
+
+class TestTimeOperationContextManager:
+    """Test cases for time_operation context manager function."""
+
+    def test_time_operation_basic(self, caplog):
+        """Test basic time_operation context manager."""
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with time_operation("test", "context_test"):
+                time.sleep(0.1)
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][context_test]" in caplog.records[0].message
+
+    def test_time_operation_exception_handling(self, caplog):
+        """Test that timing is still logged even if an exception occurs."""
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with pytest.raises(ValueError):
+                with time_operation("test", "exception_test"):
+                    raise ValueError("Test exception")
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][exception_test]" in caplog.records[0].message
+
+
+class TestShowTimingControl:
+    """Test cases for _show_timing flag control of timing logging."""
+
+    def test_timing_disabled_by_default(self, caplog):
+        """Test that timing is disabled when _show_timing is False."""
+        timing_logger._show_timing = False
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "default_test"):
+                time.sleep(0.05)
+
+        assert len(caplog.records) == 0
+
+    def test_timing_disabled_when_false(self, caplog):
+        """Test that timing is disabled when _show_timing is False."""
+        timing_logger._show_timing = False
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "false_test"):
+                time.sleep(0.05)
+
+        assert len(caplog.records) == 0
+
+    def test_timing_enabled_when_true(self, caplog):
+        """Test that timing is enabled when _show_timing is True."""
+        timing_logger._show_timing = True
+        with caplog.at_level(logging.WARNING):
+            with TimingLogger("test", "true_test"):
+                time.sleep(0.05)
+
+        assert len(caplog.records) == 1
+        assert "timing_logger [test][true_test]" in caplog.records[0].message
diff --git a/libs/core/kiln_ai/utils/timing_logger.py b/libs/core/kiln_ai/utils/timing_logger.py
new file mode 100644
index 000000000..117993e0d
--- /dev/null
+++ b/libs/core/kiln_ai/utils/timing_logger.py
@@ -0,0 +1,70 @@
+import logging
+import os
+import time
+from contextlib import contextmanager
+from datetime import datetime
+from typing import Generator, Optional
+
+logger = logging.getLogger(__name__)
+
+_show_timing = os.getenv("KILN_SHOW_TIMING", "false") == "true"
+
+
+class TimingLogger:
+    """A utility for timing code execution with context manager support."""
+
+    def __init__(self, operation: str, name: str):
+        """
+        Initialize the timing logger.
+
+        Args:
+            name: Name/description of the operation being timed
+            print_start: Whether to print when the operation starts
+        """
+        self.operation = operation
+        self.name = name
+        self.start_time: Optional[float] = None
+
+    def timestamp(self) -> str:
+        """Generate a timestamp string in ISO format."""
+        return datetime.now().isoformat()
+
+    def __enter__(self):
+        """Enter the context manager and start timing."""
+        self.start_time = time.time()
+
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Exit the context manager and print timing results."""
+        if self.start_time is not None:
+            if not _show_timing:
+                return
+
+            duration = time.time() - self.start_time
+            printable_operation = self.operation.replace("[", "_").replace("]", "_")
+            printable_name = self.name.replace("[", "_").replace("]", "_")
+            logger.warning(
+                f"{self.timestamp()} timing_logger [{printable_operation}][{printable_name}][{duration:.2f}s]"
+            )
+
+
+@contextmanager
+def time_operation(
+    operation: str, name: str = "unknown"
+) -> Generator[None, None, None]:
+    """
+    Context manager for timing operations.
+
+    Args:
+        type: Type/category of the operation being timed
+        name: Name/description of the operation being timed
+
+    Example:
+        with time_operation("api", "my_task"):
+            # Your code here
+            time.sleep(1)
+    """
+    timing_logger = TimingLogger(operation, name)
+    with timing_logger:
+        yield