diff --git a/app/desktop/dev_server.py b/app/desktop/dev_server.py index 88dc5047b..5763e8a81 100644 --- a/app/desktop/dev_server.py +++ b/app/desktop/dev_server.py @@ -15,7 +15,7 @@ dev_app = make_app() os.environ["DEBUG_EVENT_LOOP"] = "true" - +os.environ["KILN_SHOW_TIMING"] = "true" if __name__ == "__main__": setup_resource_limits() diff --git a/conftest.py b/conftest.py index 90dad2896..835433648 100644 --- a/conftest.py +++ b/conftest.py @@ -33,6 +33,14 @@ def load_env(): load_dotenv() +# Reset Config singleton between tests to prevent state leakage +@pytest.fixture(autouse=True) +def reset_config(): + Config._shared_instance = None + yield + Config._shared_instance = None + + # mock out the settings path so we don't clobber the user's actual settings during tests @pytest.fixture(autouse=True) def use_temp_settings_dir(tmp_path): diff --git a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py index cce9b4a60..10949fdbb 100644 --- a/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py +++ b/libs/core/kiln_ai/adapters/model_adapters/base_adapter.py @@ -30,6 +30,7 @@ from kiln_ai.tools.tool_registry import tool_from_id from kiln_ai.utils.config import Config from kiln_ai.utils.open_ai_types import ChatCompletionMessageParam +from kiln_ai.utils.timing_logger import time_operation @dataclass @@ -99,6 +100,17 @@ async def invoke_returning_run_output( input: InputType, input_source: DataSource | None = None, ) -> Tuple[TaskRun, RunOutput]: + with time_operation("invoke_task", self.task.name): + return await self.invoke_returning_run_output_core(input, input_source) + + async def invoke_returning_run_output_core( + self, + input: InputType, + input_source: DataSource | None = None, + ) -> Tuple[TaskRun, RunOutput]: + """ + Core implementation of invoke_returning_run_output. Wrapped by time_operation to log the time it takes. + """ # validate input, allowing arrays if self.input_schema is not None: validate_schema_with_value_error( diff --git a/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py b/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py index 33828ac18..9ef617976 100644 --- a/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py +++ b/libs/core/kiln_ai/adapters/model_adapters/litellm_adapter.py @@ -46,6 +46,7 @@ ChatCompletionMessageParam, ChatCompletionToolMessageParamWrapper, ) +from kiln_ai.utils.timing_logger import time_operation MAX_CALLS_PER_TURN = 10 MAX_TOOL_CALLS_PER_TURN = 30 @@ -641,7 +642,11 @@ async def process_tool_calls( async def run_tool_and_format( t=tool, c=context, args=parsed_args, tc_id=tool_call.id ): - result = await t.run(c, **args) + tool_name = await t.name() + + with time_operation("tool_run", f"{tool_name}"): + result = await t.run(c, **args) + return ChatCompletionToolMessageParamWrapper( role="tool", tool_call_id=tc_id, diff --git a/libs/core/kiln_ai/utils/test_timing_logger.py b/libs/core/kiln_ai/utils/test_timing_logger.py new file mode 100644 index 000000000..1aeb3f4e3 --- /dev/null +++ b/libs/core/kiln_ai/utils/test_timing_logger.py @@ -0,0 +1,131 @@ +import logging +import time +from unittest.mock import patch + +import pytest + +import kiln_ai.utils.timing_logger as timing_logger +from kiln_ai.utils.timing_logger import TimingLogger, time_operation + + +@pytest.fixture(autouse=True) +def reset_show_timing(): + old_show_timing = timing_logger._show_timing + yield + timing_logger._show_timing = old_show_timing + + +class TestTimingLogger: + """Test cases for TimingLogger class.""" + + def test_timing_logger_basic_usage(self, caplog): + """Test basic timing logger functionality.""" + timing_logger._show_timing = True + with caplog.at_level(logging.WARNING): + with TimingLogger("test", "test_operation"): + time.sleep(0.1) + + assert len(caplog.records) == 1 + assert "timing_logger [test][test_operation]" in caplog.records[0].message + assert caplog.records[0].levelno == logging.WARNING + + def test_timing_logger_logs_at_warning_level(self, caplog): + """Test that timing logger uses WARNING level when enabled.""" + timing_logger._show_timing = True + with caplog.at_level(logging.WARNING): + with TimingLogger("test", "level_test"): + time.sleep(0.05) + + assert len(caplog.records) == 1 + assert caplog.records[0].levelname == "WARNING" + assert caplog.records[0].levelno == logging.WARNING + + def test_timing_logger_not_logged_when_disabled(self, caplog): + """Test that timing messages don't appear when timing is disabled.""" + timing_logger._show_timing = False + with caplog.at_level(logging.WARNING): + with TimingLogger("test", "filtered_test"): + time.sleep(0.05) + + assert len(caplog.records) == 0 + + @patch("kiln_ai.utils.timing_logger.time.time") + def test_timing_calculation(self, mock_time, caplog): + """Test that timing calculation is accurate.""" + timing_logger._show_timing = True + # Mock time.time() to return predictable values + # Need 3 values: __enter__, __exit__, and logging internal call + mock_time.side_effect = [1000.0, 1002.5, 1003.0] # 2.5 second difference + + with caplog.at_level(logging.WARNING): + with TimingLogger("test", "precise_test"): + pass + + assert len(caplog.records) == 1 + assert "timing_logger [test][precise_test][2.50s]" in caplog.records[0].message + + def test_timestamp_format(self): + """Test that timestamp format is ISO format.""" + timing_logger = TimingLogger("test", "timestamp_test") + timestamp = timing_logger.timestamp() + + # Should be in ISO format (contains T and colons) + assert "T" in timestamp + assert ":" in timestamp + + +class TestTimeOperationContextManager: + """Test cases for time_operation context manager function.""" + + def test_time_operation_basic(self, caplog): + """Test basic time_operation context manager.""" + timing_logger._show_timing = True + with caplog.at_level(logging.WARNING): + with time_operation("test", "context_test"): + time.sleep(0.1) + + assert len(caplog.records) == 1 + assert "timing_logger [test][context_test]" in caplog.records[0].message + + def test_time_operation_exception_handling(self, caplog): + """Test that timing is still logged even if an exception occurs.""" + timing_logger._show_timing = True + with caplog.at_level(logging.WARNING): + with pytest.raises(ValueError): + with time_operation("test", "exception_test"): + raise ValueError("Test exception") + + assert len(caplog.records) == 1 + assert "timing_logger [test][exception_test]" in caplog.records[0].message + + +class TestShowTimingControl: + """Test cases for _show_timing flag control of timing logging.""" + + def test_timing_disabled_by_default(self, caplog): + """Test that timing is disabled when _show_timing is False.""" + timing_logger._show_timing = False + with caplog.at_level(logging.WARNING): + with TimingLogger("test", "default_test"): + time.sleep(0.05) + + assert len(caplog.records) == 0 + + def test_timing_disabled_when_false(self, caplog): + """Test that timing is disabled when _show_timing is False.""" + timing_logger._show_timing = False + with caplog.at_level(logging.WARNING): + with TimingLogger("test", "false_test"): + time.sleep(0.05) + + assert len(caplog.records) == 0 + + def test_timing_enabled_when_true(self, caplog): + """Test that timing is enabled when _show_timing is True.""" + timing_logger._show_timing = True + with caplog.at_level(logging.WARNING): + with TimingLogger("test", "true_test"): + time.sleep(0.05) + + assert len(caplog.records) == 1 + assert "timing_logger [test][true_test]" in caplog.records[0].message diff --git a/libs/core/kiln_ai/utils/timing_logger.py b/libs/core/kiln_ai/utils/timing_logger.py new file mode 100644 index 000000000..117993e0d --- /dev/null +++ b/libs/core/kiln_ai/utils/timing_logger.py @@ -0,0 +1,70 @@ +import logging +import os +import time +from contextlib import contextmanager +from datetime import datetime +from typing import Generator, Optional + +logger = logging.getLogger(__name__) + +_show_timing = os.getenv("KILN_SHOW_TIMING", "false") == "true" + + +class TimingLogger: + """A utility for timing code execution with context manager support.""" + + def __init__(self, operation: str, name: str): + """ + Initialize the timing logger. + + Args: + name: Name/description of the operation being timed + print_start: Whether to print when the operation starts + """ + self.operation = operation + self.name = name + self.start_time: Optional[float] = None + + def timestamp(self) -> str: + """Generate a timestamp string in ISO format.""" + return datetime.now().isoformat() + + def __enter__(self): + """Enter the context manager and start timing.""" + self.start_time = time.time() + + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Exit the context manager and print timing results.""" + if self.start_time is not None: + if not _show_timing: + return + + duration = time.time() - self.start_time + printable_operation = self.operation.replace("[", "_").replace("]", "_") + printable_name = self.name.replace("[", "_").replace("]", "_") + logger.warning( + f"{self.timestamp()} timing_logger [{printable_operation}][{printable_name}][{duration:.2f}s]" + ) + + +@contextmanager +def time_operation( + operation: str, name: str = "unknown" +) -> Generator[None, None, None]: + """ + Context manager for timing operations. + + Args: + type: Type/category of the operation being timed + name: Name/description of the operation being timed + + Example: + with time_operation("api", "my_task"): + # Your code here + time.sleep(1) + """ + timing_logger = TimingLogger(operation, name) + with timing_logger: + yield