Delay client import to reduce early import memory usage

AlexsanderHamir · AlexsanderHamir · commit b03746b8b9d7 · 2025-11-22T12:14:02.000-08:00
- Move the `client` import from line 1053 to immediately before the `main.py` import (line 1328).
- This delays loading `utils.py` (which imports tiktoken) until after most other imports have run.
- `client` cannot be fully lazy-loaded because `main.py` needs it at import time for the `@client` decorator.
- Reduces the memory footprint during the early import phase.
diff --git a/litellm/__init__.py b/litellm/__init__.py
@@ -1047,10 +1047,6 @@ def add_known_models():
 from .timeout import timeout
 from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
 # Note: remove_index_from_tool_calls is lazy-loaded via __getattr__ to reduce import-time memory cost
-# Note: get_modified_max_tokens is not exported from __init__.py and is only used
-# internally in utils.py, so we don't need to import it here
-# client must be imported immediately as it's used as a decorator at function definition time
-from .utils import client
 # Note: Most other utils imports are lazy-loaded via __getattr__ to avoid loading utils.py 
 # (which imports tiktoken) at import time
 
@@ -1324,6 +1320,7 @@ def add_known_models():
 from .llms.cometapi.embed.transformation import CometAPIEmbeddingConfig
 from .llms.lemonade.chat.transformation import LemonadeChatConfig
 from .llms.snowflake.embedding.transformation import SnowflakeEmbeddingConfig
+from .utils import client
 from .main import *  # type: ignore
 from .integrations import *
 from .llms.custom_httpx.async_client_cleanup import close_litellm_async_clients