The main agent class for intelligent AI model cascading with automatic cost optimization.
from cascadeflow import CascadeAgent, ModelConfig

CascadeAgent(
models: List[ModelConfig],
quality_config: Optional[QualityConfig] = None,
cascade: Optional[CascadeConfig] = None
)

Parameters:
- models (List[ModelConfig], required): List of models to cascade through, automatically sorted by cost
- quality_config (QualityConfig, optional): Quality validation configuration
- cascade (CascadeConfig, optional): Advanced cascade settings
Example:
from cascadeflow import CascadeAgent, ModelConfig
agent = CascadeAgent(models=[
ModelConfig(name="gpt-4o-mini", provider="openai", cost=0.00015),
ModelConfig(name="gpt-4o", provider="openai", cost=0.00625)
])

Execute a query with automatic cascading.
async def run(
prompt: str | List[Message],
max_tokens: Optional[int] = None,
temperature: Optional[float] = None,
system_prompt: Optional[str] = None,
tools: Optional[List[Dict]] = None,
force_direct: bool = False
) -> CascadeResult

Parameters:
- prompt (str or List[Message]): Query text or message list
- max_tokens (int, optional): Maximum tokens to generate
- temperature (float, optional): Temperature (0-2), default: 0.7
- system_prompt (str, optional): System prompt override
- tools (List[Dict], optional): Tools/functions available for calling
- force_direct (bool, optional): Skip cascade, use best model directly
Returns: CascadeResult - Result object with content, costs, and metrics
Example:
# Basic usage
result = await agent.run("What is Python?")
print(result.content)
print(f"Cost: ${result.total_cost:.6f}")
# With options
result = await agent.run(
"Explain quantum computing",
max_tokens=500,
temperature=0.3,
system_prompt="You are a physics expert"
)

Stream responses with real-time events.
async def stream(
prompt: str | List[Message],
max_tokens: Optional[int] = None,
temperature: Optional[float] = None,
system_prompt: Optional[str] = None,
tools: Optional[List[Dict]] = None
) -> AsyncIterator[StreamEvent]

Parameters: Same as run()
Yields: StreamEvent - Stream events with incremental content
Example:
async for event in agent.stream("Tell me a story"):
if event.type == StreamEventType.CONTENT_DELTA:
print(event.content, end="", flush=True)
elif event.type == StreamEventType.COMPLETE:
print(f"\nCost: ${event.total_cost:.6f}")

Create agent from built-in preset configuration.
@classmethod
def from_preset(cls, preset: PresetConfig) -> CascadeAgent

Parameters:
- preset (PresetConfig): Preset configuration (e.g., PRESET_BEST_OVERALL)
Returns: CascadeAgent - Configured agent instance
Example:
from cascadeflow import CascadeAgent, PRESET_BEST_OVERALL, PRESET_ULTRA_FAST
# Balanced performance
agent = CascadeAgent.from_preset(PRESET_BEST_OVERALL)
# Optimized for speed
agent_fast = CascadeAgent.from_preset(PRESET_ULTRA_FAST)

from cascadeflow import CascadeAgent, ModelConfig
agent = CascadeAgent(models=[
ModelConfig(name="gpt-4o-mini", provider="openai", cost=0.00015),
ModelConfig(name="gpt-4o", provider="openai", cost=0.00625)
])
result = await agent.run("What is the capital of France?")
print(f"Answer: {result.content}")
print(f"Model used: {result.model_used}")
print(f"Cost: ${result.total_cost:.6f}")
print(f"Savings: {result.cost_saved_percentage:.1f}%")

from cascadeflow import CascadeAgent, ModelConfig, QualityConfig
agent = CascadeAgent(
models=[
ModelConfig(name="gpt-4o-mini", provider="openai", cost=0.00015),
ModelConfig(name="gpt-4o", provider="openai", cost=0.00625)
],
quality_config=QualityConfig(
threshold=0.8, # Stricter quality requirements
require_minimum_tokens=20
)
)
result = await agent.run("Explain machine learning")

agent = CascadeAgent(models=[
# Groq (fastest)
ModelConfig(name="llama-3.1-8b", provider="groq", cost=0.00005),
# OpenAI (balanced)
ModelConfig(name="gpt-4o-mini", provider="openai", cost=0.000375),
# Anthropic (quality)
ModelConfig(name="claude-sonnet-4-5", provider="anthropic", cost=0.003)
])

tools = [
{
"name": "get_weather",
"description": "Get current weather for a location",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string", "description": "City name"}
},
"required": ["location"]
}
}
]
result = await agent.run(
"What's the weather in San Francisco?",
tools=tools
)
if result.has_tool_calls:
for call in result.tool_calls:
print(f"Tool: {call['name']}")
print(f"Args: {call['arguments']}")

async for event in agent.stream("Write a short poem about AI"):
match event.type:
case StreamEventType.MODEL_SELECTED:
print(f"Using model: {event.model}")
case StreamEventType.CONTENT_DELTA:
print(event.content, end="", flush=True)
case StreamEventType.CASCADE_TRIGGERED:
print("\n[Escalating to better model...]")
case StreamEventType.COMPLETE:
print(f"\nTotal cost: ${event.total_cost:.6f}")

- ModelConfig - Model configuration options (includes QualityConfig)
- CascadeResult - Result object documentation
- Streaming Guide - Streaming responses
- Tools Guide - Function calling
- Presets Guide - Built-in preset configurations