diff --git a/README.md b/README.md index 8c6cc49..26a122a 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,262 @@ # MarketInference -A multi-agent parallel inference orchestrator for MarketAgents + +A multi-agent parallel inference orchestrator for managing and coordinating AI model interactions. This library provides a robust framework for handling parallel AI completions, tool management, and structured outputs across multiple AI providers. + +## Features + +### Multi-Provider Support +- OpenAI (GPT models) +- Anthropic (Claude models) +- VLLM +- LiteLLM + +### Core Capabilities +- Parallel request processing with intelligent rate limiting +- Tool registration and execution framework +- Structured message handling and history tracking +- Entity management with immutable patterns +- Comprehensive logging and monitoring +- Type-safe implementations using Pydantic + +### Advanced Features +- Automatic schema validation for tools and outputs +- Asynchronous and synchronous execution support +- UUID-based entity tracking +- Configurable rate limiting per provider +- Message history management +- Tool execution orchestration + +## Dependencies + +Key dependencies and their purposes: +- **AI/ML**: + - `anthropic`: Anthropic Claude API integration + - `openai`: OpenAI API integration + - `tiktoken`: Token counting for rate limiting +- **Web/API**: + - `fastapi`: API framework support + - `uvicorn`: ASGI server + - `aiohttp`: Async HTTP client +- **Data Handling**: + - `pydantic`: Data validation and settings management + - `sqlmodel`: SQL database integration + - `polars`: Data manipulation +- **Development**: + - `pytest`: Testing framework + - `pytest-asyncio`: Async test support + - `pytest-cov`: Test coverage + - `pytest-benchmark`: Performance testing + +## Installation + +### Using pip +```bash +# Clone the repository +git clone https://github.com/marketagents-ai/MarketInference.git +cd MarketInference + +# Install dependencies +pip install -r requirements.txt + +# Optional: Install in development mode +pip install -e . +``` + +### Using Conda +```bash +# Clone the repository +git clone https://github.com/marketagents-ai/MarketInference.git +cd MarketInference + +# Create and activate conda environment +conda create -n market-inference python=3.10 +conda activate market-inference + +# Install dependencies +pip install -r requirements.txt + +# Optional: Install in development mode +pip install -e . +``` + +## Environment Setup + +1. Copy the example environment file: +```bash +cp .env.example .env +``` + +2. Configure the following environment variables in `.env`: +```bash +# OpenAI Configuration +OPENAI_API_KEY=your_openai_key +OPENAI_CONTEXT_LENGTH=8192 + +# Anthropic Configuration +ANTHROPIC_API_KEY=your_anthropic_key +ANTHROPIC_CONTEXT_LENGTH=100000 + +# Azure OpenAI (Optional) +AZURE_OPENAI_API_KEY=your_azure_key +AZURE_OPENAI_ENDPOINT=your_azure_endpoint +AZURE_OPENAI_CONTEXT_LENGTH=8192 + +# Rate Limiting +MAX_REQUESTS_PER_MINUTE=50 +MAX_TOKENS_PER_MINUTE=100000 +``` + +## Quick Start + +1. 
Basic usage example:
```python
import asyncio

from minference.lite.inference import InferenceOrchestrator
from minference.lite.models import (ChatThread, LLMClient, LLMConfig,
                                    ResponseFormat, CallableTool, RequestLimits)

# Configure rate limits and initialize the orchestrator
oai_request_limits = RequestLimits(max_requests_per_minute=50,
                                   max_tokens_per_minute=100000,
                                   provider="openai")
orchestrator = InferenceOrchestrator(oai_request_limits=oai_request_limits)

# Turn a plain callable into a tool
def get_current_weather(location: str, unit: str = "fahrenheit") -> dict:
    """Get the current weather in a given location."""
    return {"location": location, "temperature": 72, "unit": unit}

weather_tool = CallableTool.from_callable(get_current_weather)

# Create a chat thread
chat_thread = ChatThread(
    llm_config=LLMConfig(
        client=LLMClient.openai,
        model="gpt-4",
        response_format=ResponseFormat.tool,
        max_tokens=1000,
        temperature=0
    ),
    tools=[weather_tool],
    new_message="What is the weather in New York?"
)

# Run inference (a single thread still goes through the parallel API)
async def main():
    results = await orchestrator.run_parallel_ai_completion([chat_thread])
    print(results[0].content)

asyncio.run(main())
```

2. Using tools:
```python
from minference.caregistry import CallableRegistry
from minference.lite.models import CallableTool, ChatThread, LLMClient, LLMConfig

# Initialize the callable registry
CallableRegistry()

def calculate_sum(a: int, b: int) -> int:
    """Calculate the sum of two numbers."""
    return a + b

calculate_addition = CallableTool.from_callable(calculate_sum)

# Use in chat with tool calling
chat_thread = ChatThread(
    llm_config=LLMConfig(
        client=LLMClient.anthropic,
        model="claude-3-opus-20240229",
        max_tokens=1000,
        temperature=0
    ),
    tools=[calculate_addition],
    new_message="What is 5 + 3?"
)
```

## Example Files

The `examples/` directory contains several example implementations:

1. `lite_inference_example.py`: Basic inference usage
```python
# Example of basic inference with a single model
from minference.lite.inference import InferenceOrchestrator
# ... (see examples/lite_inference_example.py)
```

2. `lite_sequential_tool_inference.py`: Sequential tool execution
```python
# Example of sequential tool execution
from minference.lite.models import ChatThread
# ... (see examples/lite_sequential_tool_inference.py)
```

3. `lite_tools_inference_example.py`: Tool integration examples
```python
# Example of tool integration
from minference.caregistry import CallableRegistry
# ... (see examples/lite_tools_inference_example.py)
```

4. `tools_setup_example.py`: Complex tool setup
```python
# Example of advanced tool configuration
from minference.lite.models import StructuredTool
# ... (see examples/tools_setup_example.py)
```

5. `pathfinder.ipynb`: Interactive notebook with examples

## Advanced Usage

### Parallel Processing

```python
# Process multiple chat threads in parallel
chat_threads = [
    ChatThread(...),
    ChatThread(...),
    ChatThread(...)
]

results = await orchestrator.run_parallel_ai_completion(chat_threads)
```
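Results come back one per thread, in submission order. A minimal sketch of consuming them, following the pattern in `examples/parallel_inference_demo.py` (the `json_object` attribute carrying the tool call is taken from that example and assumed here):

```python
# Pair each result with the thread that produced it
for thread, result in zip(chat_threads, results):
    print(f"Query: {thread.history[-1].content}")
    print(f"Response: {result.content}")
    if result.json_object:  # present when the model issued a tool call
        print(f"Tool call: {result.json_object.name}")
        print(f"Arguments: {result.json_object.object}")
```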
### Entity Management

```python
from typing import Any, Dict

from minference.lite.models import Entity

class CustomEntity(Entity):
    name: str
    data: Dict[str, Any]

    def process(self) -> None:
        # Process entity data
        pass
```

## Configuration

The library can be configured through environment variables or programmatically:

```python
from minference.lite.models import RequestLimits

# Configure rate limits
limits = RequestLimits(
    max_requests_per_minute=50,
    max_tokens_per_minute=100000,
    provider="openai"
)
```
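Programmatic limits take effect when handed to the orchestrator. A minimal sketch, assuming the `oai_request_limits` keyword shown in the Quick Start (other providers presumably take analogous keywords):

```python
from minference.lite.inference import InferenceOrchestrator

# Apply the OpenAI limits configured above (keyword from the Quick Start example)
orchestrator = InferenceOrchestrator(oai_request_limits=limits)
```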
## Project Structure

```
MarketInference/
├── minference/
│   ├── lite/                  # Lightweight implementation
│   │   ├── inference.py       # Core inference logic
│   │   ├── models.py          # Data models
│   │   └── requests.py        # Request handling
│   ├── core/                  # Core functionality
│   │   └── clients_models.py  # Client implementations
│   ├── base_registry.py       # Base registry implementation
│   ├── caregistry.py          # Callable registry
│   ├── enregistry.py          # Entity registry
│   └── utils.py               # Utility functions
├── tests/                     # Test suite
│   ├── test_fixtures/         # Test fixtures
│   ├── test_registration.py
│   ├── test_schemas.py
│   ├── test_execution.py
│   └── ...                    # Additional tests
├── examples/                  # Usage examples
└── requirements.txt           # Dependencies
```

diff --git a/examples/parallel_inference_demo.py b/examples/parallel_inference_demo.py
new file mode 100644
index 0000000..ab19710
--- /dev/null
+++ b/examples/parallel_inference_demo.py
@@ -0,0 +1,106 @@
"""
Example demonstrating parallel inference with multiple tools.
"""
from typing import Dict, Any
import asyncio
import random
import logging
from datetime import datetime

from minference.lite.inference import InferenceOrchestrator
from minference.lite.models import LLMConfig, LLMClient, ResponseFormat, ChatThread, CallableTool
from minference.caregistry import CallableRegistry
from minference.enregistry import EntityRegistry

# Set up logging
logging.basicConfig(level=logging.INFO)
EntityRegistry._logger = logging.getLogger(__name__)
CallableRegistry._logger = logging.getLogger(__name__)

# Initialize registries
EntityRegistry()
CallableRegistry()

def get_current_weather(location: str, unit: str = "fahrenheit") -> Dict[str, Any]:
    """Get the current weather in a given location."""
    # Simulate an API call with a random temperature
    temperature = random.randint(0, 100)
    return {
        "location": location,
        "temperature": temperature,
        "unit": unit,
        "timestamp": datetime.now().isoformat()
    }

def get_stock_price(symbol: str) -> Dict[str, Any]:
    """Get the current stock price for a given symbol."""
    # Simulate an API call with a random price
    price = random.uniform(10, 1000)
    return {
        "symbol": symbol,
        "price": round(price, 2),
        "currency": "USD",
        "timestamp": datetime.now().isoformat()
    }

# Register tools
CallableRegistry.register("get_current_weather", get_current_weather)
CallableRegistry.register("get_stock_price", get_stock_price)

# Create CallableTools
weather_tool = CallableTool.from_callable(get_current_weather)
stock_tool = CallableTool.from_callable(get_stock_price)

# Create orchestrator
orchestrator = InferenceOrchestrator()

# Run parallel inference
async def main():
    chat_threads = [
        ChatThread(
            llm_config=LLMConfig(
                client=LLMClient.openai,
                model="gpt-4o-mini",
                response_format=ResponseFormat.auto_tools,  # model decides when to call tools
                max_tokens=1000,
                temperature=0
            ),
            tools=[weather_tool]
        ),
        ChatThread(
            llm_config=LLMConfig(
                client=LLMClient.openai,
                model="gpt-4o-mini",
                response_format=ResponseFormat.auto_tools,  # model decides when to call tools
                max_tokens=1000,
                temperature=0
            ),
            tools=[stock_tool]
        )
    ]

    # Add messages to the threads with clear instructions
    weather_message = """Please use the get_current_weather tool to check the weather in New York and Tokyo.
    I need actual temperature data for both cities. Make sure to call the tool twice, once for each city.
    Please provide your response in a clear, structured format."""
    chat_threads[0].new_message = weather_message
    chat_threads[0].add_user_message()

    stock_message = """Please use the get_stock_price tool to check the current stock prices for AAPL and GOOGL.
    I need the actual current prices for both stocks. Make sure to call the tool twice, once for each stock symbol.
    Please provide your response in a clear, structured format."""
    chat_threads[1].new_message = stock_message
    chat_threads[1].add_user_message()

    results = await orchestrator.run_parallel_ai_completion(chat_threads)
    for i, result in enumerate(results):
        print(f"\nQuery: {chat_threads[i].history[-1].content}")
        print(f"Response: {result.content}")
        if result.json_object:
            print(f"Tool Call: {result.json_object.name}")
            print(f"Arguments: {result.json_object.object}")
        print("---")

if __name__ == "__main__":
    asyncio.run(main())
\ No newline at end of file
diff --git a/examples/sequential_tools_demo.py b/examples/sequential_tools_demo.py
new file mode 100644
index 0000000..c19929c
--- /dev/null
+++ b/examples/sequential_tools_demo.py
@@ -0,0 +1,111 @@
"""
Example demonstrating sequential tool execution with structured outputs.
This example shows how to:
1. Register multiple tools with structured outputs
2. Use tools in sequence
3. Handle structured data between tool calls
"""
import asyncio
import logging
from typing import Dict, Any

from minference.lite.inference import InferenceOrchestrator
from minference.lite.models import ChatThread, LLMClient, LLMConfig, CallableTool, ResponseFormat
from minference.caregistry import CallableRegistry
from minference.enregistry import EntityRegistry

# Set up logging
logging.basicConfig(level=logging.INFO)
EntityRegistry._logger = logging.getLogger(__name__)
CallableRegistry._logger = logging.getLogger(__name__)

# Initialize registries
EntityRegistry()
CallableRegistry()

# Register tools for a simple data processing pipeline
def fetch_stock_data(symbol: str) -> Dict[str, Any]:
    """Fetch stock data (mock implementation)."""
    # Mock data
    return {
        "symbol": symbol,
        "price": 150.25,
        "volume": 1000000,
        "change": 2.5
    }

def analyze_sentiment(text: str) -> Dict[str, Any]:
    """Analyze sentiment of text (mock implementation)."""
    return {
        "text": text,
        "sentiment": "positive",
        "confidence": 0.85
    }

def generate_trade_recommendation(
    stock_data: Dict[str, Any],
    sentiment: Dict[str, Any]
) -> Dict[str, Any]:
    """Generate a trade recommendation based on stock data and sentiment."""
    return {
        "symbol": stock_data["symbol"],
        "action": "BUY" if sentiment["sentiment"] == "positive" else "SELL",
        "confidence": sentiment["confidence"] * (stock_data["change"] / 5),
        "target_price": stock_data["price"] * 1.1
    }

# Register the tools
CallableRegistry.register("fetch_stock_data", fetch_stock_data)
CallableRegistry.register("analyze_sentiment", analyze_sentiment)
CallableRegistry.register("generate_trade_recommendation", generate_trade_recommendation)

# Create CallableTools
fetch_stock_tool = CallableTool.from_callable(fetch_stock_data)
analyze_sentiment_tool = CallableTool.from_callable(analyze_sentiment)
generate_recommendation_tool = CallableTool.from_callable(generate_trade_recommendation)

async def main():
    # Initialize the orchestrator
    orchestrator = InferenceOrchestrator()

    # Create a chat thread that walks through the tools in sequence
    chat_thread = ChatThread(
        llm_config=LLMConfig(
            client=LLMClient.openai,
            model="gpt-4o-mini",
            response_format=ResponseFormat.workflow,  # execute the tool calls as a workflow
            max_tokens=1000,
            temperature=0,
        ),
        tools=[fetch_stock_tool, analyze_sentiment_tool, generate_recommendation_tool],
        new_message="""Please help me analyze AAPL stock by following these steps:
        1. Use the fetch_stock_data tool to get current AAPL stock data
        2. Use the analyze_sentiment tool to analyze this news: 'Apple reports record iPhone sales'
        3. Use the generate_trade_recommendation tool with the data from steps 1 and 2 to get a recommendation

        Please execute these tools in sequence and show me the results of each step.
        Please provide your response in JSON format.
        """,
    )

    try:
        # Run inference
        results = await orchestrator.run_parallel_ai_completion([chat_thread])
        result = results[0]  # first result, since we only have one thread

        # Print results
        print("\nAnalysis Results:")
        print(f"Response: {result.content}")
        if result.json_object:
            print("\nTool Call:")
            print(f"Name: {result.json_object.name}")
            print(f"Arguments: {result.json_object.object}")

    except Exception as e:
        print(f"Error during inference: {str(e)}")

if __name__ == "__main__":
    asyncio.run(main())
\ No newline at end of file
+ """, + + + + ) + + # Add the message with clear instructions + try: + # Run inference + results = await orchestrator.run_parallel_ai_completion([chat_thread]) + result = results[0] # Get the first result since we only have one thread + + # Print results + print("\nAnalysis Results:") + print(f"Response: {result.content}") + if result.json_object: + print("\nTool Call:") + print(f"Name: {result.json_object.name}") + print(f"Arguments: {result.json_object.object}") + + except Exception as e: + print(f"Error during inference: {str(e)}") + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/minference/lite/models.py b/minference/lite/models.py index e5b3d69..531c453 100644 --- a/minference/lite/models.py +++ b/minference/lite/models.py @@ -7,7 +7,9 @@ 3. Serialization and persistence capabilities 4. Registry integration for both entities and callables """ -from typing import Dict, Any, Optional, ClassVar, Type, TypeVar, List, Generic, Callable, Literal, Union, Tuple, Self +from typing import Dict, Any, Optional, ClassVar, Type, TypeVar, List, Generic, Callable, Literal, Union, Tuple +from typing_extensions import Self + from enum import Enum from uuid import UUID, uuid4 diff --git a/minference/utils.py b/minference/utils.py index eb48d15..6619cfd 100644 --- a/minference/utils.py +++ b/minference/utils.py @@ -71,7 +71,7 @@ def convert_message(msg: Dict[str, Any]) -> ChatCompletionMessageParam: if "function_call" in msg: assistant_msg["function_call"] = msg["function_call"] if "tool_calls" in msg: - print(f"validating tool_calls during conversion: {[ChatCompletionMessageToolCallParam(**tool_call) for tool_call in msg["tool_calls"]]}") + print(f'validating tool_calls during conversion: {[ChatCompletionMessageToolCallParam(**tool_call) for tool_call in msg["tool_calls"]]}') assistant_msg["tool_calls"] = [ChatCompletionMessageToolCallParam(**tool_call) for tool_call in msg["tool_calls"]] return assistant_msg elif role == "tool": @@ -135,4 +135,4 @@ def convert_message(msg: Dict[str, Any],use_cache:bool=False) -> Union[MessagePa converted_messages.append(convert_message(message,use_cache= use_cache_final)) - return system_message, [msg for msg in converted_messages if msg is not None] + return system_message, [msg for msg in converted_messages if msg is not None] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index bdeb335..b5d684a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,4 +22,5 @@ pytest-benchmark aiohttp polars jsonschema -ipykernel \ No newline at end of file +ipykernel +typing_extensions \ No newline at end of file diff --git a/todo b/todo new file mode 100644 index 0000000..1f1be2b --- /dev/null +++ b/todo @@ -0,0 +1,6 @@ +Pydantic models are in for input and output of tools. +careful with the imports for the tools and the callable registry. +make sure to imports of the tool be in the same file as the callable registry. + +adding more examples with knowledge base and more complex tools. +log setting and how spammy and verbose (also they might be used by agents to reflect and think and look back) the logs are. talk to lizardperson. (make it pydantic !!! ) \ No newline at end of file