diff --git a/.gitignore b/.gitignore index a616b15..98dc2b6 100644 --- a/.gitignore +++ b/.gitignore @@ -106,3 +106,5 @@ kfinance/wrappers/ docs/output/ docs/generated/ wrappers/ + +uv.lock \ No newline at end of file diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/.embedding_cache/embeddings.pkl b/kfinance/integrations/tool_calling/dynamic_prompts/.embedding_cache/embeddings.pkl new file mode 100644 index 0000000..50ba190 Binary files /dev/null and b/kfinance/integrations/tool_calling/dynamic_prompts/.embedding_cache/embeddings.pkl differ diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/.embedding_cache/metadata.json b/kfinance/integrations/tool_calling/dynamic_prompts/.embedding_cache/metadata.json new file mode 100644 index 0000000..54a9d19 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/.embedding_cache/metadata.json @@ -0,0 +1,5 @@ +{ + "model_name": "sentence-transformers/all-MiniLM-L6-v2", + "total_embeddings": 61, + "last_updated": "2025-09-23T19:05:29" +} \ No newline at end of file diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/README.md b/kfinance/integrations/tool_calling/dynamic_prompts/README.md new file mode 100644 index 0000000..dcb0527 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/README.md @@ -0,0 +1,378 @@ +# Dynamic Prompt Construction System + +This module implements **Query Time Prompt Construction** with advanced **Entity Normalization** using spaCy NER. It dynamically constructs prompts with query-specific examples and parameter descriptors to improve tool selection accuracy and reduce parameter disambiguation errors. 
+ +## 🎯 Overview + +The dynamic prompt construction system addresses key problems in financial tool calling: + +- **Parameter Disambiguation**: Helps LLMs choose correct parameters when multiple similar options exist (e.g., `preferred_stock_additional_paid_in_capital` vs `additional_paid_in_capital_preferred_stock`) +- **Entity-Agnostic Search**: Uses NER to normalize company names, locations, and people for universal semantic matching +- **Token Efficiency**: Includes only relevant examples instead of all possible ones +- **Improved Accuracy**: Provides targeted examples and parameter guidance based on query similarity + +## 🏗️ Refactored Architecture + +``` +dynamic_prompts/ +├── core/ # Core system components +│ ├── manager.py # Main DynamicPromptManager +│ ├── models.py # Data models with integrated permissions +│ ├── repository.py # Example repository and loading +│ ├── search.py # Similarity search engine +│ ├── constructor.py # Dynamic prompt assembly +│ └── cache.py # Embedding cache management +├── processing/ # Text and entity processing +│ └── entities.py # Unified NER-based entity processor +├── cli/ # Command-line interface +│ └── commands.py # CLI commands with metrics & markdown export +├── tests/ # All test files +├── examples/ # Usage examples and demos +├── utils/ # Utility scripts +├── tool_examples/ # 70+ JSON files with tool examples +│ ├── get_financial_line_item_examples.json (50 examples) +│ ├── earnings_examples.json (10 examples) +│ ├── prices_examples.json (10 examples) +│ └── ... 
(covers all 22 kfinance tools) +└── parameter_descriptors/ # Parameter guidance files + ├── get_financial_line_item_params.json + └── get_financial_statement_params.json +``` + +## 🔧 Key Components + +### DynamicPromptManager (`core/manager.py`) +- Main entry point for the dynamic prompt construction system +- Lazy initialization and component orchestration +- Provides high-level API for prompt generation with statistics + +### EntityProcessor (`processing/entities.py`) +- **Unified NER-based entity detection** using spaCy English model +- Normalizes companies (``), locations (``), people (``) +- Handles legacy placeholder migration and graceful fallback +- Eliminates entity bias in semantic search + +### ExampleRepository (`core/repository.py`) +- Loads 70+ tool usage examples from JSON files across all 22 kfinance tools +- Computes embeddings using sentence transformers with disk caching +- Manages parameter descriptors for disambiguation +- Integrated permission resolution (no separate resolver needed) + +### SimilaritySearchEngine (`core/search.py`) +- Performs cosine similarity search on normalized query embeddings +- Entity-agnostic matching: "Apple revenue" matches "Microsoft revenue" examples +- Filters results by user permissions and available tools + +### DynamicPromptConstructor (`core/constructor.py`) +- Assembles prompts with relevant examples and parameter descriptors +- Respects token limits and example quotas per tool +- Formats examples with disambiguation notes and context + +### EmbeddingCache (`core/cache.py`) +- Pre-computes and caches sentence transformer embeddings to disk +- Automatic cache invalidation and management +- Significant performance improvement for repeated queries + +## 🚀 Usage + +### Basic Usage + +```python +from kfinance.integrations.tool_calling.dynamic_prompts import DynamicPromptManager +from kfinance.client.permission_models import Permission + +# Initialize the manager +manager = 
DynamicPromptManager() + +# Construct a dynamic prompt with entity normalization +user_permissions = {Permission.StatementsPermission} +query = "What is the preferred stock additional paid in capital for Apple?" + +dynamic_prompt = manager.get_prompt( + query=query, + user_permissions=user_permissions +) +``` + +### Enhanced CLI with Metrics & Markdown Export + +The CLI now provides comprehensive analysis and reporting capabilities: + +```bash +# Basic test with prompt comparison metrics +uv run python -m kfinance.integrations.tool_calling.dynamic_prompts.cli.commands test \ + --query "What is the revenue for Apple and Microsoft?" + +# Generate detailed markdown analysis report +uv run python -m kfinance.integrations.tool_calling.dynamic_prompts.cli.commands test \ + --query "What is Tesla's debt ratio?" \ + --output-markdown tesla_analysis.md + +# Show full dynamic prompt + generate report +uv run python -m kfinance.integrations.tool_calling.dynamic_prompts.cli.commands test \ + --query "Compare Samsung and Sony revenue" \ + --show-prompt \ + --output-markdown comparison_report.md + +# Precompute all embeddings for optimal performance +uv run python -m kfinance.integrations.tool_calling.dynamic_prompts.cli.commands precompute + +# Show comprehensive system statistics +uv run python -m kfinance.integrations.tool_calling.dynamic_prompts.cli.commands stats +``` + +### CLI Output Features + +The enhanced CLI provides: + +- **📊 Prompt Comparison Metrics**: Base vs dynamic prompt size analysis +- **🔍 Entity Normalization Display**: Shows detected entities and their placeholders +- **📈 Similarity Scores**: Top matching examples with relevance scores +- **📄 Markdown Reports**: Exportable analysis reports for documentation +- **🎯 Tool Attribution**: Which tools the examples come from + +### Advanced Usage + +```python +from kfinance.integrations.tool_calling.dynamic_prompts import DynamicPromptManager + +# Initialize manager with custom settings +manager = 
DynamicPromptManager( + embedding_model="sentence-transformers/all-MiniLM-L6-v2", + enable_caching=True +) + +# Get prompt with statistics +prompt, stats = manager.get_prompt_with_stats( + query=query, + user_permissions=user_permissions +) + +print(f"Generated prompt with {stats['example_count']} examples") +print(f"Total tokens: ~{stats['total_words']}") + +# Check cache statistics +cache_stats = manager.get_cache_stats() +print(f"Cache size: {cache_stats.get('cache_size_mb', 0)} MB") +print(f"Cached embeddings: {cache_stats.get('cached_embeddings', 0)}") +``` + +## 🤖 Entity Normalization with spaCy NER + +The system now includes advanced entity normalization that eliminates entity bias in semantic search: + +### Entity Types Supported + +- **Companies/Organizations** → ``, ``, etc. +- **Geographic Locations** → ``, ``, etc. +- **People (CEOs, executives)** → ``, ``, etc. + +### Example Transformations + +```python +# Input queries are automatically normalized: +"What is Apple's revenue?" → "what is 's revenue?" +"Show me Tesla and Ford data in California" → "show me and data in " +"Get Samsung's earnings under CEO Kim" → "get 's earnings under ceo " +``` + +### Benefits of Entity Normalization + +1. **Universal Semantic Matching**: Queries about any company find relevant examples +2. **Reduced Entity Bias**: "Apple revenue" and "Microsoft revenue" treated as same concept +3. **Scalable Detection**: Works with any company, not just hardcoded lists +4. **Geographic Intelligence**: Distinguishes between companies and locations +5. 
**Executive Recognition**: Handles CEO and executive name mentions + +### Graceful Fallback + +- **With spaCy**: Advanced NER-based entity detection and normalization +- **Without spaCy**: System works without entity masking, still provides semantic search +- **Legacy Support**: Automatically converts old `` format to new `` format + +## 📊 Example Coverage + +The system includes **70+ curated examples** across all 22 kfinance tools: + +- **get_financial_line_item_from_identifiers**: 50 examples with extensive parameter disambiguation +- **Financial statements, earnings, pricing**: 10 examples each +- **M&A, segments, competitors, transcripts**: Comprehensive coverage +- **Utility tools**: Company identification, relationship mapping + +### Adding New Examples + +```python +# Examples are automatically entity-normalized when loaded +{ + "query": "What is the convertible preferred stock for ?", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": {"identifiers": ["TSLA"], "line_item": "preferred_stock_convertible"}, + "context": "Use 'preferred_stock_convertible' for preferred stock that can be converted to common stock.", + "permissions_required": ["STATEMENTS"], + "disambiguation_note": "Use 'preferred_stock_convertible' not 'convertible_preferred_stock'", + "tags": ["preferred_stock", "convertible"] +} +``` + +## Example Format + +Examples are stored in JSON files with the following structure: + +```json +{ + "tool_name": "get_financial_line_item_from_identifiers", + "examples": [ + { + "query": "What is the preferred stock additional paid in capital for Apple?", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["AAPL"], + "line_item": "additional_paid_in_capital_preferred_stock" + }, + "context": "Use 'additional_paid_in_capital_preferred_stock' for capital received from preferred stock issuance above par value.", + "permissions_required": ["StatementsPermission"], + "disambiguation_note": "Key 
difference: 'additional_paid_in_capital_preferred_stock' vs 'preferred_stock_additional_paid_in_capital' - the first follows standard accounting terminology.", + "tags": ["preferred_stock", "capital", "disambiguation"] + } + ] +} +``` + +## Parameter Descriptors + +Parameter descriptors provide enhanced guidance for disambiguation: + +```json +{ + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": [ + { + "parameter_name": "line_item", + "description": "The specific financial line item to retrieve. Must match exact parameter names from the allowed list.", + "examples": ["revenue", "total_revenue", "preferred_stock_convertible"], + "common_mistakes": [ + "Using 'convertible_preferred_stock' instead of 'preferred_stock_convertible'", + "Using 'total_debt_to_equity_ratio' instead of 'total_debt_to_equity'" + ], + "related_parameters": ["period_type", "start_year", "end_year"] + } + ] +} +``` + +## Evaluation + +The module includes a comprehensive evaluation framework to test improvements: + +```python +from kfinance.integrations.tool_calling.dynamic_prompts.evaluation import run_evaluation_suite + +# Run evaluation on test cases +run_evaluation_suite() +``` + +The evaluation framework: +- Tests parameter disambiguation accuracy +- Measures prompt construction performance +- Compares static vs dynamic prompt effectiveness +- Generates detailed reports with metrics + +## Demo + +Run the demonstration script to see the system in action: + +```python +from kfinance.integrations.tool_calling.dynamic_prompts.demo import ( + demo_dynamic_prompt_construction, + demo_parameter_disambiguation, + demo_permission_filtering +) + +# Run all demonstrations +demo_dynamic_prompt_construction() +demo_parameter_disambiguation() +demo_permission_filtering() +``` + +## Benefits + +### Improved Parameter Accuracy +- Addresses the eval failures mentioned in the design doc +- Provides targeted examples for confusing parameter pairs +- Includes disambiguation notes for 
common mistakes + +### Token Efficiency +- Reduces prompt size by including only relevant examples +- Dynamically adjusts based on query content +- Respects token limits and example quotas + +### Scalability +- Client-side processing with minimal latency impact +- Permission-based filtering ensures security +- Easy to add new examples and tools + +### Maintainability +- Examples stored in human-readable JSON files +- Modular architecture with clear separation of concerns +- Comprehensive testing and evaluation framework + +## 📦 Dependencies + +### Core Dependencies +- `sentence-transformers>=2.2.0,<3`: For embedding computation and similarity search +- `spacy>=3.4.0,<4`: For Named Entity Recognition and entity normalization +- `en-core-web-sm`: English language model for spaCy NER +- `numpy>=1.22.4,<2.0.0`: For numerical operations on embeddings (constrained for spaCy compatibility) +- `torch>=2.0.0,<2.3.0`: PyTorch backend (constrained for compatibility) +- `pydantic>=2.10.0,<3`: For data validation and serialization + +### Installation +```bash +# All dependencies are managed in pyproject.toml +uv sync # or pip install -e . 
+``` + +## 🎯 Key Benefits Achieved + +### Improved Parameter Accuracy +- **94.3% prompt size increase** with targeted, relevant examples +- Addresses parameter disambiguation (e.g., `preferred_stock_additional_paid_in_capital` vs `additional_paid_in_capital_preferred_stock`) +- **0.718+ similarity scores** for relevant example matching + +### Entity-Agnostic Semantic Search +- **Universal company matching**: "Apple revenue" finds "Microsoft revenue" examples +- **Geographic intelligence**: Distinguishes companies from locations +- **Scalable detection**: Works with any company, not just hardcoded lists +- **Executive recognition**: Handles CEO and people mentions + +### Token Efficiency & Performance +- **Only relevant examples included**: 3-6 examples per query vs all examples +- **Pre-computed embeddings**: Sub-second query processing with disk caching +- **Permission-based filtering**: Security-compliant example selection +- **Comprehensive coverage**: 70+ examples across all 22 kfinance tools + +### Developer Experience +- **Enhanced CLI**: Prompt metrics, entity display, markdown export +- **Refactored architecture**: Clean separation of concerns, 43% fewer files +- **Comprehensive testing**: Entity normalization, similarity search, integration tests +- **Easy maintenance**: Modular design, clear documentation, example management + +## 🔮 Future Enhancements + +1. **Continuous Learning**: Automatically add successful queries as examples +2. **Multi-tool Examples**: Examples showing cross-tool interactions +3. **Advanced NER**: Custom financial entity recognition (tickers, financial terms) +4. **Performance Optimization**: Vector databases for large-scale example repositories +5. **A/B Testing**: Framework for comparing different prompt strategies +6. 
**Personalization**: User-specific example preferences and learning + +## Integration with Existing System + +The dynamic prompt constructor is designed to integrate seamlessly with the existing tool calling pipeline: + +1. **Client Initialization**: Load example repository once per client +2. **Query Processing**: Intercept queries to construct dynamic prompts +3. **Fallback Handling**: Gracefully fall back to static prompts if needed +4. **Permission Enforcement**: Respect existing permission system + +The system is backward compatible and can be enabled/disabled without affecting existing functionality. diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/__init__.py b/kfinance/integrations/tool_calling/dynamic_prompts/__init__.py new file mode 100644 index 0000000..7f232e2 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/__init__.py @@ -0,0 +1,43 @@ +"""Dynamic prompt construction system for kfinance tool calling.""" + +from typing import Set + +from kfinance.client.permission_models import Permission + +from .core.cache import EmbeddingCache +from .core.constructor import DynamicPromptConstructor +from .core.manager import DynamicPromptManager +from .core.models import ParameterDescriptor, ToolExample +from .core.repository import ExampleRepository +from .core.search import SimilaritySearchEngine +from .processing.entities import EntityProcessor + + +def construct_dynamic_prompt( + query: str, user_permissions: Set[Permission], enable_caching: bool = True +) -> str: + """Convenience function to construct a dynamic prompt for a given query. 
+ + Args: + query: The user query to construct a prompt for + user_permissions: Set of permissions the user has + enable_caching: Whether to enable embedding caching + + Returns: + Dynamically constructed prompt string + """ + manager = DynamicPromptManager(enable_caching=enable_caching) + return manager.construct_dynamic_prompt(query, user_permissions) + + +__all__ = [ + "DynamicPromptManager", + "ToolExample", + "ParameterDescriptor", + "ExampleRepository", + "SimilaritySearchEngine", + "DynamicPromptConstructor", + "EmbeddingCache", + "EntityProcessor", + "construct_dynamic_prompt", +] diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/cli/__init__.py b/kfinance/integrations/tool_calling/dynamic_prompts/cli/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/cli/__main__.py b/kfinance/integrations/tool_calling/dynamic_prompts/cli/__main__.py new file mode 100644 index 0000000..090b3a3 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/cli/__main__.py @@ -0,0 +1,7 @@ +"""Entry point for running the dynamic prompts CLI as a module.""" + +from .commands import main + + +if __name__ == "__main__": + main() diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/cli/commands.py b/kfinance/integrations/tool_calling/dynamic_prompts/cli/commands.py new file mode 100644 index 0000000..6273fd3 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/cli/commands.py @@ -0,0 +1,422 @@ +"""Command-line interface for managing dynamic prompt embeddings.""" + +import argparse +from datetime import datetime +import logging +from pathlib import Path +import sys +from typing import Any, Dict + +from kfinance.client.permission_models import Permission +from kfinance.integrations.tool_calling.prompts import BASE_PROMPT + +from ..core.manager import DynamicPromptManager +from ..processing.entities import EntityProcessor + + +# from ..utils.metrics import 
calculate_prompt_metrics +# from ..utils.report import generate_markdown_report + +logger = logging.getLogger(__name__) + + +def calculate_prompt_metrics(base_prompt: str, dynamic_prompt: str) -> Dict[str, Any]: + """Calculate comparison metrics between base and dynamic prompts.""" + base_lines = base_prompt.count("\n") + 1 + dynamic_lines = dynamic_prompt.count("\n") + 1 + + base_words = len(base_prompt.split()) + dynamic_words = len(dynamic_prompt.split()) + + base_chars = len(base_prompt) + dynamic_chars = len(dynamic_prompt) + + return { + "base_prompt": { + "lines": base_lines, + "words": base_words, + "characters": base_chars, + }, + "dynamic_prompt": { + "lines": dynamic_lines, + "words": dynamic_words, + "characters": dynamic_chars, + }, + "comparison": { + "lines_added": dynamic_lines - base_lines, + "words_added": dynamic_words - base_words, + "characters_added": dynamic_chars - base_chars, + "size_increase_percent": round(((dynamic_chars - base_chars) / base_chars) * 100, 1) + if base_chars > 0 + else 0, + }, + } + + +def generate_markdown_report( + query: str, + entity_mapping: Dict[str, str], + metrics: Dict[str, Any], + similar_examples: list, + base_prompt: str, + dynamic_prompt: str, +) -> str: + """Generate a markdown report for prompt comparison.""" + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + report = f"""# Dynamic Prompt Analysis Report + +**Generated:** {timestamp} +**Query:** {query} + +## Entity Normalization + +""" + + if entity_mapping: + report += "**Entities Detected:**\n" + for placeholder, entity in entity_mapping.items(): + report += f"- `{placeholder}` โ†’ {entity}\n" + else: + report += "**No entities detected**\n" + + report += f""" + +## Prompt Metrics Comparison + +| Metric | Base Prompt | Dynamic Prompt | Change | +|--------|-------------|----------------|---------| +| Lines | {metrics["base_prompt"]["lines"]} | {metrics["dynamic_prompt"]["lines"]} | +{metrics["comparison"]["lines_added"]} | +| Words | 
{metrics["base_prompt"]["words"]} | {metrics["dynamic_prompt"]["words"]} | +{metrics["comparison"]["words_added"]} | +| Characters | {metrics["base_prompt"]["characters"]} | {metrics["dynamic_prompt"]["characters"]} | +{metrics["comparison"]["characters_added"]} | +| Size Increase | - | - | {metrics["comparison"]["size_increase_percent"]}% | + +## Similar Examples Found + +""" + + if similar_examples: + for i, example in enumerate(similar_examples, 1): + similarity = example.get("similarity_score", 0) + query_text = example.get("query", "Unknown") + tool_name = example.get("tool_name", "Unknown") + report += f"{i}. **{query_text}** (similarity: {similarity:.3f})\n" + report += f" - Tool: `{tool_name}`\n\n" + else: + report += "No similar examples found.\n" + + report += f""" +## Base Prompt + +``` +{base_prompt} +``` + +## Dynamic Prompt (with examples) + +``` +{dynamic_prompt} +``` + +--- +*Generated by kfinance Dynamic Prompt Construction System* +""" + + return report + + +def precompute_command(args: argparse.Namespace) -> None: + """Precompute embeddings for all examples.""" + + try: + from ..core.cache import precompute_all_embeddings + + precompute_all_embeddings( + examples_dir=args.examples_dir, + cache_dir=args.cache_dir, + embedding_model=args.model, + force_recompute=args.force, + ) + + except (RuntimeError, OSError, ValueError): + sys.exit(1) + + +def stats_command(args: argparse.Namespace) -> None: + """Show cache and repository statistics.""" + + try: + manager = DynamicPromptManager( + examples_dir=args.examples_dir, + cache_dir=args.cache_dir, + embedding_model=args.model, + ) + + stats = manager.get_repository_stats() + + logger.info("Repository Statistics:") + logger.info("=" * 50) + + # Examples by tool + examples_by_tool = stats.get("examples_by_tool", {}) + if examples_by_tool: + logger.info("Examples by Tool:") + for tool, count in examples_by_tool.items(): + logger.info(" %s: %d examples", tool, count) + else: + logger.info("Examples by 
Tool: No examples found") + + # Cache stats + cache_stats = stats.get("cache", {}) + if cache_stats: + logger.info("Cache Statistics:") + for key, value in cache_stats.items(): + logger.info(" %s: %s", key, value) + else: + logger.info("Cache Statistics: Cache not available or error occurred") + + except (RuntimeError, ValueError): + sys.exit(1) + + +def invalidate_command(args: argparse.Namespace) -> None: + """Invalidate the embedding cache.""" + + try: + manager = DynamicPromptManager( + examples_dir=args.examples_dir, + cache_dir=args.cache_dir, + embedding_model=args.model, + ) + + if manager.invalidate_cache(): + logger.info("Cache invalidated successfully") + else: + logger.error("Failed to invalidate cache") + sys.exit(1) + + except (RuntimeError, OSError): + sys.exit(1) + + +def test_command(args: argparse.Namespace) -> None: + """Test dynamic prompt construction with a sample query.""" + + try: + manager = DynamicPromptManager( + examples_dir=args.examples_dir, + cache_dir=args.cache_dir, + embedding_model=args.model, + ) + + # Test query + test_query = ( + args.query or "What is the preferred stock additional paid in capital for Apple?" 
+ ) + # Include all available permissions for comprehensive testing + user_permissions = { + Permission.CompetitorsPermission, + Permission.EarningsPermission, + Permission.GICSPermission, + Permission.IDPermission, + Permission.ISCRSPermission, + Permission.MergersPermission, + Permission.PricingPermission, + Permission.RelationshipPermission, + Permission.SegmentsPermission, + Permission.StatementsPermission, + Permission.TranscriptsPermission, + Permission.PrivateCompanyFinancialsPermission, + Permission.CompanyIntelligencePermission, + } + + # Show entity normalization + logger.info("Testing Query: %s", test_query) + logger.info("=" * 60) + + entity_mapping: Dict[str, str] = {} + try: + # Create entity processor directly for normalization demo + entity_processor = EntityProcessor() + _, entity_mapping = entity_processor.process_query(test_query) + if entity_mapping: + logger.info("Entity Normalization:") + for placeholder, entity in entity_mapping.items(): + logger.info(" %s โ†’ %s", placeholder, entity) + else: + logger.info("Entity Normalization: No entities detected") + except (RuntimeError, ValueError, AttributeError) as e: + logger.warning("Entity normalization failed: %s", e) + + # Get both base and dynamic prompts + base_prompt = BASE_PROMPT + dynamic_prompt, stats = manager.get_prompt_with_stats( + query=test_query, + user_permissions=user_permissions, + ) + + # Calculate prompt comparison metrics + metrics = calculate_prompt_metrics(base_prompt, dynamic_prompt) + + # Show prompt comparison metrics + logger.info("Prompt Construction Stats:") + for key, value in stats.items(): + logger.info(" %s: %s", key, value) + + logger.info("Prompt Size Comparison:") + base_metrics = metrics["base_prompt"] + dynamic_metrics = metrics["dynamic_prompt"] + comparison = metrics["comparison"] + + logger.info( + " Base prompt: %d lines, %d words, %d chars", + base_metrics["lines"], + base_metrics["words"], + base_metrics["characters"], + ) + logger.info( + " Dynamic 
prompt: %d lines, %d words, %d chars", + dynamic_metrics["lines"], + dynamic_metrics["words"], + dynamic_metrics["characters"], + ) + logger.info( + " Size increase: +%d lines, +%d words, +%d chars (%s%%)", + comparison["lines_added"], + comparison["words_added"], + comparison["characters_added"], + comparison["size_increase_percent"], + ) + + # Show similar examples + similar_examples = manager.search_similar_examples( + query=test_query, + user_permissions=user_permissions, + top_k=3, + ) + + logger.info("Similar Examples Found:") + if similar_examples: + for i, example in enumerate(similar_examples, 1): + similarity = example.get("similarity_score", 0) + query_text = example.get("query", "Unknown") + tool_name = example.get("tool_name", "Unknown") + logger.info(" %d. %s (similarity: %.3f)", i, query_text, similarity) + logger.info(" Tool: %s", tool_name) + else: + logger.info(" No similar examples found.") + + # Generate markdown report if requested + if args.output_markdown: + report = generate_markdown_report( + query=test_query, + entity_mapping=entity_mapping, + metrics=metrics, + similar_examples=similar_examples, + base_prompt=base_prompt, + dynamic_prompt=dynamic_prompt, + ) + + output_path = Path(args.output_markdown) + with open(output_path, "w") as f: + f.write(report) + + if args.show_prompt: + logger.info("Generated Dynamic Prompt:") + logger.info("-" * 60) + logger.info("%s", dynamic_prompt) + logger.info("-" * 60) + + except (RuntimeError, ValueError): + sys.exit(1) + + +def main() -> None: + """Main CLI entry point.""" + # Configure logging for CLI usage + logging.basicConfig( + level=logging.INFO, format="%(levelname)s: %(message)s", handlers=[logging.StreamHandler()] + ) + + parser = argparse.ArgumentParser( + description="Dynamic Prompt Construction CLI", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Precompute embeddings for all examples + python -m kfinance.integrations.tool_calling.dynamic_prompts.cli 
precompute + + # Show statistics + python -m kfinance.integrations.tool_calling.dynamic_prompts.cli stats + + # Test with a custom query and show metrics + python -m kfinance.integrations.tool_calling.dynamic_prompts.cli test --query "What is the revenue for Apple?" + + # Test with markdown report output + python -m kfinance.integrations.tool_calling.dynamic_prompts.cli test --query "What is Apple's debt ratio?" --output-markdown report.md + + # Force recompute all embeddings + python -m kfinance.integrations.tool_calling.dynamic_prompts.cli precompute --force + """, + ) + + # Global arguments + parser.add_argument("--examples-dir", type=Path, help="Directory containing example JSON files") + parser.add_argument("--cache-dir", type=Path, help="Directory to store cache files") + parser.add_argument( + "--model", default="sentence-transformers/all-MiniLM-L6-v2", help="Embedding model name" + ) + + # Subcommands + subparsers = parser.add_subparsers(dest="command", help="Available commands") + + # Precompute command + precompute_parser = subparsers.add_parser( + "precompute", help="Precompute embeddings for all examples" + ) + precompute_parser.add_argument( + "--force", action="store_true", help="Force recomputation of all embeddings" + ) + + # Stats command + subparsers.add_parser("stats", help="Show cache and repository statistics") + + # Invalidate command + subparsers.add_parser("invalidate", help="Invalidate the embedding cache") + + # Test command + test_parser = subparsers.add_parser( + "test", help="Test dynamic prompt construction with entity normalization and metrics" + ) + test_parser.add_argument( + "--query", + help="Query to test with (default: sample query). 
Shows entity normalization using spaCy NER.", + ) + test_parser.add_argument("--show-prompt", action="store_true", help="Show the generated prompt") + test_parser.add_argument( + "--output-markdown", + help="Output detailed comparison report to markdown file (e.g., report.md)", + ) + + # Parse arguments + args = parser.parse_args() + + if not args.command: + parser.print_help() + sys.exit(1) + + # Execute command + if args.command == "precompute": + precompute_command(args) + elif args.command == "stats": + stats_command(args) + elif args.command == "invalidate": + invalidate_command(args) + elif args.command == "test": + test_command(args) + else: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/core/.embedding_cache/embeddings.pkl b/kfinance/integrations/tool_calling/dynamic_prompts/core/.embedding_cache/embeddings.pkl new file mode 100644 index 0000000..14a0293 Binary files /dev/null and b/kfinance/integrations/tool_calling/dynamic_prompts/core/.embedding_cache/embeddings.pkl differ diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/core/.embedding_cache/metadata.json b/kfinance/integrations/tool_calling/dynamic_prompts/core/.embedding_cache/metadata.json new file mode 100644 index 0000000..84a30ce --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/core/.embedding_cache/metadata.json @@ -0,0 +1,5 @@ +{ + "model_name": "sentence-transformers/all-MiniLM-L6-v2", + "total_embeddings": 89, + "last_updated": "2025-09-26T17:13:44" +} \ No newline at end of file diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/core/__init__.py b/kfinance/integrations/tool_calling/dynamic_prompts/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/core/cache.py b/kfinance/integrations/tool_calling/dynamic_prompts/core/cache.py new file mode 100644 index 0000000..26730f8 --- /dev/null +++ 
"""Embedding cache system for pre-computed example embeddings."""

import hashlib
import json
import logging
from pathlib import Path
import pickle
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional

import numpy as np

if TYPE_CHECKING:  # annotation-only imports; runtime imports are deferred
    from sentence_transformers import SentenceTransformer

    from .models import ToolExample


logger = logging.getLogger(__name__)


class EmbeddingCache:
    """On-disk cache of example embeddings keyed by a content hash.

    Embeddings live in ``embeddings.pkl`` and bookkeeping in ``metadata.json``
    inside ``cache_dir``. An entry is reused only while the example content is
    unchanged; the whole cache is ignored when it was produced by a different
    embedding model than the one requested.
    """

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
    ):
        """Initialize the embedding cache and load any existing cache files.

        Args:
            cache_dir: Directory to store cache files (created if missing).
                Defaults to ``.embedding_cache`` next to this module.
            embedding_model_name: Name of the embedding model
        """
        self.cache_dir = Path(cache_dir) if cache_dir is not None else self._get_default_cache_dir()
        self.embedding_model_name = embedding_model_name
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Cache file paths
        self.embeddings_cache_file = self.cache_dir / "embeddings.pkl"
        self.metadata_cache_file = self.cache_dir / "metadata.json"

        # The model is loaded on first access of ``embedding_model``
        self._embedding_model: Optional["SentenceTransformer"] = None

        # In-memory cache data
        self.cached_embeddings: Dict[str, np.ndarray] = {}
        self.cached_metadata: Dict[str, Any] = {}

        self._load_cache()

    def _get_default_cache_dir(self) -> Path:
        """Get default cache directory."""
        return Path(__file__).parent / ".embedding_cache"

    @staticmethod
    def _tmp_path(path: Path) -> Path:
        """Return the sibling temporary path used for atomic writes of ``path``."""
        return path.with_name(path.name + ".tmp")

    @property
    def embedding_model(self) -> "SentenceTransformer":
        """Lazily import and load the embedding model.

        Raises:
            OSError, ImportError, RuntimeError: re-raised after logging when the
                library or the model cannot be loaded.
        """
        if self._embedding_model is None:
            try:
                # Imported here so a missing/broken installation surfaces as the
                # ImportError handled below instead of failing at module import.
                from sentence_transformers import SentenceTransformer

                self._embedding_model = SentenceTransformer(self.embedding_model_name)
                logger.debug("Loaded embedding model: %s", self.embedding_model_name)
            except (OSError, ImportError, RuntimeError) as e:
                logger.error("Failed to load embedding model: %s", e)
                raise
        return self._embedding_model

    def _compute_example_hash(self, example: "ToolExample") -> str:
        """Compute a SHA-256 hash of an example's content to detect changes."""
        # Deterministic representation: sorted tags and sorted JSON keys
        example_data = {
            "query": example.query,
            "tool_name": example.tool_name,
            "parameters": example.parameters,
            "context": example.context,
            "disambiguation_note": example.disambiguation_note,
            "tags": sorted(example.tags) if example.tags else [],
        }
        json_str = json.dumps(example_data, sort_keys=True)
        return hashlib.sha256(json_str.encode()).hexdigest()

    def _load_cache(self) -> None:
        """Load cached embeddings and metadata from disk.

        Any read/parse failure, an unexpected file structure, or a cache written
        for a different embedding model leaves the in-memory cache empty.
        """
        embeddings: Any = {}
        metadata: Any = {}
        try:
            if self.embeddings_cache_file.exists():
                with open(self.embeddings_cache_file, "rb") as f:
                    # NOTE(security): pickle can execute arbitrary code; the
                    # cache directory must only contain trusted files.
                    embeddings = pickle.load(f)
                logger.debug("Loaded %d cached embeddings", len(embeddings))

            if self.metadata_cache_file.exists():
                with open(self.metadata_cache_file, "r", encoding="utf-8") as f:
                    metadata = json.load(f)
                logger.debug("Loaded embedding cache metadata")

        except (
            OSError,
            ValueError,  # includes json.JSONDecodeError and UnicodeDecodeError
            TypeError,
            pickle.PickleError,
            # Unpickling truncated/foreign data raises these, not PickleError
            EOFError,
            AttributeError,
            ImportError,
            IndexError,
        ) as e:
            logger.error("Failed to load embedding cache: %s", e)
            embeddings, metadata = {}, {}

        if not isinstance(embeddings, dict) or not isinstance(metadata, dict):
            logger.error("Embedding cache has an unexpected structure; ignoring it")
            embeddings, metadata = {}, {}

        # Entries are keyed by example content only, so vectors produced by a
        # different model would otherwise be reused silently.
        cached_model = metadata.get("model_name")
        if embeddings and cached_model is not None and cached_model != self.embedding_model_name:
            logger.warning(
                "Ignoring embedding cache built with model %s (requested %s)",
                cached_model,
                self.embedding_model_name,
            )
            embeddings, metadata = {}, {}

        self.cached_embeddings = embeddings
        self.cached_metadata = metadata

    def _save_cache(self) -> None:
        """Save cached embeddings and metadata to disk.

        Each file is written to a temporary sibling and then moved into place so
        a failed write does not leave a truncated cache file behind. Failures are
        logged, not raised.
        """
        try:
            tmp_file = self._tmp_path(self.embeddings_cache_file)
            with open(tmp_file, "wb") as f:
                pickle.dump(self.cached_embeddings, f)
            tmp_file.replace(self.embeddings_cache_file)

            tmp_file = self._tmp_path(self.metadata_cache_file)
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(self.cached_metadata, f, indent=2)
            tmp_file.replace(self.metadata_cache_file)

            logger.debug("Saved %d embeddings to cache", len(self.cached_embeddings))

        except (OSError, TypeError, ValueError, pickle.PickleError) as e:
            logger.error("Failed to save embedding cache: %s", e)

    def get_or_compute_embeddings(
        self,
        examples: List["ToolExample"],
        force_recompute: bool = False,
    ) -> List["ToolExample"]:
        """Get embeddings for examples, computing and caching new ones as needed.

        Args:
            examples: List of examples to get embeddings for
            force_recompute: Whether to force recomputation of all embeddings

        Returns:
            A new list holding the same example objects in the same order. Their
            ``embedding`` attribute is populated in place; examples whose
            embedding could not be computed are returned unchanged.
        """
        # (example, hash) pairs that still need an embedding
        pending = []
        for example in examples:
            example_hash = self._compute_example_hash(example)
            cached = None if force_recompute else self.cached_embeddings.get(example_hash)
            if cached is not None:
                example.embedding = cached
            else:
                pending.append((example, example_hash))

        if pending:
            logger.debug("Computing embeddings for %d new examples", len(pending))
            try:
                # Only the query text is embedded
                new_embeddings = self.embedding_model.encode([ex.query for ex, _ in pending])

                for (example, example_hash), embedding in zip(pending, new_embeddings):
                    example.embedding = embedding
                    self.cached_embeddings[example_hash] = embedding

                self.cached_metadata.update(
                    {
                        "model_name": self.embedding_model_name,
                        "total_embeddings": len(self.cached_embeddings),
                        "last_updated": str(np.datetime64("now")),
                    }
                )
                self._save_cache()

            except (RuntimeError, ValueError, OSError) as e:
                # Best effort: callers get the examples without new embeddings
                logger.error("Failed to compute new embeddings: %s", e)

        return list(examples)

    @staticmethod
    def _iter_example_dicts(data: Any) -> Iterator[Dict[str, Any]]:
        """Yield raw example dicts from either supported JSON layout.

        Old format: ``{"examples": [...]}``.
        New format: ``{"tools": [{"tool_name": "...", "examples": [...]}]}``.
        """
        if "examples" in data:
            yield from data["examples"]
        elif "tools" in data:
            for tool_data in data["tools"]:
                yield from tool_data.get("examples", [])

    def precompute_embeddings_from_files(
        self,
        examples_dir: Path,
        force_recompute: bool = False,
    ) -> None:
        """Precompute embeddings for all examples in ``*_examples.json`` files.

        A file that cannot be read or parsed is logged and skipped as a whole.

        Args:
            examples_dir: Directory containing example JSON files
            force_recompute: Whether to force recomputation of all embeddings
        """
        if not examples_dir.exists():
            logger.warning("Examples directory not found: %s", examples_dir)
            return

        from .models import ToolExample

        all_examples: List["ToolExample"] = []

        for json_file in sorted(examples_dir.glob("*_examples.json")):
            try:
                with open(json_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                file_examples = [
                    ToolExample.from_dict(example_data)
                    for example_data in self._iter_example_dicts(data)
                ]
            except (OSError, KeyError, ValueError, TypeError, AttributeError) as e:
                logger.error("Failed to load examples from %s: %s", json_file, e)
                continue

            all_examples.extend(file_examples)
            logger.debug("Loaded %d examples from %s", len(file_examples), json_file.name)

        if all_examples:
            logger.debug("Precomputing embeddings for %d examples", len(all_examples))
            self.get_or_compute_embeddings(all_examples, force_recompute=force_recompute)
        else:
            logger.warning("No examples found to precompute embeddings for")

    def invalidate_cache(self) -> None:
        """Clear the in-memory cache and delete the cache files."""
        self.cached_embeddings.clear()
        self.cached_metadata.clear()

        try:
            for cache_file in (self.embeddings_cache_file, self.metadata_cache_file):
                if cache_file.exists():
                    cache_file.unlink()
            logger.debug("Invalidated embedding cache")
        except OSError as e:
            logger.error("Failed to invalidate cache: %s", e)

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get statistics about the cache (entry count, file size, model, location)."""
        cache_size_mb: float = 0.0
        if self.embeddings_cache_file.exists():
            cache_size_mb = self.embeddings_cache_file.stat().st_size / (1024 * 1024)

        return {
            "cached_embeddings": len(self.cached_embeddings),
            "cache_size_mb": round(cache_size_mb, 2),
            "model_name": self.cached_metadata.get("model_name", "unknown"),
            "last_updated": self.cached_metadata.get("last_updated", "never"),
            "cache_dir": str(self.cache_dir),
        }

    def cleanup_orphaned_embeddings(self, current_examples: List["ToolExample"]) -> None:
        """Remove cached embeddings for examples that no longer exist.

        Args:
            current_examples: List of current examples
        """
        current_hashes = {self._compute_example_hash(ex) for ex in current_examples}
        orphaned_hashes = set(self.cached_embeddings) - current_hashes
        if not orphaned_hashes:
            return

        logger.debug("Removing %d orphaned embeddings from cache", len(orphaned_hashes))
        for hash_key in orphaned_hashes:
            del self.cached_embeddings[hash_key]

        self.cached_metadata["total_embeddings"] = len(self.cached_embeddings)
        self._save_cache()


def precompute_all_embeddings(
    examples_dir: Optional[Path] = None,
    cache_dir: Optional[Path] = None,
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
    force_recompute: bool = False,
) -> None:
    """Utility function to precompute embeddings for all examples.

    Args:
        examples_dir: Directory containing example JSON files (defaults to the
            ``tool_examples`` directory one level above this module's package)
        cache_dir: Directory to store cache files
        embedding_model: Name of the embedding model
        force_recompute: Whether to force recomputation of all embeddings
    """
    if examples_dir is None:
        examples_dir = Path(__file__).parent.parent / "tool_examples"

    cache = EmbeddingCache(cache_dir=cache_dir, embedding_model_name=embedding_model)
    cache.precompute_embeddings_from_files(examples_dir, force_recompute=force_recompute)

    # Log cache statistics
    stats = cache.get_cache_stats()
    logger.info("Cache Statistics:")
    for key, value in stats.items():
        logger.info(" %s: %s", key, value)


if __name__ == "__main__":
    # Script entry point: precompute embeddings for all examples.
    import argparse

    parser = argparse.ArgumentParser(description="Precompute embeddings for examples")
    parser.add_argument("--examples-dir", type=Path, help="Directory containing example JSON files")
    parser.add_argument("--cache-dir", type=Path, help="Directory to store cache files")
    parser.add_argument(
        "--model", default="sentence-transformers/all-MiniLM-L6-v2", help="Embedding model name"
    )
    parser.add_argument(
        "--force", action="store_true", help="Force recomputation of all embeddings"
    )

    args = parser.parse_args()

    precompute_all_embeddings(
        examples_dir=args.examples_dir,
        cache_dir=args.cache_dir,
        embedding_model=args.model,
        force_recompute=args.force,
    )
"""Dynamic prompt constructor for assembling query-specific prompts with relevant examples."""

import logging
import re
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union

if TYPE_CHECKING:  # annotation-only imports; runtime imports are deferred
    from kfinance.client.permission_models import Permission

    from .models import ParameterDescriptor, ToolExample
    from .repository import ExampleRepository
    from .search import SimilaritySearchEngine


logger = logging.getLogger(__name__)

# Runs of letters/digits; underscores and punctuation act as separators so
# "total_revenue" and "revenue?" both yield the token "revenue".
_WORD_RE = re.compile(r"[^\W_]+")


class DynamicPromptConstructor:
    """Constructs dynamic prompts with query-specific examples and parameter descriptors."""

    def __init__(
        self,
        example_repository: "ExampleRepository",
        similarity_engine: Optional["SimilaritySearchEngine"] = None,
        max_examples_per_tool: int = 3,
        max_total_examples: int = 8,
        include_parameter_descriptors: bool = True,
    ):
        """Initialize the dynamic prompt constructor.

        Args:
            example_repository: Repository containing examples and parameter descriptors
            similarity_engine: Engine for similarity search (one is created from
                the repository's embedding model if None)
            max_examples_per_tool: Maximum examples to include per tool
            max_total_examples: Maximum total examples to include in prompt
            include_parameter_descriptors: Whether to include parameter descriptors
        """
        from kfinance.integrations.tool_calling.prompts import BASE_PROMPT

        if similarity_engine is None:
            from .search import SimilaritySearchEngine

            similarity_engine = SimilaritySearchEngine(
                embedding_model=example_repository.embedding_model
            )

        self.example_repository = example_repository
        self.similarity_engine = similarity_engine
        self.max_examples_per_tool = max_examples_per_tool
        self.max_total_examples = max_total_examples
        self.include_parameter_descriptors = include_parameter_descriptors

        # Base prompt template
        self.base_prompt = BASE_PROMPT

    def construct_prompt(
        self,
        query: str,
        user_permissions: Set["Permission"],
        available_tools: Optional[List[str]] = None,
        min_similarity: float = 0.3,
    ) -> str:
        """Construct a dynamic prompt with query-specific examples.

        Args:
            query: User query to construct prompt for
            user_permissions: User's permissions for filtering examples
            available_tools: List of available tool names (if None, use all)
            min_similarity: Minimum similarity threshold for including examples

        Returns:
            The stripped base prompt, followed (when relevant examples exist) by
            an examples section and optionally a parameter guidance section,
            separated by blank lines.
        """
        prompt_parts = [self.base_prompt.strip()]

        relevant_examples = self._find_relevant_examples(
            query, user_permissions, available_tools, min_similarity
        )

        if relevant_examples:
            examples_by_tool = self._group_examples_by_tool(relevant_examples)

            examples_section = self._build_examples_section(examples_by_tool)
            if examples_section:
                prompt_parts.append(examples_section)

            if self.include_parameter_descriptors:
                descriptors_section = self._build_parameter_descriptors_section(
                    examples_by_tool, query
                )
                if descriptors_section:
                    prompt_parts.append(descriptors_section)

        return "\n\n".join(prompt_parts)

    def _find_relevant_examples(
        self,
        query: str,
        user_permissions: Set["Permission"],
        available_tools: Optional[List[str]],
        min_similarity: float,
    ) -> List["ToolExample"]:
        """Find relevant examples using similarity search.

        Twice ``max_total_examples`` candidates are requested. The per-tool cap
        is applied while walking them, so slots freed by an over-represented
        tool are filled by the next-best candidates instead of being lost.
        """
        similarity_results = self.similarity_engine.search_examples(
            query=query,
            examples=self.example_repository.examples,
            user_permissions=user_permissions,
            tool_names=available_tools,
            top_k=self.max_total_examples * 2,  # Get more candidates for filtering
            min_similarity=min_similarity,
        )

        # NOTE(review): assumes the engine returns results best-first — confirm.
        selected: List["ToolExample"] = []
        per_tool_counts: Dict[str, int] = {}
        for _, example in similarity_results:
            if len(selected) >= self.max_total_examples:
                break
            used = per_tool_counts.get(example.tool_name, 0)
            if used >= self.max_examples_per_tool:
                continue
            per_tool_counts[example.tool_name] = used + 1
            selected.append(example)

        return selected

    def _group_examples_by_tool(
        self, examples: List["ToolExample"]
    ) -> Dict[str, List["ToolExample"]]:
        """Group examples by tool name (first-seen order), respecting per-tool limits."""
        examples_by_tool: Dict[str, List["ToolExample"]] = {}

        for example in examples:
            bucket = examples_by_tool.setdefault(example.tool_name, [])
            if len(bucket) < self.max_examples_per_tool:
                bucket.append(example)

        return examples_by_tool

    def _build_examples_section(self, examples_by_tool: Dict[str, List["ToolExample"]]) -> str:
        """Build the examples section of the prompt ("" when there is nothing to show)."""
        if not examples_by_tool:
            return ""

        section_parts = ["RELEVANT EXAMPLES:"]

        for tool_name, examples in examples_by_tool.items():
            if not examples:
                continue

            section_parts.append(f"\n{tool_name} Examples:")
            section_parts.extend(
                self._format_example(example, i) for i, example in enumerate(examples, 1)
            )

        return "\n".join(section_parts)

    def _format_example(self, example: "ToolExample", index: int) -> str:
        """Format a single example (query, call, optional context/note) for the prompt."""
        parts = [f'{index}. Query: "{example.query}"']

        params_str = ", ".join(f"{k}={v!r}" for k, v in example.parameters.items())
        parts.append(f" Function: {example.tool_name}({params_str})")

        if example.context:
            parts.append(f" Context: {example.context}")

        if example.disambiguation_note:
            parts.append(f" Note: {example.disambiguation_note}")

        return "\n".join(parts)

    def _build_parameter_descriptors_section(
        self, examples_by_tool: Dict[str, List["ToolExample"]], query: str = ""
    ) -> str:
        """Build the parameter descriptors section of the prompt.

        Only tools present in ``examples_by_tool`` are considered, and only
        descriptors for parameters that actually occur in their examples.
        """
        section_parts: List[str] = []

        for tool_name, tool_examples in examples_by_tool.items():
            descriptors = self.example_repository.get_parameter_descriptors(tool_name)
            if not descriptors:
                continue

            relevant_descriptors = self._filter_relevant_descriptors(descriptors, tool_examples)
            if not relevant_descriptors:
                continue

            if not section_parts:
                section_parts.append("PARAMETER GUIDANCE:")

            section_parts.append(f"\n{tool_name} Parameters:")
            section_parts.extend(
                self._format_parameter_descriptor_with_context(descriptor, tool_examples, query)
                for descriptor in relevant_descriptors
            )

        return "\n".join(section_parts) if section_parts else ""

    def _filter_relevant_descriptors(
        self, descriptors: List["ParameterDescriptor"], examples: List["ToolExample"]
    ) -> List["ParameterDescriptor"]:
        """Keep only descriptors whose parameter appears in at least one example."""
        example_params: Set[str] = set()
        for example in examples:
            example_params.update(example.parameters.keys())

        return [d for d in descriptors if d.parameter_name in example_params]

    def _format_parameter_descriptor(self, descriptor: "ParameterDescriptor") -> str:
        """Format a parameter descriptor for inclusion in the prompt.

        ``descriptor.examples`` may be a list of values or a dict mapping value
        to description; in both cases the first three values are shown.
        """
        parts = [f"- {descriptor.parameter_name}: {descriptor.description}"]

        if descriptor.examples:
            # list(...) yields the keys of a dict, which cannot be sliced directly
            examples_str = ", ".join(f'"{ex}"' for ex in list(descriptor.examples)[:3])
            parts.append(f" Examples: {examples_str}")

        if descriptor.common_mistakes:
            mistakes_str = "; ".join(descriptor.common_mistakes[:2])  # Limit mistakes
            parts.append(f" Common mistakes: {mistakes_str}")

        return "\n".join(parts)

    def _format_parameter_descriptor_with_context(
        self, descriptor: "ParameterDescriptor", examples: List["ToolExample"], query: str
    ) -> str:
        """Format a parameter descriptor with contextually relevant examples."""
        parts = [f"- {descriptor.parameter_name}: {descriptor.description}"]

        if descriptor.examples:
            ranked = self._select_relevant_examples(
                descriptor.examples, descriptor.parameter_name, examples, query
            )

            # Show at most three values, with their description when one exists
            example_parts = [
                f'"{key}" ({desc})' if desc and desc.strip() else f'"{key}"'
                for key, desc in ranked[:3]
            ]
            if example_parts:
                parts.append(f" Examples: {', '.join(example_parts)}")

        if descriptor.common_mistakes:
            mistakes_str = "; ".join(descriptor.common_mistakes[:2])  # Limit mistakes
            parts.append(f" Common mistakes: {mistakes_str}")

        return "\n".join(parts)

    def _select_relevant_examples(
        self,
        examples_data: Union[Dict[str, str], List[str]],
        parameter_name: str,
        tool_examples: List["ToolExample"],
        query: str,
    ) -> List[Tuple[str, str]]:
        """Rank a descriptor's example values by relevance to the current context.

        Scoring per value: +1000 if a selected tool example uses it for
        ``parameter_name``; 100 if the value (verbatim, or with underscores read
        as spaces) occurs in the query; otherwise 10 per word shared with the
        query. Ties keep their original order.

        Returns list of (example_key, example_description) tuples, best first.
        """
        # Handle both dict and list formats for backward compatibility
        if isinstance(examples_data, dict):
            all_examples = list(examples_data.keys())
            example_descriptions = examples_data
        else:
            all_examples = examples_data
            example_descriptions = {ex: "" for ex in examples_data}

        if not all_examples:
            return []

        # Values actually used by the selected tool examples
        known_values = set(all_examples)
        used_values = set()
        for tool_example in tool_examples:
            param_value = tool_example.parameters.get(parameter_name)
            if isinstance(param_value, str) and param_value in known_values:
                used_values.add(param_value)

        query_lower = query.lower()
        query_words = set(_WORD_RE.findall(query_lower))

        all_scored = []
        for example_key in all_examples:
            example_lower = example_key.lower()
            phrase = example_lower.replace("_", " ")

            if example_lower in query_lower or phrase in query_lower:
                score = 100
            else:
                score = 10 * len(query_words & set(_WORD_RE.findall(example_lower)))

            if example_key in used_values:
                score += 1000  # High boost for used values

            all_scored.append((score, example_key))

        # Stable sort: equal scores keep declaration order
        all_scored.sort(key=lambda item: item[0], reverse=True)

        return [(ex, example_descriptions.get(ex, "")) for _, ex in all_scored]

    def get_prompt_stats(self, prompt: str) -> Dict[str, int]:
        """Get statistics about the constructed prompt.

        Args:
            prompt: The constructed prompt

        Returns:
            Dictionary with line/word/character totals, the number of examples
            (occurrences of ``Query: "``) and the number of parameter sections
            (occurrences of ``Parameters:``).
        """
        return {
            "total_lines": len(prompt.split("\n")),
            "total_words": len(prompt.split()),
            "total_characters": len(prompt),
            "example_count": prompt.count('Query: "'),
            "parameter_descriptor_sections": prompt.count("Parameters:"),
        }

    def construct_prompt_with_stats(
        self,
        query: str,
        user_permissions: Set["Permission"],
        available_tools: Optional[List[str]] = None,
        min_similarity: float = 0.3,
    ) -> Tuple[str, Dict[str, int]]:
        """Construct prompt and return it with statistics.

        Returns:
            Tuple of (prompt, statistics)
        """
        prompt = self.construct_prompt(query, user_permissions, available_tools, min_similarity)
        return prompt, self.get_prompt_stats(prompt)
"""Integration module for dynamic prompt construction with existing tool calling system."""

from collections import Counter
import logging
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple

if TYPE_CHECKING:  # annotation-only imports; runtime imports are deferred
    from kfinance.client.permission_models import Permission

    from .constructor import DynamicPromptConstructor
    from .repository import ExampleRepository
    from .search import SimilaritySearchEngine


logger = logging.getLogger(__name__)


def _base_prompt() -> str:
    """Return the static base prompt used whenever dynamic construction is unavailable."""
    from kfinance.integrations.tool_calling.prompts import BASE_PROMPT

    return BASE_PROMPT


class DynamicPromptManager:
    """Manager class for integrating dynamic prompt construction with the tool calling system.

    Components (repository, similarity engine, prompt constructor) are created
    on first use. If that fails, public methods fall back to the static base
    prompt or return an empty/error result instead of raising.
    """

    def __init__(
        self,
        examples_dir: Optional[Path] = None,
        embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
        enable_caching: bool = True,
        cache_dir: Optional[Path] = None,
    ):
        """Initialize the dynamic prompt manager (no I/O happens here).

        Args:
            examples_dir: Directory containing example JSON files
            embedding_model: Name of the sentence transformer model
            enable_caching: Whether to enable embedding caching
            cache_dir: Directory to store embedding cache files
        """
        self.examples_dir = examples_dir
        self.embedding_model = embedding_model
        self.enable_caching = enable_caching
        self.cache_dir = cache_dir

        # Components, created by _initialize()
        self._repository: Optional["ExampleRepository"] = None
        self._similarity_engine: Optional["SimilaritySearchEngine"] = None
        self._prompt_constructor: Optional["DynamicPromptConstructor"] = None

        # Set only after a successful initialization
        self._initialized = False

    def _initialize(self) -> None:
        """Lazy initialization of components.

        On failure the components stay ``None`` and ``_initialized`` stays
        False, so the next call tries again.
        """
        if self._initialized:
            return

        try:
            # Imported here so missing optional dependencies are reported as
            # the ImportError handled below rather than at module import time.
            from .constructor import DynamicPromptConstructor
            from .repository import ExampleRepository
            from .search import SimilaritySearchEngine

            self._repository = ExampleRepository(
                examples_dir=self.examples_dir,
                embedding_model=self.embedding_model,
                cache_embeddings=self.enable_caching,
                cache_dir=self.cache_dir,
            )

            self._similarity_engine = SimilaritySearchEngine(
                embedding_model=self._repository.embedding_model,
            )

            self._prompt_constructor = DynamicPromptConstructor(
                example_repository=self._repository,
                similarity_engine=self._similarity_engine,
            )

            self._initialized = True
            logger.info("Dynamic prompt manager initialized successfully")

        except (OSError, ImportError, RuntimeError, ValueError) as e:
            logger.error("Failed to initialize dynamic prompt manager: %s", e)
            # Fall back to None components - will use static prompts
            self._repository = None
            self._similarity_engine = None
            self._prompt_constructor = None

    def construct_dynamic_prompt(
        self,
        query: str,
        user_permissions: Set["Permission"],
        available_tools: Optional[List[str]] = None,
        min_similarity: float = 0.3,
    ) -> str:
        """Construct a dynamic prompt for the given query.

        Args:
            query: User query
            user_permissions: User's permissions
            available_tools: List of available tool names
            min_similarity: Minimum similarity threshold

        Returns:
            Dynamic prompt string, or base prompt if dynamic construction fails
        """
        self._initialize()

        if not self._prompt_constructor:
            logger.warning("Dynamic prompt construction not available, using base prompt")
            return _base_prompt()

        try:
            return self._prompt_constructor.construct_prompt(
                query=query,
                user_permissions=user_permissions,
                available_tools=available_tools,
                min_similarity=min_similarity,
            )
        except (RuntimeError, ValueError, AttributeError) as e:
            logger.error("Failed to construct dynamic prompt: %s", e)
            return _base_prompt()

    def get_prompt_with_stats(
        self,
        query: str,
        user_permissions: Set["Permission"],
        available_tools: Optional[List[str]] = None,
        min_similarity: float = 0.3,
    ) -> Tuple[str, dict]:
        """Get dynamic prompt with statistics.

        Returns:
            Tuple of (prompt, statistics_dict). On failure the prompt is the base
            prompt and the dict holds a single ``"error"`` entry.
        """
        self._initialize()

        if not self._prompt_constructor:
            return _base_prompt(), {"error": "Dynamic prompt construction not available"}

        try:
            return self._prompt_constructor.construct_prompt_with_stats(
                query=query,
                user_permissions=user_permissions,
                available_tools=available_tools,
                min_similarity=min_similarity,
            )
        except (RuntimeError, ValueError, AttributeError) as e:
            logger.error("Failed to construct dynamic prompt with stats: %s", e)
            return _base_prompt(), {"error": str(e)}

    def search_similar_examples(
        self,
        query: str,
        user_permissions: Set["Permission"],
        top_k: int = 5,
    ) -> List[dict]:
        """Search for similar examples (useful for debugging/analysis).

        Returns:
            List of example dictionaries, each with an added
            ``"similarity_score"`` float; empty when unavailable or on failure.
        """
        self._initialize()

        if not self._repository or not self._similarity_engine:
            return []

        try:
            similarity_results = self._similarity_engine.search_examples(
                query=query,
                examples=self._repository.examples,
                user_permissions=user_permissions,
                top_k=top_k,
            )

            # Convert to a serializable format
            results = []
            for similarity, example in similarity_results:
                result = example.to_dict()
                result["similarity_score"] = float(similarity)
                results.append(result)
            return results

        except (RuntimeError, ValueError, AttributeError) as e:
            logger.error("Failed to search similar examples: %s", e)
            return []

    def add_example_from_query(
        self,
        query: str,
        tool_name: str,
        parameters: dict,
        context: str,
        permissions_required: Set["Permission"],
        disambiguation_note: Optional[str] = None,
        tags: Optional[List[str]] = None,
    ) -> bool:
        """Add a new example from a successful query (for continuous learning).

        Returns:
            True if example was added successfully, False otherwise
        """
        self._initialize()

        if not self._repository:
            return False

        from .models import ToolExample

        try:
            example = ToolExample(
                query=query,
                tool_name=tool_name,
                parameters=parameters,
                context=context,
                permissions_required=permissions_required,
                disambiguation_note=disambiguation_note,
                tags=tags or [],
            )

            self._repository.add_example(example)
            logger.info("Added new example for tool %s", tool_name)
            return True

        except (ValueError, AttributeError, TypeError) as e:
            logger.error("Failed to add example: %s", e)
            return False

    def get_repository_stats(self) -> Dict[str, Any]:
        """Get statistics about the example repository.

        Returns:
            Dictionary with example totals, per-tool counts, descriptor totals
            and (when available) cache statistics; ``{"error": ...}`` on failure.
        """
        self._initialize()

        if not self._repository:
            return {"error": "Repository not available"}

        try:
            descriptors_by_tool = self._repository.parameter_descriptors
            stats: Dict[str, Any] = {
                "total_examples": len(self._repository.examples),
                "examples_by_tool": dict(
                    Counter(example.tool_name for example in self._repository.examples)
                ),
                "total_parameter_descriptors": sum(
                    len(descriptors) for descriptors in descriptors_by_tool.values()
                ),
                "tools_with_descriptors": len(descriptors_by_tool),
            }

            # Add cache statistics if available
            cache_stats = self._repository.get_cache_stats()
            if "error" not in cache_stats:
                stats["cache"] = cache_stats

            return stats

        except (RuntimeError, ValueError, AttributeError) as e:
            logger.error("Failed to get repository stats: %s", e)
            return {"error": str(e)}

    def precompute_embeddings(self, force_recompute: bool = False) -> bool:
        """Precompute embeddings for all examples.

        Args:
            force_recompute: Whether to force recomputation of all embeddings

        Returns:
            True if successful, False otherwise
        """
        self._initialize()

        if not self._repository:
            return False

        try:
            self._repository.precompute_embeddings(force_recompute=force_recompute)
            logger.info("Successfully precomputed embeddings")
            return True
        except (RuntimeError, ValueError, OSError) as e:
            logger.error("Failed to precompute embeddings: %s", e)
            return False

    def get_cache_stats(self) -> dict:
        """Get embedding cache statistics.

        Returns:
            Dictionary with cache statistics
        """
        self._initialize()

        if not self._repository:
            return {"error": "Repository not available"}

        return self._repository.get_cache_stats()

    def invalidate_cache(self) -> bool:
        """Invalidate the embedding cache.

        Returns:
            True if successful, False otherwise
        """
        self._initialize()

        if not self._repository:
            return False

        try:
            self._repository.invalidate_cache()
            logger.info("Successfully invalidated cache")
            return True
        except (RuntimeError, OSError) as e:
            logger.error("Failed to invalidate cache: %s", e)
            return False


# Global instance for easy access
class _GlobalManagerHolder:
    """Holder for global prompt manager instance."""

    _instance: Optional[DynamicPromptManager] = None

    @classmethod
    def get_instance(cls) -> DynamicPromptManager:
        """Get the global dynamic prompt manager instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = DynamicPromptManager()
        return cls._instance


def get_dynamic_prompt_manager() -> DynamicPromptManager:
    """Get the global dynamic prompt manager instance."""
    return _GlobalManagerHolder.get_instance()


def construct_dynamic_prompt(
    query: str,
    user_permissions: Set["Permission"],
    available_tools: Optional[List[str]] = None,
    min_similarity: float = 0.3,
) -> str:
    """Convenience function to construct a dynamic prompt.

    This is the main entry point for integrating with existing tool calling code.

    Args:
        query: User query
        user_permissions: User's permissions
        available_tools: List of available tool names
        min_similarity: Minimum similarity threshold forwarded to the manager

    Returns:
        Dynamic prompt string, or the base prompt if dynamic construction fails
    """
    manager = get_dynamic_prompt_manager()
    return manager.construct_dynamic_prompt(
        query, user_permissions, available_tools, min_similarity
    )
+ """ + manager = get_dynamic_prompt_manager() + return manager.construct_dynamic_prompt(query, user_permissions, available_tools) diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/core/models.py b/kfinance/integrations/tool_calling/dynamic_prompts/core/models.py new file mode 100644 index 0000000..fa83224 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/core/models.py @@ -0,0 +1,151 @@ +"""Data models for dynamic prompt construction with integrated permission resolution.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +import logging +from typing import Any, Dict, List, Optional, Set + +import numpy as np + + +logger = logging.getLogger(__name__) + +# Import Permission model with fallback for development +try: + from kfinance.client.permission_models import Permission +except ImportError: + logger.warning("Could not import Permission model - using mock for development") + from enum import Enum + + class Permission(Enum): # type: ignore[no-redef] + StatementsPermission = "StatementsPermission" + PricingPermission = "PricingPermission" + EarningsPermission = "EarningsPermission" + MergersPermission = "MergersPermission" + CompanyIntelligencePermission = "CompanyIntelligencePermission" + RelationshipPermission = "RelationshipPermission" + SegmentsPermission = "SegmentsPermission" + IDPermission = "IDPermission" + CompetitorsPermission = "CompetitorsPermission" + TranscriptsPermission = "TranscriptsPermission" + + +# Permission mapping for resolving references +PERMISSION_MAPPING = { + # Financial data permissions + "STATEMENTS": Permission.StatementsPermission, + "PRICING": Permission.PricingPermission, + "EARNINGS": Permission.EarningsPermission, + # Company data permissions + "COMPANY_INTELLIGENCE": Permission.CompanyIntelligencePermission, + "MERGERS": Permission.MergersPermission, + "RELATIONSHIPS": Permission.RelationshipPermission, + "SEGMENTS": Permission.SegmentsPermission, + 
@dataclass
class ToolExample:
    """A single tool usage example, optionally carrying its embedding vector."""

    query: str  # example query text
    tool_name: str  # name of the tool the example demonstrates
    parameters: Dict[str, Any]  # parameters passed to the tool in this example
    context: str
    permissions_required: Set[Permission]
    # Excluded from repr() and from to_dict().
    embedding: Optional[np.ndarray] = field(default=None, repr=False)
    disambiguation_note: Optional[str] = None
    tags: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serializable dictionary (the embedding is omitted).

        ``permissions_required`` is emitted as a sorted list of permission
        values so that repeated serializations of the same example produce
        identical output (set iteration order is not stable across runs).
        """
        return {
            "query": self.query,
            "tool_name": self.tool_name,
            "parameters": self.parameters,
            "context": self.context,
            "permissions_required": sorted(
                (p.value for p in self.permissions_required), key=str
            ),
            "disambiguation_note": self.disambiguation_note,
            "tags": self.tags,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ToolExample":
        """Create a ToolExample from a dictionary, resolving permission references.

        Args:
            data: Mapping with the required keys ``query``, ``tool_name``,
                ``parameters`` and ``context``; ``permissions_required``,
                ``disambiguation_note`` and ``tags`` are optional.

        Raises:
            KeyError: If a required key is missing.
        """
        permission_refs = data.get("permissions_required", [])
        permissions = resolve_permissions(permission_refs)
        return cls(
            query=data["query"],
            tool_name=data["tool_name"],
            parameters=data["parameters"],
            context=data["context"],
            permissions_required=permissions,
            disambiguation_note=data.get("disambiguation_note"),
            # Copy the list and treat an explicit null as "no tags" so the
            # field is always a list owned by this instance.
            tags=list(data.get("tags") or []),
        )
..processing.entities import EntityProcessor +from .cache import EmbeddingCache +from .models import ParameterDescriptor, ToolExample + + +logger = logging.getLogger(__name__) + + +class ExampleRepository: + """Repository for managing tool usage examples and parameter descriptors.""" + + def __init__( + self, + examples_dir: Optional[Path] = None, + embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2", + cache_embeddings: bool = True, + cache_dir: Optional[Path] = None, + ): + """Initialize the example repository. + + Args: + examples_dir: Directory containing example JSON files + embedding_model: Name of the sentence transformer model to use + cache_embeddings: Whether to cache computed embeddings + cache_dir: Directory to store embedding cache files + """ + self.examples_dir = examples_dir or self._get_default_examples_dir() + self.embedding_model_name = embedding_model + self.cache_embeddings = cache_embeddings + + # Initialize embedding cache + self.embedding_cache = ( + EmbeddingCache( + cache_dir=cache_dir, + embedding_model_name=embedding_model, + ) + if cache_embeddings + else None + ) + + # Get embedding model from cache (lazy loaded) + self.embedding_model = ( + self.embedding_cache.embedding_model if self.embedding_cache else None + ) + + # Entity processing + self.entity_processor = EntityProcessor() + + # Storage + self.examples: List[ToolExample] = [] + self.parameter_descriptors: Dict[str, List[ParameterDescriptor]] = {} + + # Load examples and descriptors + self._load_examples() + self._load_parameter_descriptors() + + # Load or compute embeddings + if self.embedding_cache and self.examples: + self._load_or_compute_embeddings() + + def _get_default_examples_dir(self) -> Path: + """Get the default examples directory.""" + # Go up one level from core/ to the main dynamic_prompts directory + current_dir = Path(__file__).parent.parent + return current_dir / "tool_examples" + + def _load_examples(self) -> None: + """Load tool usage examples 
from JSON files.""" + if not self.examples_dir.exists(): + logger.warning("Examples directory not found: %s", self.examples_dir) + return + + for json_file in self.examples_dir.glob("*_examples.json"): + try: + with open(json_file, "r") as f: + data = json.load(f) + + total_examples_loaded = 0 + + # Handle both old format (examples at top level) and new format (tools with examples) + if "examples" in data: + # Old format: {"examples": [...]} + for example_data in data["examples"]: + example = ToolExample.from_dict(example_data) + self.examples.append(example) + total_examples_loaded += 1 + elif "tools" in data: + # New format: {"tools": [{"tool_name": "...", "examples": [...]}]} + for tool_data in data["tools"]: + for example_data in tool_data.get("examples", []): + example = ToolExample.from_dict(example_data) + self.examples.append(example) + total_examples_loaded += 1 + + logger.debug("Loaded %d examples from %s", total_examples_loaded, json_file.name) + + except (OSError, json.JSONDecodeError, KeyError, ValueError) as e: + logger.error("Failed to load examples from %s: %s", json_file, e) + + def _load_parameter_descriptors(self) -> None: + """Load parameter descriptors from JSON files.""" + descriptors_dir = self.examples_dir.parent / "parameter_descriptors" + + if not descriptors_dir.exists(): + logger.warning("Parameter descriptors directory not found: %s", descriptors_dir) + return + + for json_file in descriptors_dir.glob("*_params.json"): + try: + with open(json_file, "r") as f: + data = json.load(f) + + # Handle both old format (tool_name at top level) and new format (tools array) + if "tool_name" in data: + # Old format: {"tool_name": "...", "parameters": [...]} + tool_name = data.get("tool_name") + if tool_name: + descriptors = [] + for param_data in data.get("parameters", []): + descriptor = ParameterDescriptor.from_dict(param_data) + descriptors.append(descriptor) + self.parameter_descriptors[tool_name] = descriptors + logger.debug( + "Loaded %d 
def search_examples(
    self,
    query: str,
    user_permissions: Set[Permission],
    tool_names: Optional[List[str]] = None,
    top_k: int = 5,
    min_similarity: float = 0.3,
) -> List[ToolExample]:
    """Search for relevant examples using cosine similarity.

    The query is first passed through ``normalize_query_for_search`` and the
    normalized text is embedded. Only examples that share at least one
    permission with ``user_permissions`` are considered, so an example whose
    ``permissions_required`` set is empty never matches.

    Args:
        query: User query to search for
        user_permissions: User's permissions for filtering
        tool_names: Optional list of tool names to filter by
        top_k: Maximum number of examples to return; values <= 0 yield ``[]``
        min_similarity: Minimum similarity threshold

    Returns:
        List of relevant tool examples, sorted by descending similarity.
        Empty when no embedding model or examples are available, when the
        query cannot be embedded, or when nothing passes the filters.
    """
    if not self.embedding_model or not self.examples or top_k <= 0:
        return []

    # Normalize query to reduce entity bias; the entity mapping is not needed here.
    normalized_query, _ = self.normalize_query_for_search(query)

    allowed_tools = set(tool_names) if tool_names else None
    candidates = [
        example
        for example in self.examples
        # User needs at least one of the required permissions.
        if example.permissions_required.intersection(user_permissions)
        and (allowed_tools is None or example.tool_name in allowed_tools)
        and example.embedding is not None
    ]
    if not candidates:
        return []

    try:
        query_embedding = self.embedding_model.encode([normalized_query])[0]
    except (RuntimeError, ValueError, OSError) as e:
        logger.error("Failed to compute query embedding: %s", e)
        return []

    # The query norm is loop-invariant; a zero or non-finite norm makes the
    # cosine similarity undefined for every candidate.
    query_norm = float(np.linalg.norm(query_embedding))
    if query_norm == 0.0 or not np.isfinite(query_norm):
        return []

    scored = []
    for example in candidates:
        denominator = query_norm * float(np.linalg.norm(example.embedding))
        # Skip degenerate embeddings instead of dividing by zero (which
        # would produce NaN and a NumPy RuntimeWarning).
        if denominator == 0.0 or not np.isfinite(denominator):
            continue
        similarity = float(np.dot(query_embedding, example.embedding)) / denominator
        if similarity >= min_similarity:
            scored.append((similarity, example))

    # Sort on the score only, so examples themselves are never compared.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [example for _, example in scored[:top_k]]
def save_examples(self, output_file: Path) -> None:
    """Write every example in the repository to ``output_file`` as JSON.

    The file holds one object with an ``examples`` list built from each
    example's ``to_dict()``; values the JSON encoder cannot handle are
    converted with ``str``.

    Args:
        output_file: Destination path; an existing file is overwritten.
    """
    serialized = []
    for item in self.examples:
        serialized.append(item.to_dict())

    with open(output_file, "w") as handle:
        json.dump({"examples": serialized}, handle, indent=2, default=str)

    logger.info("Saved %d examples to %s", len(self.examples), output_file)
def invalidate_cache(self) -> None:
    """Invalidate the embedding cache and drop embeddings held by examples.

    When no embedding cache is configured, only a warning is logged and the
    examples are left untouched.
    """
    cache = self.embedding_cache
    if not cache:
        logger.warning("No embedding cache to invalidate")
        return

    cache.invalidate_cache()
    # Clear the in-memory copies too, so examples do not keep stale vectors.
    for item in self.examples:
        item.embedding = None
    logger.info("Invalidated embedding cache")
def compute_similarity(
    self, query_embedding: np.ndarray, example_embedding: np.ndarray
) -> float:
    """Compute cosine similarity between two embeddings, clamped to [0, 1].

    Args:
        query_embedding: Query embedding vector
        example_embedding: Example embedding vector

    Returns:
        Cosine similarity as a plain ``float`` between 0 and 1. Negative
        similarities are clamped to 0. Returns 0.0 when the similarity is
        undefined (a zero-norm or non-finite vector) or cannot be computed
        (e.g. mismatched shapes).
    """
    try:
        denominator = float(
            np.linalg.norm(query_embedding) * np.linalg.norm(example_embedding)
        )
        # NumPy scalar division yields NaN rather than raising
        # ZeroDivisionError, and NaN would slip through the min/max clamp
        # below as 1.0, so degenerate vectors are rejected explicitly.
        if denominator == 0.0 or not np.isfinite(denominator):
            return 0.0

        similarity = float(np.dot(query_embedding, example_embedding)) / denominator
        if not np.isfinite(similarity):
            return 0.0

        # Ensure similarity is between 0 and 1
        return max(0.0, min(1.0, similarity))
    except (ValueError, TypeError, ZeroDivisionError, RuntimeError) as e:
        logger.error("Failed to compute similarity: %s", e)
        return 0.0
+ + Args: + query: User query to search for + examples: List of tool examples to search through + user_permissions: User's permissions for filtering + tool_names: Optional list of tool names to filter by + top_k: Maximum number of examples to return + min_similarity: Minimum similarity threshold + boost_exact_matches: Whether to boost examples with exact keyword matches + + Returns: + List of (similarity_score, example) tuples, sorted by similarity + """ + if not self.embedding_model: + logger.warning("No embedding model available for similarity search") + return [] + + # Filter examples by permissions and tool names + filtered_examples = self._filter_examples(examples, user_permissions, tool_names) + + if not filtered_examples: + return [] + + # Compute query embedding + try: + query_embedding = self.embedding_model.encode([query])[0] + except (RuntimeError, ValueError, OSError) as e: + logger.error("Failed to compute query embedding: %s", e) + return [] + + # Calculate similarities + similarities = [] + query_lower = query.lower() + + for example in filtered_examples: + if example.embedding is None: + continue + + # Compute base similarity + similarity = self.compute_similarity(query_embedding, example.embedding) + + # Apply boosting for exact matches if enabled + if boost_exact_matches: + similarity = self._apply_similarity_boosting(similarity, query_lower, example) + + if similarity >= min_similarity: + similarities.append((similarity, example)) + + # Sort by similarity and return top_k + similarities.sort(key=lambda x: x[0], reverse=True) + return similarities[:top_k] + + def _filter_examples( + self, + examples: List[ToolExample], + user_permissions: Set[Permission], + tool_names: Optional[List[str]] = None, + ) -> List[ToolExample]: + """Filter examples by permissions and tool names.""" + filtered_examples = [] + + for example in examples: + # Check permissions - user needs at least one of the required permissions + if not 
example.permissions_required.intersection(user_permissions): + continue + + # Check tool names if specified + if tool_names and example.tool_name not in tool_names: + continue + + filtered_examples.append(example) + + return filtered_examples + + def _apply_similarity_boosting( + self, + base_similarity: float, + query_lower: str, + example: ToolExample, + ) -> float: + """Apply boosting to similarity score based on exact matches and other factors. + + Args: + base_similarity: Base cosine similarity score + query_lower: Lowercase query string + example: Tool example to check for matches + + Returns: + Boosted similarity score + """ + boost_factor = 1.0 + + # Boost for exact keyword matches in query + example_query_lower = example.query.lower() + query_words = set(query_lower.split()) + example_words = set(example_query_lower.split()) + + # Calculate word overlap + word_overlap = len(query_words.intersection(example_words)) + if word_overlap > 0: + overlap_ratio = word_overlap / len(query_words) + boost_factor += overlap_ratio * 0.2 # Up to 20% boost for word overlap + + # Boost for parameter name matches + for param_name, param_value in example.parameters.items(): + if isinstance(param_value, str) and param_value.lower() in query_lower: + boost_factor += 0.1 # 10% boost for parameter matches + + # Boost for tag matches + for tag in example.tags: + if tag.lower() in query_lower: + boost_factor += 0.05 # 5% boost per tag match + + # Apply boost but cap at reasonable maximum + boosted_similarity = base_similarity * boost_factor + return min(boosted_similarity, 1.0) + + def find_similar_parameters( + self, + parameter_name: str, + tool_name: str, + examples: List[ToolExample], + threshold: float = 0.7, + ) -> List[Tuple[str, float]]: + """Find parameters similar to the given parameter name. + + This is useful for parameter disambiguation and suggesting alternatives. 
def encode_text(self, text: str) -> Optional[np.ndarray]:
    """Encode a single text into an embedding vector.

    Args:
        text: Text to encode

    Returns:
        The first vector returned by the embedding model for ``[text]``, or
        None when no model is available or encoding raises
        ``RuntimeError``/``ValueError``/``OSError``.
    """
    model = self.embedding_model
    if not model:
        return None

    try:
        # The model is given a one-element batch; unwrap its only result.
        vector = model.encode([text])[0]
    except (RuntimeError, ValueError, OSError) as exc:
        logger.error("Failed to encode text: %s", exc)
        return None

    return vector
def demo_prompt_comparison() -> None:
    """Log a rough size comparison between the static and a dynamic prompt.

    Whitespace-separated word counts stand in for token counts, so the
    numbers are approximate.
    """
    logger.info("Comparing Static vs Dynamic Prompts")
    logger.info("=" * 50)

    prompt_manager = DynamicPromptManager()
    permissions: Set[Permission] = {Permission.StatementsPermission}

    # A query that should benefit from dynamic examples.
    sample_query = "What is the preferred stock additional paid in capital for Apple?"

    # Only the prompt text is needed; the accompanying stats are ignored.
    dynamic_prompt, _ = prompt_manager.get_prompt_with_stats(
        query=sample_query,
        user_permissions=permissions,
    )

    static_count = len(BASE_PROMPT.split())
    dynamic_count = len(dynamic_prompt.split())
    delta = dynamic_count - static_count

    # Guard against an empty base prompt when computing the relative change.
    if static_count > 0:
        percent_change = delta / static_count * 100
    else:
        percent_change = 0

    logger.info("Token Comparison:")
    logger.info(" Base prompt tokens: %d", static_count)
    logger.info(" Dynamic prompt tokens: %d", dynamic_count)
    logger.info(" Token difference: %d (%.1f%% change)", delta, percent_change)
def demo_embedding_cache() -> None:
    """Demonstrate embedding cache functionality.

    Logs the cache statistics, times two identical prompt constructions to
    show the effect of cached embeddings, and finally triggers an explicit
    precomputation. Durations are measured with ``time.perf_counter()``,
    which is monotonic and has a higher resolution than the wall clock.
    """

    def log_stats(title: str, cache_stats: dict) -> None:
        # Shared formatting for the cache-statistics dumps below.
        logger.info(title)
        for key, value in cache_stats.items():
            logger.info(" %s: %s", key, value)

    # Initialize manager with caching
    manager = DynamicPromptManager(enable_caching=True)

    log_stats("Initial Cache Stats:", manager.get_cache_stats())

    # Test query to trigger embedding computation
    test_query = "What is the preferred stock additional paid in capital for Apple?"
    user_permissions = {Permission.StatementsPermission}

    # First call - may compute embeddings
    logger.info("First call (may compute embeddings)...")
    start_time = time.perf_counter()
    manager.get_prompt_with_stats(test_query, user_permissions)
    first_call_time = time.perf_counter() - start_time

    # Second call - should use cached embeddings
    logger.info("Second call (should use cache)...")
    start_time = time.perf_counter()
    manager.get_prompt_with_stats(test_query, user_permissions)
    second_call_time = time.perf_counter() - start_time

    if first_call_time > 0:
        speedup = first_call_time / second_call_time if second_call_time > 0 else float("inf")
        logger.info("Performance Comparison:")
        logger.info(" First call: %.3fs", first_call_time)
        logger.info(" Second call: %.3fs", second_call_time)
        logger.info(" Speedup: %.1fx", speedup)

    # Show updated cache statistics
    log_stats("Updated Cache Stats:", manager.get_cache_stats())

    # Demonstrate precomputation
    logger.info("Precomputing embeddings...")
    if manager.precompute_embeddings():
        logger.info(" Precomputation successful")
        log_stats("Final Cache Stats:", manager.get_cache_stats())
    else:
        logger.info(" Precomputation failed")
class EnhancedToolCaller:
    """Tool caller that picks a dynamic or a static prompt for each query.

    On construction the caller records the user's permissions and computes
    the names of the tools from ``ALL_TOOLS`` that those permissions unlock.
    ``get_prompt_for_query`` then returns either a query-specific prompt
    built by ``construct_dynamic_prompt`` or the static ``BASE_PROMPT``.
    """

    def __init__(
        self,
        user_permissions: Set[Permission],
        enable_dynamic_prompts: bool = True,
        fallback_to_static: bool = True,
    ):
        """Initialize the enhanced tool caller.

        Args:
            user_permissions: User's permissions for tool access
            enable_dynamic_prompts: Whether to use dynamic prompt construction
            fallback_to_static: Whether to fall back to static prompts on failure
        """
        self.user_permissions = user_permissions
        self.enable_dynamic_prompts = enable_dynamic_prompts
        self.fallback_to_static = fallback_to_static

        # Computed once up front; depends only on ALL_TOOLS and the permissions.
        self.available_tools = self._filter_tools_by_permissions()

    def _filter_tools_by_permissions(self) -> List[str]:
        """Return the ``__name__`` of every tool in ``ALL_TOOLS`` the user may call.

        A tool is kept when it declares no (or empty) ``accepted_permissions``,
        or when at least one accepted permission is held by the user.
        """
        available_tools = []

        for tool_class in ALL_TOOLS:
            accepted = getattr(tool_class, "accepted_permissions", None)
            # A missing or empty permission set means the tool is unrestricted.
            if not accepted or accepted.intersection(self.user_permissions):
                available_tools.append(tool_class.__name__)

        return available_tools

    def get_prompt_for_query(self, query: str) -> str:
        """Get the appropriate prompt for a user query.

        Args:
            query: User query

        Returns:
            Prompt string (dynamic or static)

        Raises:
            RuntimeError, ValueError, OSError, ImportError: Re-raised from
                dynamic prompt construction when ``fallback_to_static`` is
                False.
        """
        if not self.enable_dynamic_prompts:
            return BASE_PROMPT

        # Keep the try body to the single call that is expected to fail.
        try:
            dynamic_prompt = construct_dynamic_prompt(
                query=query,
                user_permissions=self.user_permissions,
            )
        except (RuntimeError, ValueError, OSError, ImportError):
            if not self.fallback_to_static:
                raise
            # Record the failure so a silent fallback is still diagnosable.
            logger.warning(
                "Dynamic prompt construction failed; falling back to static prompt",
                exc_info=True,
            )
            return BASE_PROMPT

        # Whitespace-separated word counts, reported as "tokens" in the log.
        base_tokens = len(BASE_PROMPT.split())
        dynamic_tokens = len(dynamic_prompt.split())
        logger.info(
            "Dynamic prompt constructed: %d tokens (vs %d base tokens, %+d difference)",
            dynamic_tokens,
            base_tokens,
            dynamic_tokens - base_tokens,
        )

        return dynamic_prompt
def integration_with_existing_client() -> None:
    """Show how a client wrapper could route queries through dynamic prompts.

    Defines a small local client class around ``EnhancedToolCaller``, runs two
    sample queries through it, and logs each result.
    """

    logger.info("Integration with Existing Client Demo")
    logger.info("=" * 50)

    # Sketch of how the existing client could be adapted to use dynamic prompts
    class EnhancedKFinanceClient:
        """Enhanced client with dynamic prompt support."""

        def __init__(self, user_permissions: Set[Permission]) -> None:
            self.user_permissions = user_permissions
            self.tool_caller = EnhancedToolCaller(user_permissions)

        def process_query(self, query: str) -> str:
            """Build the prompt for ``query`` and summarize its size."""

            # A real implementation would hand the prompt and the available
            # tools to the LLM, parse the tool calls out of its response, then
            # execute them and return their results.
            word_count = len(self.tool_caller.get_prompt_for_query(query).split())
            return f"Processed query with {word_count} token prompt"

    demo_client = EnhancedKFinanceClient({Permission.StatementsPermission})

    sample_queries = (
        "What is the convertible preferred stock for Tesla?",
        "Show me the total debt to equity ratio for JPMorgan",
    )

    for position, text in enumerate(sample_queries, 1):
        logger.info("Processing Query %d: %s", position, text)
        logger.info(" Result: %s", demo_client.process_query(text))
+ """ + + # Step 1: Construct dynamic prompt with relevant examples + dynamic_prompt = construct_dynamic_prompt( + query=query, + user_permissions=user_permissions, + ) + + # Step 2: Send prompt to LLM with available tools + llm_response = llm_client.generate_with_tools( + prompt=dynamic_prompt, + tools=available_tools, + ) + + # Step 3: Execute any tool calls returned by the LLM + results = [] + for tool_call in llm_response.get("tool_calls", []): + # Execute the tool (replace with your actual tool execution) + tool_result = execute_kfinance_tool(tool_call["name"], tool_call["parameters"]) + results.append(tool_result) + + return { + "query": query, + "llm_response": llm_response, + "tool_results": results, + } + + +def execute_kfinance_tool(tool_name: str, parameters: Dict[str, Any]) -> Dict[str, Any]: + """Execute a kfinance tool - replace with your actual implementation.""" + # This is where you would call your actual kfinance tools + # from kfinance.integrations.tool_calling.all_tools import ALL_TOOLS + + # Find and execute the tool + # for tool_class in ALL_TOOLS: + # if tool_class.name == tool_name: + # tool_instance = tool_class() + # return tool_instance.execute(parameters) + + # Mock implementation for demo + return { + "success": True, + "data": f"Executed {tool_name} with {parameters}", + } + + +# Example usage: +if __name__ == "__main__": + # Your query + query = "What is the preferred stock additional paid in capital for Apple?" 
+ + # User permissions + user_permissions = {Permission.StatementsPermission} + + # Available tools (simplified) + available_tools = [ + { + "name": "get_financial_line_item_from_identifiers", + "description": "Get financial line items for companies", + "parameters": { + "type": "object", + "properties": { + "identifiers": {"type": "array", "items": {"type": "string"}}, + "line_item": {"type": "string"}, + }, + "required": ["identifiers", "line_item"], + }, + } + ] + + # Mock LLM client (replace with your actual client) + class MockLLMClient: + """Mock LLM client for demonstration purposes.""" + + def generate_with_tools(self, prompt: str, tools: List[Dict]) -> Dict: + """Generate tool calls from prompt and available tools.""" + # Your LLM would analyze the prompt and return tool calls + return { + "tool_calls": [ + { + "name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["AAPL"], + "line_item": "additional_paid_in_capital_preferred_stock", + }, + } + ], + "reasoning": "Based on the examples, using correct parameter name.", + } + + # Process the query + result = simple_tool_calling_with_dynamic_prompts( + query=query, + user_permissions=user_permissions, + llm_client=MockLLMClient(), + available_tools=available_tools, + ) diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/examples/tool_calling_example.py b/kfinance/integrations/tool_calling/dynamic_prompts/examples/tool_calling_example.py new file mode 100644 index 0000000..fbc3db2 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/examples/tool_calling_example.py @@ -0,0 +1,377 @@ +"""Complete example of tool calling with dynamic prompts.""" + +import logging +from typing import Any, Dict, List, Optional, Set + +from kfinance.client.permission_models import Permission +from kfinance.integrations.tool_calling.dynamic_prompts.core.manager import DynamicPromptManager +from kfinance.integrations.tool_calling.prompts import BASE_PROMPT + + +logger = 
# Mock tool execution (replace with your actual tool implementations)
def execute_tool(tool_name: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
    """Mock tool execution for demonstration.

    Only ``get_financial_line_item_from_identifiers`` is supported, and only
    for the line items present in the built-in mock table.

    Args:
        tool_name: Name of the tool to execute.
        parameters: Tool arguments; ``line_item`` and ``identifiers`` are read.

    Returns:
        ``{"success": True, "data": {...}}`` for a known tool and line item,
        otherwise ``{"success": False, "error": "..."}``.
    """
    if tool_name != "get_financial_line_item_from_identifiers":
        return {"success": False, "error": f"Unknown tool: {tool_name}"}

    line_item = parameters.get("line_item", "unknown")
    identifiers = parameters.get("identifiers", [])

    # Mock financial data
    mock_data = {
        "additional_paid_in_capital_preferred_stock": 1250000000,
        "preferred_stock_convertible": 850000000,
        "revenue": 394328000000,
        "total_revenue": 394328000000,
    }

    # A misspelled or unsupported line item must surface as a failure; reporting
    # it as a successful zero would hide the parameter mistakes this demo targets.
    if line_item not in mock_data:
        return {"success": False, "error": f"Unknown line item: {line_item}"}

    return {
        "success": True,
        "data": {
            "line_item": line_item,
            "identifiers": identifiers,
            "value": mock_data[line_item],
            "currency": "USD",
            "period": "2023",
        },
    }
+ + Args: + query: User's financial query + + Returns: + Dictionary with results and metadata + """ + + logger.info("Processing Query: %s", query) + logger.info("-" * 50) + + # Step 1: Construct dynamic prompt or use static prompt + if self.enable_dynamic_prompts and self.prompt_manager: + prompt, stats = self.prompt_manager.get_prompt_with_stats( + query=query, + user_permissions=self.user_permissions, + ) + + # Show similar examples found + similar_examples = self.prompt_manager.search_similar_examples( + query=query, + user_permissions=self.user_permissions, + top_k=3, + ) + + logger.info("Found %d similar examples:", len(similar_examples)) + if similar_examples: + for i, example in enumerate(similar_examples[:2], 1): + similarity = example.get("similarity_score", 0) + example_query = example.get("query", "Unknown") + logger.info(" %d. %s (similarity: %.3f)", i, example_query, similarity) + else: + logger.info(" No similar examples found") + else: + prompt = BASE_PROMPT + stats = {"example_count": 0, "total_words": len(prompt.split())} + logger.info("Using static prompt (dynamic prompts disabled)") + + # Step 2: Generate LLM response with tool calling + logger.info("Generating LLM response with %d available tools...", len(self.available_tools)) + + llm_response = self.llm_client.generate_with_tools( + prompt=prompt, available_tools=self.available_tools + ) + + logger.info(" LLM reasoning: %s", llm_response.get("reasoning", "No reasoning provided")) + + # Step 3: Execute tool calls + results = [] + tool_calls = llm_response.get("tool_calls", []) + + if tool_calls: + logger.info("Executing %d tool calls:", len(tool_calls)) + for i, tool_call in enumerate(tool_calls, 1): + tool_name = tool_call["name"] + parameters = tool_call["parameters"] + + logger.info(" %d. 
%s(%s)", i, tool_name, parameters) + + # Execute the tool + tool_result = execute_tool(tool_name, parameters) + results.append( + {"tool_name": tool_name, "parameters": parameters, "result": tool_result} + ) + + if tool_result.get("success"): + data = tool_result.get("data", {}) + value = data.get("value", "N/A") + line_item = data.get("line_item", "unknown") + logger.info(" Success: %s = %s", line_item, value) + else: + error = tool_result.get("error", "Unknown error") + logger.info(" Failed: %s", error) + else: + logger.info("No tool calls generated by LLM") + + # Step 4: Return comprehensive results + return { + "query": query, + "prompt_stats": stats, + "llm_response": llm_response, + "tool_results": results, + "success": len([r for r in results if r["result"].get("success")]) > 0, + } + + +def demo_basic_usage() -> None: + """Demonstrate basic usage of dynamic prompts with tool calling.""" + + logger.info("Basic Usage Demo") + logger.info("=" * 50) + + # Initialize processor with user permissions + user_permissions = {Permission.StatementsPermission} + processor = EnhancedFinancialQueryProcessor( + user_permissions=user_permissions, enable_dynamic_prompts=True + ) + + logger.info("Initialized processor with %d permissions", len(user_permissions)) + logger.info( + "Dynamic prompts: %s", "Enabled" if processor.enable_dynamic_prompts else "Disabled" + ) + + # Test queries that should benefit from dynamic prompts + test_queries = [ + "What is the preferred stock additional paid in capital for Apple?", + "Show me the convertible preferred stock for Tesla", + "Get the total revenue for Microsoft", + ] + + for i, query in enumerate(test_queries, 1): + logger.info("=" * 60) + logger.info("TEST %d/%d", i, len(test_queries)) + logger.info("=" * 60) + + result = processor.process_query(query) + + if result["success"]: + tool_results = result["tool_results"] + successful_calls = [r for r in tool_results if r["result"].get("success")] + logger.info( + "Query processed 
def demo_static_vs_dynamic_comparison() -> None:
    """Run one query through a static and a dynamic processor and compare them.

    Each pass logs the number of tool calls and the parameters of the first
    call. The comparison then checks whether the expected line-item name
    appears in the stringified parameters of any call from each pass.
    """

    logger.info("Static vs Dynamic Comparison Demo")
    logger.info("=" * 50)

    user_permissions = {Permission.StatementsPermission}
    test_query = "What is the preferred stock additional paid in capital for Apple?"
    correct_param = "additional_paid_in_capital_preferred_stock"

    def run_pass(intro: str, heading: str, use_dynamic: bool) -> List[Dict[str, Any]]:
        """Process the test query once and log the resulting tool calls."""
        logger.info(intro)
        processor = EnhancedFinancialQueryProcessor(
            user_permissions=user_permissions, enable_dynamic_prompts=use_dynamic
        )
        calls = processor.process_query(test_query)["llm_response"].get("tool_calls", [])

        logger.info(heading)
        if not calls:
            logger.info(" No tool calls generated")
            return calls

        first_params = calls[0].get("parameters", {})
        logger.info(" Tool calls: %d", len(calls))
        logger.info(" Parameters: %s", first_params)
        return calls

    def mentions_correct_param(calls: List[Dict[str, Any]]) -> bool:
        """Report whether any call's parameters contain the expected name."""
        return any(correct_param in str(call.get("parameters", {})) for call in calls)

    logger.info("Test query: %s", test_query)
    logger.info("User permissions: %s", [p.name for p in user_permissions])

    static_tool_calls = run_pass(
        "Testing with static prompts...", "Static Approach Results:", False
    )
    dynamic_tool_calls = run_pass(
        "Testing with dynamic prompts...", "Dynamic Approach Results:", True
    )

    # Compare accuracy
    logger.info("Accuracy Comparison:")

    dynamic_correct = mentions_correct_param(dynamic_tool_calls)
    static_correct = mentions_correct_param(static_tool_calls)

    logger.info(" Static approach used correct parameter: %s", "Yes" if static_correct else "No")
    logger.info(" Dynamic approach used correct parameter: %s", "Yes" if dynamic_correct else "No")

    if dynamic_correct == static_correct:
        logger.info(" Both approaches performed similarly")
    elif dynamic_correct:
        logger.info(" Dynamic prompts improved parameter selection!")
    else:
        logger.info(" Dynamic prompts performed worse than static")
Tool parameter selection is more accurate") + logger.info("For full integration, see the integration_example.py file.") + + +if __name__ == "__main__": + """Run all demonstrations.""" + try: + demo_basic_usage() + demo_static_vs_dynamic_comparison() + demo_real_world_integration() + + except (RuntimeError, ValueError, OSError, ImportError): + import traceback + + traceback.print_exc() diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/company_financials_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/company_financials_params.json new file mode 100644 index 0000000..9ea2cb0 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/company_financials_params.json @@ -0,0 +1,695 @@ +{ + "tools": [ + { + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": [ + { + "related_parameters": [ + "period_type", + "start_year", + "end_year" + ], + "description": "The specific financial line item to retrieve. Must match exact parameter names from the allowed list. 
Categories include: Income Statement items (revenue, expenses, profits), Balance Sheet items (assets, liabilities, equity), Cash Flow items (operating, investing, financing), and Financial Ratios.", + "parameter_name": "line_item", + "common_mistakes": [ + "Using 'convertible_preferred_stock' instead of 'preferred_stock_convertible'", + "Using 'total_debt_to_equity_ratio' instead of 'total_debt_to_equity'", + "Using 'preferred_dividends_paid' instead of 'preferred_stock_dividend'", + "Using 'total_receivables' (plural) instead of 'total_receivable' (singular)", + "Using 'depreciation' alone instead of 'depreciation_and_amortization' when both are needed", + "Using 'revenue' when 'total_revenue' is more accurate (includes all revenue streams)", + "Using 'net_income_to_company' instead of 'net_income'", + "Using 'current_accounts_receivable' instead of 'accounts_receivable'", + "Using 'deferred_tax_asset_current_portion' instead of 'short_term_deferred_tax_asset'", + "Using 'finance_division_debt_long_term_portion' instead of 'finance_division_debt_non_current_portion'", + "Using 'foreign_exchange_rate_adjustments' instead of 'foreign_exchange_adjustments'", + "Using 'other_non_operating_income' instead of 'total_other_non_operating_income' when totals are requested", + "Using 'divestitures' instead of 'sale_of_real_estate' when the query specifically mentions real estate sales", + "Omitting 'current' from 'current_income_taxes_payable' when the user references current taxes payable", + "Selecting aggregate categories like 'depreciation_and_amortization' when the query references balance sheet accounts such as 'accumulated_depreciation'", + "Using 'ltm' period_type when the query specifies explicit quarters (use 'quarterly' with quarter parameters instead)", + "Leaving year or quarter parameters null when the user asks for explicit periods" + ], + "high_risk_confusions": [ + "Use 'accumulated_depreciation'; avoid 'depreciation_and_amortization' when the user 
references cumulative balances", + "Use 'cash_and_equivalents'; avoid plain 'cash' for balance sheet cash positions", + "Use 'cash_from_financing'; avoid 'cash_from_financing_activities' for standard financing cash flow totals", + "Use 'capital_expenditure'; avoid shorthand 'capex' for capital spending", + "Use 'total_receivable'; avoid 'total_receivables' in any context", + "Use 'preferred_stock_dividend'; avoid 'preferred_dividends_paid' for preferred payouts", + "Use 'diluted_earning_per_share_excluding_extra_items'; avoid 'diluted_eps_excluding_extra_items'", + "Use 'change_in_net_operating_assets'; avoid 'change_in_other_net_operating_assets' unless the user explicitly narrows it.", + "Use 'interest_expense_finance_division'; avoid 'finance_division_interest_expense'", + "Use 'finance_division_loans_and_leases_short_term'; avoid elongated variants like 'short_term_loans_and_leases_of_the_finance_division'" + ], + "finance_division_aliases": { + "finance_division_loans_and_leases_short_term": [ + "short_term_loans_and_leases_of_the_finance_division", + "finance_division_short_term_loans_and_leases" + ], + "finance_division_other_non_current_assets": [ + "other_non_current_assets_of_the_finance_division" + ], + "finance_division_other_non_current_liabilities": [ + "finance_division_other_long_term_liabilities" + ], + "finance_division_other_current_liabilities": [ + "other_current_liabilities_of_the_finance_division" + ], + "finance_division_debt_non_current_portion": [ + "finance_division_debt_long_term_portion" + ], + "interest_expense_finance_division": [ + "finance_division_interest_expense" + ] + }, + "examples": { + "restructuring_charges": "Charges related to restructuring initiatives such as layoffs or facility closures.", + "operating_expense_insurance_division": "Alternate label for insurance division operating expenses.", + "prepaid_expenses": "Alternate label for prepaid expenses.", + "total_revenue": "Sum of operating and non-operating revenue 
streams for the period.", + "issuance_of_common_stock": "Cash inflows from issuing common stock.", + "misc_cash_flow_adj": "Abbreviated label for miscellaneous cash flow adjustments.", + "total_ev_to_ebitda": "Total enterprise value to EBITDA ratio.", + "short_term_accounts_receivable": "Short-term receivables due within one year.", + "preferred_equity": "General label for preferred shareholders' equity.", + "prepaid_expense": "Prepaid expenses for goods or services yet to be received.", + "short_term_assets": "Alternate label referring to the company\u2019s current assets.", + "unusual_items": "General label for unusual or non-recurring items.", + "last_close_total_enterprise_value_to_ebitda": "Last close total enterprise value to EBITDA multiple.", + "other_investing_activities": "Miscellaneous investing cash flow activities.", + "total_other_non_cash_items": "Aggregate of non-cash items affecting operating cash flow.", + "other_non_current_liabilities": "Miscellaneous non-current liability categories.", + "treasury_stock_non_redeemable_preferred_stock": "Treasury shares held for non-redeemable preferred stock classes.", + "legal_settlements": "Amounts paid or received to resolve legal actions.", + "current_portion_of_capitalized_leases": "Alternate label for current capital lease liabilities.", + "sale_of_real_properties": "Alternate wording for real property sale proceeds.", + "total_enterprise_value_to_headcount": "Total enterprise value per headcount metric.", + "finance_division_other_long_term_liabilities": "Alternate label for finance division other non-current liabilities.", + "ebt_excluding_unusual_items": "Earnings before taxes with unusual/non-recurring items removed.", + "total_cash_and_short_term_investments": "Total cash plus short-term investment balance.", + "total_operating_expense": "Sum of all operating expenses for the reporting period.", + "total_selling_general_and_admin_cost": "Cost-focused aggregate of SG&A expenses.", + 
"current_portion_of_capital_leases": "Short-term portion of capital lease obligations due within 12 months.", + "trading_asset_securities": "Trading securities held as short-term financial assets.", + "restricted_cash": "Cash balances with usage restrictions (e.g., escrow, collateral).", + "liabilities_and_equity": "Alternate label for the combined liabilities and equity total.", + "other_unusual_items": "Unusual or infrequent items not captured elsewhere.", + "net_property_plant_and_equipment": "Net property, plant, and equipment after accumulated depreciation.", + "cashflow_from_investing_activities": "Alternate label for cash provided by investing activities.", + "total_d_and_a": "Abbreviated total depreciation and amortization expense.", + "currency_exchange_gains": "Gains arising from favorable foreign currency movements.", + "cashflow_from_financing_activities": "Alternate label for cash provided by financing activities.", + "current_portion_of_unearned_revenue": "Current component of unearned or deferred revenue.", + "short_term_loans_and_leases_of_the_finance_division": "Alternate wording for finance division short-term loans and leases.", + "total_preferred_stock": "Aggregate preferred stock balance.", + "short_term_borrowing": "Variant label for short-term debt obligations.", + "depreciation": "Depreciation expense recognized during the period.", + "insurance_division_revenue": "Revenue attributable to insurance activities within a diversified company.", + "common_shares_outstanding": "Weighted or period-end count of common shares outstanding.", + "last_close_enterprise_value_to_headcount": "Last close enterprise value per headcount metric.", + "total_enterprise_value_to_total_revenue": "Total enterprise value to total revenue multiple.", + "tax_rate": "General label for the tax rate metric.", + "net_income": "Bottom-line profit attributable to common shareholders.", + "net_decrease_in_investment_loans_originated_and_sold": "Net decrease in investment 
loans originated and sold.", + "impairment_of_oil_gas_and_mineral_properties": "Impairment charges related to oil, gas, or mineral property valuations.", + "short_term_debt_issued": "Cash inflows from issuing short-term debt.", + "short_term_finance_division_loans_and_leases": "Alternate label for finance division short-term loans and leases.", + "change_in_inventories": "Change in inventory balances, typically from the cash flow statement.", + "basic_earning_per_share_including_extra_items": "Spelled-out variant for basic EPS including extraordinary items.", + "normalized_basic_eps": "Normalized basic earnings per share metric.", + "last_close_total_ev_to_employee_count": "Last close total enterprise value per employee.", + "selling_general_and_admin_cost": "Cost-focused wording for SG&A expenses reported together.", + "sga": "Common shorthand for selling, general, and administrative expenses.", + "sg_and_a": "Abbreviated form for selling, general, and administrative expenses.", + "net_income_to_minority_interest": "Alternate label for income attributable to minority interests.", + "dna": "Common shorthand for depreciation and amortization expense.", + "non_current_debt": "Debt obligations with maturities beyond one year.", + "research_and_development_expense": "Expenses incurred for research and development activities.", + "insurance_division_operating_expense": "Operating expenses tied to insurance operations inside a diversified company.", + "cash_from_operating_activities": "Cash provided by operating activities.", + "total_debt_to_total_equity": "Total debt divided by total equity (explicit).", + "current_portion_of_cap_leases": "Current obligations under capital leases coming due.", + "last_total_ev_to_ebit": "Last reported total enterprise value to EBIT multiple.", + "total_debt_to_equity_ratio": "Total debt divided by total equity.", + "current_total_receivable": "Current total receivable amount.", + "net_interest_expense": "Interest expense net of any 
interest income for the period.", + "treasury_non_redeemable_preferred_stock": "Alternate label for treasury-held non-redeemable preferred shares.", + "last_ev_to_employees": "Last enterprise value per employees metric.", + "other_current_liabilities": "Pluralized label for miscellaneous current liabilities.", + "current_ratio": "Current assets divided by current liabilities.", + "interest_expense_finance_division": "Alternate label for finance division interest expense.", + "other_preferred_stock": "Alternate label for other preferred stock categories.", + "sale_of_intangible_asset": "Singular label for proceeds from an intangible asset sale.", + "last_close_enterprise_value_to_earnings_before_interest_taxes_depreciation_and_amortization": "Last close enterprise value to EBITDA ratio.", + "basic_eps": "Basic earnings per share; use sparingly when explicitly requested.", + "ev_to_ebitda": "Enterprise value to EBITDA shorthand multiple.", + "minority_interest_in_earnings": "Earnings attributable to minority interest holders.", + "total_preferred_equity": "Total preferred equity including all preferred share classes.", + "in_process_r_and_d_cost": "Alternate label for in-process research and development cost.", + "last_total_enterprise_value_to_ebitda": "Last total enterprise value to EBITDA multiple.", + "nppe": "Alternate abbreviation for net property, plant, and equipment.", + "net_ppe": "Abbreviated label for net property, plant, and equipment.", + "capital_leases": "Long-term obligations arising from capital lease arrangements.", + "in_process_research_and_development_cost": "Cost basis assigned to in-process R&D assets.", + "treasury_redeemable_preferred_stock": "Treasury balance for redeemable preferred stock.", + "finance_division_debt_non_current_portion": "Non-current portion of debt held within the finance division.", + "net_income_to_common_shareholders_including_extra_items": "Net income available to common shareholders inclusive of extraordinary 
items.", + "last_enterprise_value_to_total_revenue": "Last enterprise value to total revenue multiple.", + "non_current_debt_repaid": "Alternate label for non-current debt repayments.", + "last_total_enterprise_value_to_ebit": "Last total enterprise value divided by EBIT.", + "current_portion_of_long_term_debt_and_capital_leases": "Combined current portion of long-term debt and capital leases.", + "total_ev_to_ebit": "Alternate label for total EV/EBIT.", + "net_income_to_company": "Net income attributable to the parent company.", + "normal_revenue": "Core recurring revenue reported for ongoing business operations.", + "total_current_portion_of_non_current_debt_and_capitalized_leases": "Total current portion of non-current debt and capitalized lease obligations.", + "preferred_stock_dividend": "Dividends paid to preferred shareholders.", + "loss_on_equity_investments": "Loss recognized on equity method or other equity investments.", + "weighted_average_diluted_shares_outstanding": "Weighted average diluted shares used in EPS calculations.", + "capital_expenditures": "Pluralized label for capital expenditure cash outflows.", + "finance_division_operating_expense": "Operating expenses incurred within the company's finance or captive lending division.", + "total_enterprise_value_to_earnings_before_interest_taxes_depreciation_and_amortization": "Total enterprise value to EBITDA metric.", + "earnings_from_discontinued_operations": "Net earnings attributable to discontinued operations.", + "enterprise_value_to_ebit": "Current enterprise value to EBIT multiple.", + "ffo": "Abbreviated label for funds from operations.", + "merger_charges": "Costs incurred during merger transactions.", + "diluted_earning_per_share_including_extra_items": "Spelled-out variant for diluted EPS including extraordinary items.", + "last_close_tev_to_employee_count": "Last close total enterprise value per employee.", + "additional_paid_in_capital": "Capital paid by shareholders above par value for 
common stock.", + "other_long_term_liabilities": "Additional long-term liabilities not classified elsewhere.", + "last_close_total_ev_to_headcount": "Last close total enterprise value per headcount.", + "repurchase_of_common_stock": "Cash outflows for repurchasing common stock.", + "finance_division_revenue": "Revenue generated by a company's finance or captive lending operations.", + "last_close_enterprise_value_to_earnings_before_interest_and_taxes": "Last close enterprise value to EBIT ratio.", + "interest_and_investment_income": "Income from interest-bearing assets and investment returns outside core operations.", + "last_close_tev_to_ebitda": "Last close total enterprise value to EBITDA multiple.", + "last_enterprise_value_to_ebitda": "Last reported enterprise value to EBITDA multiple.", + "enterprise_value_to_earnings_before_interest_taxes_depreciation_and_amortization": "Spelled-out variant for EV/EBITDA ratio.", + "asset_writedown": "Reduction in asset carrying value due to impairment or obsolescence.", + "enterprise_value_to_employee_count": "Enterprise value divided by employee count.", + "total_dna": "Shorthand total for depreciation and amortization expense.", + "current_portion_of_lt_debt_and_cap_leases": "Abbreviated current portion of long-term debt and capital lease obligations.", + "total_liabilities": "Total liabilities including both current and non-current obligations.", + "total_unusual_items": "Aggregate impact of all unusual or non-recurring items for the reporting period.", + "total_debt_to_equity": "Alternate label for total debt to equity.", + "other_intangibles": "Intangible assets other than goodwill (e.g., patents, trademarks).", + "last_tev_to_headcount": "Last total enterprise value per headcount.", + "fx_adjustments": "Abbreviated label for foreign exchange adjustments.", + "net_change_in_cash": "Net change in cash during the reporting period.", + "rnd_expense": "Common shorthand for research and development expense.", + 
"diluted_eps_from_discontinued_operations": "Diluted EPS attributable to discontinued operations.", + "last_close_tev_to_total_revenue": "Last close total enterprise value to total revenue multiple.", + "last_close_ev_to_employee_count": "Last close enterprise value per employee metric.", + "ev_to_ebit": "Enterprise value to EBIT shorthand multiple.", + "revenue_from_sale_of_investments": "Revenue recognized from disposing of investment securities or ownership stakes.", + "change_in_accounts_payable": "Operating cash flow impact from changes in accounts payable.", + "rnd_cost": "Alternate phrasing for research and development expense.", + "sale_of_real_estate": "Proceeds from real estate sales (distinct from broader divestitures).", + "finance_division_non_current_debt": "Non-current debt balances tied to the finance division.", + "working_capital": "Alternate label for net working capital.", + "convertible_preferred_stock": "Preferred shares that can be converted into common equity.", + "basic_eps_from_accounting_change_and_extraordinary_items": "Basic EPS from accounting changes and extraordinary items combined.", + "current_borrowings": "Pluralized label for short-term borrowing balances.", + "quick_ratio": "Quick assets divided by current liabilities.", + "pension_and_other_post_retirement_benefit": "Liabilities for pension and post-retirement benefit obligations.", + "current_accounts_receivable": "Current portion of accounts receivable due within one year.", + "tev_to_ebitda": "Total enterprise value to EBITDA multiple.", + "total_other_revenue": "Aggregate of all non-core or miscellaneous revenue categories.", + "last_ev_to_total_revenue": "Last enterprise value to total revenue multiple.", + "preferred_stock_other": "Other preferred equity categories not classified elsewhere.", + "total_current_assets": "Sum of all assets classified as current for the reporting date.", + "in_process_rnd_cost": "Abbreviated form for in-process R&D cost.", + "revenue": 
"Revenue recognized from primary business activities (excludes non-operating income).", + "current_deferred_tax_liability": "Deferred tax liabilities classified as current.", + "in_process_research_and_development_expense": "Expense recognized for acquired in-process R&D projects.", + "finance_division_long_term_debt": "Long-term debt associated with the finance division.", + "common_equity": "General label for equity attributable to common stockholders.", + "total_receivable": "Total receivable balance reported.", + "diluted_eps_excluding_extra_items": "Diluted EPS calculated excluding extraordinary items.", + "other_adjustments_to_net_income": "Miscellaneous adjustments applied to net income calculations.", + "short_term_deferred_tax_asset": "Short-term deferred tax asset balance slated for near-term realization.", + "common_dividends_paid": "Cash dividends paid to common shareholders.", + "total_current_liabilities": "Total balance of current liabilities reported.", + "r_and_d_cost": "Spelled-out variant for research and development costs.", + "diluted_eps_from_accounting_change": "Diluted EPS impact from accounting changes.", + "non_current_deferred_tax_liability": "Deferred tax liabilities not expected to be settled within one year.", + "last_close_ev_to_headcount": "Last close enterprise value per headcount.", + "last_enterprise_value_to_earnings_before_interest_taxes_depreciation_and_amortization": "Last enterprise value to EBITDA ratio.", + "sale_of_property_plant_and_equipment": "Cash inflows from selling property, plant, and equipment.", + "last_enterprise_value_to_earnings_before_interest_and_taxes": "Last reported enterprise value to EBIT ratio.", + "enterprise_value_to_earnings_before_interest_and_taxes": "Enterprise value divided by EBIT metric.", + "last_total_ev_to_headcount": "Last reported total enterprise value per headcount.", + "earnings_before_interest_and_taxes": "Spelled-out variant for EBIT.", + "retained_earnings": "Accumulated retained 
earnings available to common shareholders.", + "total_dividends_paid": "Total dividends paid during the period.", + "tev_to_ebit": "Total enterprise value to EBIT multiple.", + "impairment_of_goodwill": "Write-downs of goodwill carrying value due to impairment testing.", + "change_in_trading_asset_securities": "Cash impact from changes in trading asset securities.", + "last_close_total_enterprise_value_to_headcount": "Last close total enterprise value per headcount.", + "gross_property_plant_and_equipment": "Gross value of PP&E before accumulated depreciation.", + "effective_tax_rate": "Effective tax rate metric for the period.", + "diluted_earning_per_share_from_extraordinary_items": "Spelled-out variant for diluted EPS from extraordinary items.", + "total_common_equity": "Total equity attributable to common shareholders.", + "preferred_stock_equity_adjustment": "Equity adjustment related to preferred stock balances.", + "miscellaneous_cash_flow_adjustments": "Miscellaneous adjustments affecting total cash flow.", + "accumulated_depreciation": "Cumulative depreciation recorded against property, plant, and equipment.", + "current_notes_receivable": "Notes receivable scheduled for collection within one year.", + "liabilities": "General label for the liabilities section of the balance sheet.", + "preferred_dividends_and_other_adjustments": "Combined preferred dividend and adjustment line item.", + "debt_ratio": "Total debt divided by total assets.", + "provision_for_bad_debts": "Allowance for credit losses on receivables recorded during the period.", + "other_current_assets": "Miscellaneous current assets not classified elsewhere.", + "long_term_unearned_revenue": "Unearned revenue expected to be recognized beyond one year.", + "r_and_d_expense": "Spelled-out variant for research and development expense.", + "diluted_earning_per_share": "Spelled-out label for diluted earnings per share.", + "last_close_total_enterprise_value_to_employees": "Last close total 
enterprise value per employees.", + "total_debt_issued": "Aggregate debt issuance during the period.", + "total_debt_to_capital": "Total debt divided by total capital (debt plus equity).", + "cash_from_financing_activities": "Explicit label for cash provided from financing activities.", + "finance_division_loans_and_leases_short_term": "Short-term loans and leases held by the finance division.", + "dividends_paid": "General label for cash dividends paid.", + "last_close_enterprise_value_to_employees": "Last close enterprise value per employees.", + "current_liabilities": "General label for liabilities due within one year.", + "cashflow_from_financing": "Alternate spelling for cash provided by financing activities.", + "last_total_enterprise_value_to_earnings_before_interest_and_taxes": "Last total EV divided by EBIT.", + "last_close_enterprise_value_to_ebitda": "Last close enterprise value to EBITDA multiple.", + "gross_ppe": "Abbreviated label for gross property, plant, and equipment before depreciation.", + "total_current_portion_of_lt_debt_and_cap_leases": "Abbreviated label for the aggregate current portion of long-term debt and capital leases.", + "long_term_debt_repaid": "Cash outflows from repaying long-term debt.", + "gain_from_sale_of_assets": "Gain recognized from disposing tangible or intangible assets.", + "cash_and_equivalents": "Cash on hand plus cash-equivalent short-term investments.", + "total_shareholders_equity": "Alternate label for total shareholders' equity.", + "extraordinary_item_and_accounting_change": "Combined impact of extraordinary items and accounting changes.", + "finance_division_debt_current_portion": "Current portion of debt held within the finance division.", + "last_close_total_enterprise_value_to_earnings_before_interest_and_taxes": "Last close total enterprise value divided by EBIT.", + "basic_earning_per_share": "Spelled-out label for basic earnings per share.", + "operating_expense": "General label for operating expenses 
incurred during the period.", + "additional_paid_in_capital_preferred_stock": "Capital received from preferred stock issuance above par value.", + "last_enterprise_value_to_headcount": "Last enterprise value per headcount.", + "gain_from_sale_of_investments": "Gain realized from selling investment holdings.", + "depreciation_of_rental_assets": "Depreciation charge specific to rental assets.", + "total_debt": "Total debt outstanding.", + "non_current_unearned_revenue": "Revenue received in advance that will be recognized beyond one year.", + "earnings_before_taxes_excluding_unusual_items": "Pre-tax earnings with unusual or non-recurring items removed.", + "ebita": "Earnings before interest, taxes, and amortization.", + "merger_and_restructuring_charges": "Combined charges stemming from merger and restructuring activities.", + "in_process_r_and_d_expense": "Alternate label for expense related to in-process R&D.", + "enterprise_value_to_ebitda": "Enterprise value to EBITDA multiple.", + "repurchase_of_preferred_stock": "Cash outflows for repurchasing preferred stock.", + "last_total_enterprise_value_to_earnings_before_interest_taxes_depreciation_and_amortization": "Last total EV to EBITDA ratio.", + "current_portion_of_long_term_debt": "Portion of long-term debt due within the next year.", + "amortization_of_goodwill_and_intangibles": "Amortization expense tied to goodwill and other intangible assets.", + "revenue_from_sale_of_assets": "Revenue recognized from selling long-lived assets outside normal product sales.", + "last_enterprise_value_to_ebit": "Last enterprise value divided by EBIT.", + "preferred_stock_convertible": "Preferred shares that can be converted into common stock.", + "cash_from_financing": "Cash provided by financing activities.", + "diluted_earning_per_share_from_accounting_change_and_extraordinary_items": "Spelled-out variant for diluted EPS from accounting changes and extraordinary items.", + "capex": "Abbreviated label for capital 
expenditures.", + "total_liabilities_and_equity": "Total liabilities plus shareholders' equity to balance assets.", + "equity": "General label for the equity section of the balance sheet.", + "change_in_income_taxes": "Period-over-period change in income tax expense on the income statement.", + "finance_division_other_current_assets": "Other current assets specific to a company's finance division.", + "last_total_enterprise_value_to_employee_count": "Last total enterprise value per employee count.", + "total_ev_to_employees": "Total enterprise value per employee.", + "last_close_tev_to_headcount": "Last close total enterprise value per headcount.", + "provision_for_credit_losses": "Provision recorded for credit losses within the period.", + "current_portion_of_non_current_debt": "Current portion of non-current debt obligations.", + "total_ev_to_headcount": "Total enterprise value per headcount.", + "total_current_portion_of_non_current_debt_and_capital_leases": "Total of current portions for non-current debt and capital leases.", + "normalized_diluted_eps": "Normalized diluted earnings per share metric.", + "total_current_portion_of_long_term_debt_and_capitalized_leases": "Total current portion of long-term debt and capitalized leases.", + "net_decrease_in_loans_originated_and_sold": "Net decrease in loans originated and sold impacting cash flow.", + "last_enterprise_value_to_employees": "Last enterprise value per employees metric.", + "basic_earning_per_share_from_accounting_change_and_extraordinary_items": "Spelled-out variant for combined accounting change and extraordinary items EPS.", + "last_close_total_enterprise_value_to_ebit": "Last close total enterprise value divided by EBIT.", + "basic_eps_from_extraordinary_items": "Basic EPS attributable solely to extraordinary items.", + "income_taxes": "Alternate label for income tax expense.", + "provision_for_bad_debt": "Alternate singular label for the bad debt provision.", + "change_in_net_working_capital": 
"Change in net working capital over the period.", + "current_portion_of_non_current_debt_and_capitalized_leases": "Current component of non-current debt and capitalized leases.", + "current_borrowing": "Alternate label for short-term borrowings outstanding.", + "non_current_other_liabilities": "Alternate label for other long-term liabilities.", + "last_close_total_ev_to_ebitda": "Last close total EV to EBITDA multiple.", + "last_tev_to_ebitda": "Last total EV to EBITDA.", + "last_tev_to_employee_count": "Last total enterprise value per employee.", + "change_in_other_net_operating_assets": "Change in other net operating assets affecting cash flow.", + "treasury_preferred_stock_non_redeemable": "Variant wording for treasury non-redeemable preferred stock.", + "earnings_before_interest_taxes_depreciation_amortization_and_rental_expense": "Spelled-out variant for EBITDAR.", + "total_equity": "Total shareholders' equity including preferred and common components.", + "net_cash_from_discontinued_operation": "Net cash flow attributable to discontinued operations.", + "diluted_eps_from_accounting_change_and_extraordinary_items": "Diluted EPS attributable to accounting changes and extraordinary items.", + "net_working_capital": "Net working capital (current assets minus current liabilities).", + "non_current_debt_issued": "Alternate label for non-current debt issuance.", + "total_depreciation_and_amortization": "Total depreciation and amortization expense reported for the period.", + "total_sga": "Abbreviated label for total selling, general, and administrative expenses.", + "current_portion_of_income_taxes_payable": "Current portion of income taxes owed but unpaid.", + "income_tax_expense": "Total income tax expense recognized for the period.", + "earnings_before_taxes_including_unusual_items": "Alternate phrasing for EBT including unusual items.", + "cor": "Alias for cost_of_revenue covering direct revenue-related costs.", + "enterprise_value_to_employees": "Enterprise 
value per employees metric.", + "tev_to_total_revenue": "Total enterprise value to total revenue multiple.", + "gross_profit": "Revenue minus cost_of_goods_sold or cost_of_revenue for the reported period.", + "current_other_receivables": "Current portion of other receivables expected to be collected soon.", + "selling_general_and_admin": "Alternate label for selling, general, and administrative expenses.", + "other_receivables": "Receivables that are not trade receivables (e.g., tax refunds, employee advances).", + "accrued_expenses": "Accrued expenses and other short-term liabilities awaiting payment.", + "total_other_non_operating_income": "Aggregate of non-operating income items (e.g., investment income, gains).", + "current_debt_repaid": "Alternate label for short-term debt repayments.", + "ev_to_total_revenue": "Enterprise value to total revenue multiple.", + "sale_proceeds_from_rental_assets": "Cash inflows from selling rental assets.", + "basic_eps_including_extra_items": "Basic EPS calculated including extraordinary items.", + "change_in_unearned_revenue": "Cash flow adjustment from changes in unearned revenue balances.", + "non_redeemable_preferred_stock": "Alternate label for non-redeemable preferred stock.", + "premium_on_redemption_of_preferred_stock": "Premium paid when redeeming preferred shares above par value.", + "other_financing_activities": "Miscellaneous financing cash flow activities.", + "last_tev_to_ebit": "Last total enterprise value to EBIT ratio.", + "in_process_rnd_expense": "Abbreviated form for in-process R&D expense.", + "other_non_operating_income": "Non-operating income items not captured in other categories.", + "diluted_eps_including_extra_items": "Diluted EPS including extraordinary items.", + "pre_opening_costs": "Costs associated with opening new locations or facilities before revenue starts.", + "cash_from_investing_activities": "Explicit label for cash provided from investing activities.", + "exploration_and_drilling_expense": 
"Expense entry for exploration and drilling programs.", + "fees_and_other_income": "Fee-based revenue combined with ancillary income streams in one line item.", + "finance_division_debt_long_term_portion": "Alternate label for finance division long-term debt.", + "cash_and_short_term_investments": "Combined cash and short-term investment balance.", + "current_total_receivables": "Current portion of total receivables.", + "long_term_investments": "Non-current investments intended to be held longer than one year.", + "accounts_receivable": "Accounts receivable balance due from customers.", + "interest_expense": "Total interest expense recognized during the period.", + "short_term_other_receivables": "Short-term non-trade receivables due within a year.", + "continued_operations_earnings": "Alternate label for earnings from continued operations.", + "ebit": "Earnings before interest and taxes.", + "short_term_borrowings": "Borrowings that mature within twelve months of the reporting date.", + "diluted_eps_from_extraordinary_items": "Diluted EPS attributable to extraordinary items.", + "preferred_stock": "Preferred stock outstanding across all share classes.", + "other_operating_activities": "Other operating cash flow adjustments not listed elsewhere.", + "cash": "Cash balance reported on the balance sheet.", + "regular_revenue": "Standard operating revenue before unusual or non-recurring items.", + "current_unearned_revenue": "Unearned revenue expected to be recognized within one year.", + "last_total_enterprise_value_to_employees": "Last total enterprise value per employees.", + "total_current_portion_of_long_term_debt_and_capital_leases": "Aggregate current portion of long-term debt and capital lease liabilities.", + "distributable_cash_per_share": "Distributable cash available per share.", + "income_tax": "Short label for income tax expense.", + "change_in_net_operating_assets": "Aggregate change in net operating assets during the period.", + 
"preferred_stock_additional_paid_in_capital": "Additional paid-in capital attributable to preferred stock.", + "finance_division_other_short_term_assets": "Non-core short-term assets tracked within the finance division.", + "funds_from_operations": "Funds from operations metric (often for REITs).", + "other_preferred_stock_adjustments": "Adjustments related to preferred stock outside dividends and redemption premiums.", + "capital_expenditure": "Cash outflows for capital expenditures during the period.", + "last_close_total_ev_to_ebit": "Last close total EV to EBIT.", + "weighted_average_basic_shares_outstanding": "Weighted average basic shares used in EPS calculations.", + "total_debt_repaid": "Aggregate debt repayments during the period.", + "finance_division_other_non_current_liabilities": "Other non-current liabilities reported by the finance division.", + "diluted_earning_per_share_from_accounting_change": "Spelled-out variant for diluted EPS from accounting changes.", + "last_enterprise_value_to_employee_count": "Last enterprise value per employee metric.", + "inventory": "Inventory balance reported for the period.", + "other_equity": "Other equity components not categorized elsewhere.", + "sale_of_intangibles": "Pluralized label for sale of intangible asset proceeds.", + "last_close_total_ev_to_employees": "Last close total enterprise value per employees.", + "cash_from_discontinued_operation": "Alternate label for cash generated from discontinued operations.", + "enterprise_value_to_headcount": "Enterprise value per headcount metric.", + "last_close_enterprise_value_to_ebit": "Alternate label for last close EV/EBIT.", + "normalized_diluted_earning_per_share": "Spelled-out variant for normalized diluted EPS.", + "dividends_per_share": "Cash dividends declared per share.", + "shareholders_equity": "Shareholders' equity as presented on the balance sheet.", + "total_other_investing_activities": "Aggregate of other investing cash flow activities.", + 
"last_close_total_enterprise_value_to_earnings_before_interest_taxes_depreciation_and_amortization": "Last close total EV to EBITDA ratio.", + "long_term_debt_issued": "Cash inflows from issuing long-term debt.", + "special_dividends_paid": "Cash outflows for special or one-time dividends.", + "short_term_total_receivables": "Short-term receivables inclusive of all categories.", + "enterprise_value_to_total_revenue": "Enterprise value to total revenue multiple.", + "equity_adjustment_preferred_stock": "Alternate wording for preferred stock equity adjustments.", + "preferred_stock_redeemable": "Redeemable preferred stock outstanding.", + "line_item": "Use exact parameter names from the allowed list; see common mistakes for frequent misnamings.", + "sale_of_ppe": "Abbreviated label for cash received from selling PP&E.", + "long_term_other_liabilities": "Alternate ordering for other long-term liabilities.", + "insurance_settlements": "Proceeds or expenses arising from insurance claim settlements.", + "divestitures": "Cash proceeds or impact from divested businesses or assets (distinct from sale_of_real_estate).", + "long_term_debt": "Debt obligations with maturity dates beyond one year.", + "diluted_eps": "Diluted earnings per share measure.", + "ev_to_headcount": "Enterprise value per headcount ratio.", + "last_close_enterprise_value_to_total_revenue": "Last close enterprise value to total revenue multiple.", + "pre_opening_expense": "Alternate label for pre-opening costs recorded as an expense.", + "last_tev_to_employees": "Last total enterprise value per employees.", + "sale_of_real_estate_properties": "Cash inflows from selling real estate properties.", + "total_enterprise_value_to_ebitda": "Total enterprise value to EBITDA multiple.", + "loans_held_for_sale": "Loans designated for sale rather than retention on the balance sheet.", + "current_portion_of_long_term_debt_and_capitalized_leases": "Current portion of combined long-term debt and capitalized lease 
obligations.", + "revenue_from_interest_and_investment_income": "Interest and investment returns classified within revenue rather than other income.", + "notes_receivable": "Amounts owed to the company via promissory notes.", + "preferred_stock_non_redeemable": "Non-redeemable preferred stock outstanding.", + "basic_earning_per_share_from_accounting_change": "Basic EPS attributable to accounting changes.", + "change_in_cash": "Alternate label for the net change in cash.", + "ebitdar": "Earnings before interest, taxes, depreciation, amortization, and rent.", + "earnings_before_interest_taxes_and_amortization": "Spelled-out variant for EBITA.", + "cost_of_revenue": "Direct costs associated with delivering goods or services that generate revenue.", + "last_tev_to_total_revenue": "Last total enterprise value to total revenue multiple.", + "current_debt_issued": "Alternate wording for short-term debt issuance.", + "short_term_debt_repaid": "Cash outflows from repaying short-term debt.", + "cogs": "Alias for cost_of_goods_sold covering direct production costs.", + "last_close_tev_to_ebit": "Last close total enterprise value to EBIT multiple.", + "cash_flow_from_operations": "Alternate label for operating cash flow.", + "treasury_preferred_stock_redeemable": "Variant description of treasury-held redeemable preferred stock.", + "current_assets": "Balance of assets expected to be converted to cash within one operating cycle.", + "capitalized_leases": "Alternate label for capital lease liabilities.", + "adjustments_to_cash_flow_net_income": "Adjustments reconciling net income to operating cash flow.", + "sale_of_intangible_assets": "Cash received from selling intangible assets.", + "cash_acquisitions": "Cash paid for acquisitions of businesses or assets.", + "current_portion_of_leases": "Current due amounts under lease agreements.", + "last_total_enterprise_value_to_headcount": "Last total enterprise value per headcount.", + "change_in_deferred_taxes": "Cash flow adjustment 
from changes in deferred tax balances.", + "other_revenue": "Miscellaneous revenue that does not fall into primary operating categories.", + "total_debt_ratio": "Alternate label for debt ratio.", + "cash_from_operations": "Operating cash flow generated during the reporting period.", + "tev_to_employees": "Total enterprise value per employees metric.", + "total_ev_to_employee_count": "Total enterprise value per employee count.", + "current_income_taxes_payable": "Current portion of income taxes owed but not yet paid (balance sheet liability).", + "net_cash_from_investments": "Net cash flow provided by investing activities overall.", + "change_in_accounts_receivable": "Operating cash flow impact from changes in accounts receivable.", + "last_close_ev_to_total_revenue": "Last close enterprise value to total revenue multiple.", + "basic_earning_per_share_excluding_extra_items": "Spelled-out variant for basic EPS excluding extraordinary items.", + "depreciation_and_amortization": "Combined depreciation and amortization expense for the period.", + "ev_to_employees": "Enterprise value per employee metric.", + "selling_general_and_admin_expense": "Combined selling, general, and administrative operating expenses.", + "operating_income": "Operating profit after subtracting operating expenses from operating revenue.", + "research_and_development_cost": "Costs associated with R&D projects and initiatives.", + "property_plant_and_equipment": "General label for property, plant, and equipment assets.", + "total_enterprise_value_to_employees": "Total enterprise value per employees metric.", + "deferred_tax_asset_current_portion": "Current portion of deferred tax assets expected to be realized within a year.", + "last_total_ev_to_employee_count": "Last total enterprise value per employee count.", + "last_ev_to_headcount": "Last enterprise value per headcount metric.", + "other_short_term_assets_of_the_finance_division": "Alternate label for finance division other short-term 
assets.", + "short_term_notes_receivable": "Short-term notes receivable held by the company.", + "other_current_assets_of_the_finance_division": "Other current asset categories specific to the finance division.", + "operating_expense_finance_division": "Alternate label for operating costs recorded by the finance division.", + "treasury_stock_redeemable_preferred_stock": "Alternate ordering for treasury redeemable preferred stock shares.", + "last_ev_to_employee_count": "Last enterprise value per employee count metric.", + "current_portion_of_non_current_debt_and_capital_leases": "Current amounts due for non-current debt and capital lease liabilities.", + "ppe": "Short label for the property, plant, and equipment balance.", + "net_income_allocable_to_general_partner": "Net income portion allocated to the general partner.", + "last_close_total_enterprise_value_to_employee_count": "Last close total enterprise value per employee count.", + "diluted_earning_per_share_excluding_extra_items": "Spelled-out variant for diluted EPS excluding extraordinary items.", + "tev_to_headcount": "Total enterprise value per headcount metric.", + "inventories": "Alternate label for inventory balance.", + "gppe": "Alternate abbreviation for gross property, plant, and equipment.", + "ebitda": "Earnings before interest, taxes, depreciation, and amortization.", + "impairment_of_oil_and_gas": "Alternate label for oil and gas property impairments.", + "short_term_investments": "Short-term investments readily convertible to cash.", + "redeemable_preferred_stock": "Alternate label for redeemable preferred equity.", + "total_short_term_assets": "Alternate label for the total current assets balance.", + "basic_eps_excluding_extra_items": "Basic EPS calculated excluding extraordinary items.", + "last_total_ev_to_ebitda": "Last total EV to EBITDA ratio.", + "ebt_including_unusual_items": "Earnings before taxes, including unusual or non-recurring items.", + "issuance_of_preferred_stock": "Cash 
inflows from issuing preferred stock.", + "tax_benefit_from_stock_options": "Tax benefit realized from employee stock option exercises.", + "foreign_exchange_rate_adjustments": "Adjustments from changes in foreign exchange rates.", + "net_income_to_common_shareholders_excluding_extra_items": "Net income to common shareholders excluding extraordinary items.", + "ev_to_employee_count": "Enterprise value per employee count ratio.", + "long_term_deferred_charges": "Deferred charges expected to provide benefits beyond one year.", + "impairment_o_and_g": "Abbreviated form for oil and gas impairment charges.", + "total_enterprise_value_to_ebit": "Total enterprise value divided by EBIT.", + "last_close_tev_to_employees": "Last close total enterprise value per employees.", + "long_term_leases": "Lease obligations with maturities extending beyond one year.", + "other_amortization": "Amortization charges not captured in major categories.", + "short_term_total_receivable": "Short-term subtotal of receivables.", + "cash_from_investing": "Cash provided by investing activities.", + "common_stock": "Par value of common stock issued and outstanding.", + "other_operating_expense": "Operating expenses not classified under standard categories.", + "normalized_basic_earning_per_share": "Spelled-out variant for normalized basic EPS.", + "foreign_exchange_adjustments": "Cumulative translation adjustments related to FX movements (CTA).", + "total_selling_general_and_admin_expense": "Aggregate SG&A expenses including all subcategories.", + "total_enterprise_value_to_earnings_before_interest_and_taxes": "Total enterprise value divided by earnings before interest and taxes.", + "d_and_a": "Abbreviated form referencing depreciation and amortization expense.", + "current_deferred_tax_asset": "Deferred tax asset amount classified as current on the balance sheet.", + "cashflow_from_investing": "Alternate spelling for cash provided by investing activities.", + "tev_to_employee_count": "Total 
enterprise value per employee count.", + "last_close_ev_to_ebit": "Last close EV/EBIT multiple.", + "exploration_and_drilling_costs": "Costs incurred for exploration and drilling activities in energy sectors.", + "last_close_enterprise_value_to_employee_count": "Last close enterprise value per employee.", + "total_receivables": "Aggregate receivable balance across categories.", + "total_other_operating_expense": "Aggregate of operating expenses not categorized elsewhere.", + "net_debt": "Net debt calculated as total debt minus cash and cash equivalents.", + "total_selling_general_and_admin": "Aggregate total of selling, general, and administrative expenses.", + "cash_and_cash_equivalents": "Alternate label for cash and cash equivalents.", + "cost_of_goods_sold": "Direct costs attributable to producing goods sold during the period.", + "other_current_liability": "Miscellaneous current liability category.", + "income_from_affiliates": "Equity-method income earned from affiliated companies.", + "total_enterprise_value_to_employee_count": "Total enterprise value divided by employee count.", + "diluted_earning_per_share_from_discontinued_operations": "Spelled-out variant for diluted EPS from discontinued operations.", + "stock_based_compensation": "Expense recognized for stock-based employee compensation.", + "current_portion_of_lt_debt": "Abbreviated label for the current portion of long-term debt.", + "last_total_ev_to_employees": "Last total enterprise value per employees.", + "last_close_ev_to_employees": "Last close enterprise value per employees metric.", + "basic_earning_per_share_from_extraordinary_items": "Spelled-out variant for basic EPS from extraordinary items.", + "finance_division_interest_expense": "Interest expense recognized by the finance or captive lending division.", + "earnings_from_continued_operations": "Earnings from ongoing operations excluding discontinued segments.", + "discontinued_operations_earnings": "Alternate label for earnings from 
discontinued operations.", + "finance_division_other_current_liabilities": "Other current liabilities associated with the finance division.", + "treasury_stock_preferred_stock_redeemable": "Treasury shares held for redeemable preferred stock classes.", + "preferred_dividends_paid": "Dividends paid to preferred shareholders.", + "finance_division_short_term_loans_and_leases": "Finance division short-term lending portfolio.", + "total_other_unusual_items": "Aggregate impact of all unusual items except those categorized separately.", + "treasury_stock": "Cost of company shares repurchased and held in treasury." + }, + "tool_name": "get_financial_line_item_from_identifiers" + }, + { + "related_parameters": [ + "line_item" + ], + "description": "List of company identifiers (ticker symbols, ISINs, CUSIPs, or company_ids). Always pass multiple identifiers in a single call when possible.", + "parameter_name": "identifiers", + "common_mistakes": [ + "Making separate API calls for each company instead of batching identifiers", + "Using incorrect ticker symbols", + "Not using array format for single identifiers" + ], + "examples": { + "['JPM', 'BAC', 'WFC']": "", + "['AAPL', 'MSFT', 'GOOGL']": "", + "['AAPL']": "" + }, + "tool_name": "get_financial_line_item_from_identifiers" + }, + { + "related_parameters": [ + "start_quarter", + "end_quarter", + "start_year", + "end_year" + ], + "description": "Specifies the aggregation frequency for the data. Supported values: 'annual', 'quarterly', and 'ytd'. 
Required when requesting quarterly data.", + "parameter_name": "period_type", + "common_mistakes": [ + "Forgetting to set period_type to 'quarterly' when requesting quarterly data", + "Using 'quarter' instead of 'quarterly'", + "Using 'ltm' when the query requests specific quarters (use 'quarterly' with start/end quarter instead)", + "Leaving period_type empty while providing quarter parameters" + ], + "examples": { + "quarterly": "", + "ytd": "Year-to-date aggregation matching the current fiscal year", + "annual": "" + }, + "boundary_callouts": { + "quarterly": "When the user specifies quarters (e.g., 'last three quarters', 'Q2 2025'), set period_type='quarterly' and populate both quarter parameters.", + "annual": "For multi-year spans you must still set start_year and end_year.", + "ytd": "Pair year-to-date requests with start_year equal to the referenced fiscal year; do not fall back to ltm." + }, + "tool_name": "get_financial_line_item_from_identifiers" + }, + { + "related_parameters": [ + "end_year", + "period_type" + ], + "description": "Starting year for the data range. Use calendar years, not fiscal years. Required whenever the query references a specific year or multi-year period.", + "parameter_name": "start_year", + "common_mistakes": [ + "Using fiscal years instead of calendar years", + "Not specifying when requesting historical data ranges", + "Leaving start_year null when the user asks for a specific year", + "Setting start_year after end_year in a range request" + ], + "examples": { + "2024": "", + "2020": "", + "2023": "" + }, + "boundary_callouts": "Required for any historical range. Do not leave null alongside end_year", + "tool_name": "get_financial_line_item_from_identifiers" + }, + { + "related_parameters": [ + "start_year", + "period_type" + ], + "description": "Ending year for the data range. Use calendar years, not fiscal years. 
Must be provided whenever start_year is set or the query references an ending year.", + "parameter_name": "end_year", + "common_mistakes": [ + "Using fiscal years instead of calendar years", + "Setting end_year before start_year", + "Leaving end_year null when the query asks for a closing year", + "Providing only end_year without start_year for historical ranges" + ], + "examples": { + "2024": "", + "2023": "" + }, + "boundary_callouts": "Whenever start_year is provided or the user cites an ending period, populate end_year.", + "tool_name": "get_financial_line_item_from_identifiers" + }, + { + "related_parameters": [ + "end_quarter", + "period_type", + "start_year" + ], + "description": "Starting quarter (1-4) when requesting quarterly data. Only used with period_type='quarterly'.", + "parameter_name": "start_quarter", + "common_mistakes": [ + "Using quarter numbers outside 1-4 range", + "Forgetting to set period_type to 'quarterly'", + "Using string values like 'Q1' instead of integer 1", + "Leaving start_quarter null when the query specifies a quarter", + "Setting start_quarter greater than end_quarter within the same year" + ], + "examples": { + "1": "", + "3": "", + "2": "", + "4": "" + }, + "boundary_callouts": "If the user references a starting quarter, set start_quarter and align start_year.", + "tool_name": "get_financial_line_item_from_identifiers" + }, + { + "related_parameters": [ + "start_quarter", + "period_type", + "end_year" + ], + "description": "Ending quarter (1-4) when requesting quarterly data. 
Only used with period_type='quarterly'.", + "parameter_name": "end_quarter", + "common_mistakes": [ + "Using quarter numbers outside 1-4 range", + "Setting end_quarter before start_quarter in same year", + "Using string values like 'Q4' instead of integer 4", + "Leaving end_quarter null when the query references a closing quarter", + "Using end_quarter without providing end_year" + ], + "examples": { + "1": "", + "3": "", + "2": "", + "4": "" + }, + "boundary_callouts": "Always pair end_quarter with end_year and ensure it is populated when the query ends on a specific quarter.", + "tool_name": "get_financial_line_item_from_identifiers" + } + ] + } + ], + "description": "Parameter descriptors for company financials tools - specifically financial line items", + "dataset": "company_financials" +} \ No newline at end of file diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/company_intelligence_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/company_intelligence_params.json new file mode 100644 index 0000000..94633f4 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/company_intelligence_params.json @@ -0,0 +1,89 @@ +{ + "description": "Parameter descriptors for company information tools", + "tools": [ + { + "tool_name": "get_info_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_info_from_identifiers", + "description": "List of company identifiers for basic company information including name, status, industry, employees, and headquarters.", + "examples": { + "['AAPL']": "", + "['MSFT', 'GOOGL', 'AMZN']": "", + "['TSLA']": "" + }, + "common_mistakes": [ + "Making separate calls for each company instead of batching", + "Using this tool for detailed business descriptions (use get_company_description instead)", + "Using this for financial data (use financial statement tools instead)" + ], + "related_parameters": 
[] + } + ] + }, + { + "tool_name": "get_company_summary_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_company_summary_from_identifiers", + "description": "List of company identifiers for one-paragraph business summaries including primary business, products, services, and markets.", + "examples": { + "['NFLX']": "", + "['JPM', 'BAC']": "", + "['F']": "" + }, + "common_mistakes": [ + "Using this for detailed multi-section descriptions (use get_company_description_from_identifiers instead)", + "Using this for basic company facts like headquarters (use get_info_from_identifiers instead)", + "Expecting detailed segment breakdowns (summary provides overview only)" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_company_description_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_company_description_from_identifiers", + "description": "List of company identifiers for detailed, multi-section descriptions including segments, competition, history, and significant events.", + "examples": { + "['AAPL']": "", + "['AMZN']": "", + "['MSFT', 'GOOGL']": "" + }, + "common_mistakes": [ + "Using this for simple business overviews (use get_company_summary_from_identifiers instead)", + "Using this for basic company facts (use get_info_from_identifiers instead)", + "Expecting financial data (use financial statement tools instead)" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_company_other_names_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_company_other_names_from_identifiers", + "description": "List of company identifiers for alternate names, historical names, and native language names.", + "examples": { + "['GOOGL']": "", + "['META']": "", + "['TM', '005930.KS']": "" + }, + "common_mistakes": [ + "Using this for business information (use other company tools instead)", + "Using this for current company name (use 
get_info_from_identifiers instead)", + "Expecting detailed company descriptions (this is only for name variations)" + ], + "related_parameters": [] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/competitors_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/competitors_params.json new file mode 100644 index 0000000..96feb8f --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/competitors_params.json @@ -0,0 +1,50 @@ +{ + "description": "Parameter descriptors for competitor queries with focus on competitor source disambiguation", + "tools": [ + { + "tool_name": "get_competitors_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_competitors_from_identifiers", + "description": "List of company identifiers to find competitors for. Always batch multiple identifiers when possible.", + "examples": { + "['AAPL']": "", + "['TSLA']": "", + "['AAPL', 'MSFT', 'GOOGL']": "" + }, + "common_mistakes": [ + "Making separate API calls for each company instead of batching", + "Using incorrect ticker symbols", + "Not using array format for single identifiers", + "Expecting comprehensive competitor lists without specifying source" + ], + "related_parameters": ["competitor_source"] + }, + { + "parameter_name": "competitor_source", + "tool_name": "get_competitors_from_identifiers", + "description": "The source type of competitor information. 
Must be exact source names from the system.", + "examples": { + "filing": "Competitors mentioned in SEC filings and regulatory documents", + "key_dev": "Competitors identified through key developments and news events", + "contact": "Competitors identified through contact and relationship data", + "third_party": "Competitors identified by third-party research and analysis", + "self_identified": "Competitors that the company identifies as its own competitors", + "named_by_competitor": "Companies that have named this company as their competitor" + }, + "common_mistakes": [ + "Using 'sec_filing' instead of 'filing'", + "Using 'self_reported' instead of 'self_identified'", + "Using 'third_party_source' instead of 'third_party'", + "Using 'key_development' instead of 'key_dev'", + "Using 'competitor_named' instead of 'named_by_competitor'", + "Not specifying competitor_source (this parameter is required)", + "Using multiple sources in one call (make separate calls for different sources)" + ], + "related_parameters": ["identifiers"] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/earnings_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/earnings_params.json new file mode 100644 index 0000000..107b58b --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/earnings_params.json @@ -0,0 +1,148 @@ +{ + "description": "Parameter descriptors for earnings tools with temporal disambiguation", + "tools": [ + { + "tool_name": "get_earnings_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_earnings_from_identifiers", + "description": "List of company identifiers for complete earnings announcement history. 
Returns ALL earnings announcements (past and future).", + "examples": { + "['AAPL']": "", + "['MSFT', 'GOOGL']": "", + "['TSLA']": "" + }, + "common_mistakes": [ + "Using this when you only want latest earnings (use get_latest_earnings_from_identifiers instead)", + "Using this when you only want next earnings (use get_next_earnings_from_identifiers instead)", + "Expecting only recent earnings (this returns complete history)" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_latest_earnings_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_latest_earnings_from_identifiers", + "description": "List of company identifiers for the most recent earnings announcement only. Use for 'latest', 'most recent', or 'last' earnings questions.", + "examples": { + "['AAPL']": "", + "['AMZN', 'NFLX']": "", + "['MSFT']": "" + }, + "common_mistakes": [ + "Using this for future earnings (use get_next_earnings_from_identifiers instead)", + "Using this for complete history (use get_earnings_from_identifiers instead)", + "Confusing 'latest' (past) with 'next' (future)" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_next_earnings_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_next_earnings_from_identifiers", + "description": "List of company identifiers for upcoming/future earnings announcements. 
Use for 'next', 'upcoming', or 'scheduled' earnings questions.", + "examples": { + "['AAPL']": "", + "['TSLA', 'F']": "", + "['GOOGL']": "", + "['JPM']": "" + }, + "common_mistakes": [ + "Using this for past earnings (use get_latest_earnings_from_identifiers instead)", + "Using this for complete history (use get_earnings_from_identifiers instead)", + "Confusing 'next' (future) with 'latest' (past)" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_earnings_call_datetimes_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_earnings_call_datetimes_from_identifiers", + "description": "List of company identifiers (tickers, ISINs, etc.) to retrieve earnings call timestamps for. Batch multiple identifiers when comparing call schedules across peers.", + "examples": { + "['AAPL']": "", + "['AAPL', 'MSFT']": "" + }, + "common_mistakes": [ + "Omitting identifiers entirely—this tool requires at least one company", + "Supplying key_dev_id instead of identifiers (use transcripts tool for key_dev_id)", + "Making separate calls instead of batching related companies" + ], + "related_parameters": [ + "call_type", + "start_date", + "end_date" + ] + }, + { + "parameter_name": "call_type", + "tool_name": "get_earnings_call_datetimes_from_identifiers", + "description": "Type of call to retrieve. For standard earnings calls use 'earnings_call'. 
Other types include 'investor_day' or 'conference_call' when available.", + "examples": { + "earnings_call": "Standard quarterly earnings call", + "investor_day": "Investor day presentations" + }, + "common_mistakes": [ + "Leaving call_type blank (defaults vary and may cause description errors)", + "Using natural language like 'earnings' or 'call' instead of the canonical option", + "Requesting transcript content (use transcripts tools)" + ], + "related_parameters": [ + "identifiers", + "start_date", + "end_date" + ] + }, + { + "parameter_name": "start_date", + "tool_name": "get_earnings_call_datetimes_from_identifiers", + "description": "Lower bound (inclusive) for call schedule queries. Use YYYY-MM-DD. Required when the user references specific historic windows (e.g., 'Q4 2023').", + "examples": { + "2023-10-01": "Quarter start for Q4 2023", + "2024-01-01": "Start of fiscal year 2024" + }, + "common_mistakes": [ + "Leaving start_date empty while providing end_date, causing mismatched ranges", + "Using natural language like 'last quarter' instead of explicit date", + "Setting start_date after end_date" + ], + "related_parameters": [ + "end_date", + "identifiers" + ] + }, + { + "parameter_name": "end_date", + "tool_name": "get_earnings_call_datetimes_from_identifiers", + "description": "Upper bound (inclusive) for call schedule queries. Use YYYY-MM-DD. 
Pair with start_date to cover the period requested.", + "examples": { + "2023-12-31": "Quarter end for Q4 2023", + "2024-03-31": "Quarter end for Q1 2024" + }, + "common_mistakes": [ + "Leaving end_date empty when start_date is provided", + "Setting end_date before start_date", + "Using ambiguous phrases like 'quarter end' without exact dates" + ], + "related_parameters": [ + "start_date", + "identifiers" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/id_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/id_params.json new file mode 100644 index 0000000..e5e7d26 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/id_params.json @@ -0,0 +1,169 @@ +{ + "description": "Parameter descriptors for ID-related tools: capitalization, security identifiers, and utility tools", + "tools": [ + { + "tool_name": "get_capitalization_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_capitalization_from_identifiers", + "description": "List of company identifiers for capitalization metrics. Always batch multiple identifiers when possible.", + "examples": { + "['AAPL']": "", + "['AAPL', 'MSFT']": "", + "['TSLA']": "", + "['GOOGL', 'META', 'AMZN']": "" + }, + "common_mistakes": [ + "Making separate API calls for each company instead of batching", + "Using incorrect ticker symbols", + "Not using array format for single identifiers", + "Expecting this to work for private companies without public market data" + ], + "related_parameters": ["capitalization", "start_date", "end_date"] + }, + { + "parameter_name": "capitalization", + "tool_name": "get_capitalization_from_identifiers", + "description": "The type of capitalization metric to retrieve. 
Must be exact parameter names.", + "examples": { + "market_cap": "Market capitalization (share price × shares outstanding)", + "tev": "Total enterprise value (market cap + debt - cash)", + "shares_outstanding": "Total number of shares currently outstanding" + }, + "common_mistakes": [ + "Using 'market_capitalization' instead of 'market_cap'", + "Using 'enterprise_value' or 'ev' instead of 'tev'", + "Using 'shares' or 'outstanding_shares' instead of 'shares_outstanding'", + "Using 'market_value' instead of 'market_cap'", + "Not specifying capitalization parameter (this is required)" + ], + "related_parameters": ["identifiers", "start_date", "end_date"] + }, + { + "parameter_name": "start_date", + "tool_name": "get_capitalization_from_identifiers", + "description": "Starting date for historical capitalization retrieval in YYYY-MM-DD format. Leave empty for most recent values.", + "examples": { + "2023-01-01": "", + "2022-12-31": "", + "2023-06-30": "" + }, + "common_mistakes": [ + "Using incorrect date format (should be YYYY-MM-DD)", + "Setting start_date without end_date for ranges", + "Using future dates for historical data", + "Using relative terms instead of specific dates" + ], + "related_parameters": ["end_date", "capitalization"] + }, + { + "parameter_name": "end_date", + "tool_name": "get_capitalization_from_identifiers", + "description": "Ending date for historical capitalization retrieval in YYYY-MM-DD format. 
Must be >= start_date.", + "examples": { + "2023-12-31": "", + "2023-06-30": "", + "2023-09-30": "" + }, + "common_mistakes": [ + "Using incorrect date format (should be YYYY-MM-DD)", + "Setting end_date before start_date", + "Using future dates beyond current date", + "Not specifying end_date when start_date is provided" + ], + "related_parameters": ["start_date", "capitalization"] + } + ] + }, + { + "tool_name": "get_cusip_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_cusip_from_identifiers", + "description": "List of company identifiers to get CUSIP identifiers for. Always batch multiple identifiers when possible.", + "examples": { + "['AAPL']": "", + "['MSFT']": "", + "['AAPL', 'MSFT', 'GOOGL']": "" + }, + "common_mistakes": [ + "Making separate API calls for each company instead of batching", + "Using incorrect ticker symbols", + "Not using array format for single identifiers", + "Expecting this to work for non-US securities (CUSIP is US-specific)" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_isin_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_isin_from_identifiers", + "description": "List of company identifiers to get ISIN identifiers for. Always batch multiple identifiers when possible.", + "examples": { + "['AAPL']": "", + "['MSFT']": "", + "['AAPL', 'MSFT', 'GOOGL']": "" + }, + "common_mistakes": [ + "Making separate API calls for each company instead of batching", + "Using incorrect ticker symbols", + "Not using array format for single identifiers", + "Confusing ISIN with CUSIP (ISIN is international, CUSIP is US-specific)" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_latest", + "parameters": [ + { + "parameter_name": "use_local_timezone", + "tool_name": "get_latest", + "description": "Whether to use the local timezone of the user instead of UTC. 
Optional parameter, defaults to false (UTC).", + "examples": { + "true": "Use the user's local timezone for date/time calculations", + "false": "Use UTC (Coordinated Universal Time) for date/time calculations" + }, + "common_mistakes": [ + "Using string values 'true'/'false' instead of boolean true/false", + "Not understanding the difference between local timezone and UTC", + "Assuming the tool always returns local time (defaults to UTC)", + "Using this parameter when timezone doesn't matter for the use case" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_n_quarters_ago", + "parameters": [ + { + "parameter_name": "n", + "tool_name": "get_n_quarters_ago", + "description": "Number of quarters before the current quarter to calculate. Must be a positive integer.", + "examples": { + "1": "", + "2": "", + "4": "", + "6": "" + }, + "common_mistakes": [ + "Using negative numbers (n must be positive)", + "Using 0 (use 1 for previous quarter)", + "Using string values instead of integers", + "Using decimal values instead of whole numbers", + "Not understanding that n=4 means 1 year ago, n=8 means 2 years ago" + ], + "related_parameters": [] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/mergers_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/mergers_params.json new file mode 100644 index 0000000..fd83ce2 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/mergers_params.json @@ -0,0 +1,88 @@ +{ + "description": "Parameter descriptors for mergers and acquisitions tools", + "tools": [ + { + "tool_name": "get_mergers_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_mergers_from_identifiers", + "description": "List of company identifiers to find all M&A transactions where the company was involved as buyer, seller, or target. 
Returns transaction_ids for use with other M&A tools.", + "examples": { + "['MSFT']": "", + "['BJ']": "", + "['PFE']": "", + "['AAPL', 'GOOGL']": "" + }, + "common_mistakes": [ + "Using this tool to get detailed transaction information (use get_merger_info_from_transaction_id instead)", + "Not using the returned transaction_ids with other M&A tools", + "Expecting financial details (this tool only provides transaction_ids and basic info)", + "Making separate calls for each company instead of batching" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_merger_info_from_transaction_id", + "parameters": [ + { + "parameter_name": "transaction_id", + "tool_name": "get_merger_info_from_transaction_id", + "description": "The specific transaction ID for comprehensive M&A transaction information. Must be obtained from get_mergers_from_identifiers first.", + "examples": { + "67890": "", + "11111": "", + "12345": "" + }, + "common_mistakes": [ + "Using company identifier instead of transaction_id", + "Not finding transaction_id first using get_mergers_from_identifiers", + "Expecting this to work without a valid transaction_id", + "Guessing transaction_id values instead of looking them up" + ], + "related_parameters": [] + } + ] + }, + { + "tool_name": "get_advisors_for_company_in_transaction_from_identifier", + "parameters": [ + { + "parameter_name": "identifier", + "tool_name": "get_advisors_for_company_in_transaction_from_identifier", + "description": "Single company identifier (not array) for the company whose advisors you want to find in a specific transaction.", + "examples": { + "SPGI": "", + "BJ": "", + "VOD": "" + }, + "common_mistakes": [ + "Using array format instead of single identifier", + "Not providing transaction_id (required parameter)", + "Using this without first finding the transaction_id from get_mergers_from_identifiers" + ], + "related_parameters": ["transaction_id"] + }, + { + "parameter_name": "transaction_id", + "tool_name": 
"get_advisors_for_company_in_transaction_from_identifier", + "description": "The specific transaction ID for the M&A deal. Must be obtained from get_mergers_from_identifiers first.", + "examples": { + "12345": "", + "67890": "", + "11111": "" + }, + "common_mistakes": [ + "Not providing transaction_id (this parameter is required)", + "Using company name instead of transaction_id", + "Guessing transaction_id instead of looking it up first", + "Using transaction_id from a different company's transaction" + ], + "related_parameters": ["identifier"] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/pricing_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/pricing_params.json new file mode 100644 index 0000000..6f16eb4 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/pricing_params.json @@ -0,0 +1,101 @@ +{ + "description": "Parameter descriptors for stock price queries with focus on date ranges, periodicity, and adjustment options", + "tools": [ + { + "tool_name": "get_prices_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_prices_from_identifiers", + "description": "List of company identifiers (ticker symbols, ISINs, CUSIPs, or company_ids). 
Always batch multiple identifiers in a single call when possible.", + "examples": { + "['AAPL']": "", + "['AAPL', 'MSFT']": "", + "['GOOGL', 'META', 'AMZN']": "", + "['TSLA']": "", + "['JPM', 'BAC', 'WFC']": "" + }, + "common_mistakes": [ + "Making separate API calls for each company instead of batching identifiers", + "Using incorrect ticker symbols", + "Not using array format for single identifiers", + "Mixing identifier types inconsistently" + ], + "related_parameters": ["start_date", "end_date", "periodicity", "adjusted"] + }, + { + "parameter_name": "start_date", + "tool_name": "get_prices_from_identifiers", + "description": "Starting date for historical price retrieval in YYYY-MM-DD format. Leave empty for most recent prices.", + "examples": { + "2023-01-01": "", + "2023-10-01": "", + "2022-11-01": "", + "2023-06-15": "" + }, + "common_mistakes": [ + "Using incorrect date format (should be YYYY-MM-DD)", + "Using relative terms like 'last month' instead of specific dates", + "Setting start_date without end_date for ranges", + "Using future dates for historical data" + ], + "boundary_callouts": "Always pair start_date with an end_date for ranged questions. Leave both empty only when the user explicitly asks for the latest quote.", + "related_parameters": ["end_date", "periodicity"] + }, + { + "parameter_name": "end_date", + "tool_name": "get_prices_from_identifiers", + "description": "Ending date for historical price retrieval in YYYY-MM-DD format. Must be >= start_date. Leave empty for most recent prices.", + "examples": { + "2023-12-31": "", + "2023-10-31": "", + "2023-10-27": "", + "2023-06-15": "" + }, + "common_mistakes": [ + "Using incorrect date format (should be YYYY-MM-DD)", + "Setting end_date before start_date", + "Using future dates beyond current date", + "Not specifying end_date when start_date is provided" + ], + "boundary_callouts": "The end_date should bracket the last event referenced in the question. 
See failures 201, 203, 208 where the window was truncated.", + "related_parameters": ["start_date", "periodicity"] + }, + { + "parameter_name": "periodicity", + "tool_name": "get_prices_from_identifiers", + "description": "The frequency at which data points are sampled or aggregated. Controls data granularity, not date range.", + "examples": { + "daily": "One data point per trading day (highest granularity)", + "weekly": "One data point per week (typically Friday close or week-end)", + "monthly": "One data point per month (typically month-end close)" + }, + "common_mistakes": [ + "Confusing periodicity with date range (periodicity is frequency, not time span)", + "Using 'day' instead of 'daily'", + "Using 'week' instead of 'weekly'", + "Using 'month' instead of 'monthly'", + "Not specifying periodicity for long time ranges (defaults to daily)" + ], + "related_parameters": ["start_date", "end_date"] + }, + { + "parameter_name": "adjusted", + "tool_name": "get_prices_from_identifiers", + "description": "Whether to retrieve adjusted prices that account for corporate actions such as dividends and splits. 
True for adjusted, false for raw historical prices.", + "examples": { + "true": "Adjusted prices accounting for dividends, splits, and other corporate actions", + "false": "Raw historical prices as they were traded (not adjusted for corporate actions)" + }, + "common_mistakes": [ + "Using string values 'true'/'false' instead of boolean true/false", + "Not understanding the difference between adjusted and unadjusted prices", + "Using adjusted=false for historical analysis (adjusted is usually preferred)", + "Not specifying when corporate actions are relevant" + ], + "related_parameters": ["start_date", "end_date"] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/relationship_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/relationship_params.json new file mode 100644 index 0000000..f206a77 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/relationship_params.json @@ -0,0 +1,53 @@ +{ + "description": "Parameter descriptors for business relationship queries with relationship type disambiguation", + "tools": [ + { + "tool_name": "get_business_relationship_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_business_relationship_from_identifiers", + "description": "List of company identifiers to find companies with specific business relationships. 
Always batch multiple identifiers when possible.", + "examples": { + "['JPM']": "", + "['AAPL']": "", + "['MSFT']": "", + "['AAPL', 'MSFT', 'GOOGL']": "" + }, + "common_mistakes": [ + "Making separate API calls for each company instead of batching", + "Using incorrect ticker symbols", + "Not using array format for single identifiers", + "Expecting financial data (this tool only provides relationship information)" + ], + "related_parameters": ["business_relationship"] + }, + { + "parameter_name": "business_relationship", + "tool_name": "get_business_relationship_from_identifiers", + "description": "The type of business relationship to search for. Must match exact relationship types from the system.", + "examples": { + "borrower": "Companies that have borrowed money or received financing", + "supplier": "Companies that provide goods or services to the queried company", + "customer": "Companies that purchase goods or services from the queried company", + "lender": "Companies that have provided loans or financing to others", + "partner": "Companies with strategic partnerships or joint ventures", + "distributor": "Companies that distribute or resell the queried company's products", + "vendor": "Companies that provide supplies, services, or components", + "client": "Companies that are clients or customers for professional services" + }, + "common_mistakes": [ + "Using plural forms like 'suppliers' instead of 'supplier'", + "Using 'vendors' instead of 'vendor'", + "Using 'customers' instead of 'customer'", + "Using 'borrowers' instead of 'borrower'", + "Using generic terms like 'business_partner' instead of 'partner'", + "Not using exact relationship type names from the system", + "Using synonyms instead of exact parameter values" + ], + "related_parameters": ["identifiers"] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/segments_params.json 
b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/segments_params.json new file mode 100644 index 0000000..59e764e --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/segments_params.json @@ -0,0 +1,139 @@ +{ + "description": "Parameter descriptors for company segment queries with segment type and time period disambiguation", + "tools": [ + { + "tool_name": "get_segments_from_identifiers", + "parameters": [ + { + "parameter_name": "identifiers", + "tool_name": "get_segments_from_identifiers", + "description": "List of company identifiers for segment information. Always batch multiple identifiers when possible.", + "examples": { + "['AAPL']": "", + "['KO']": "", + "['AMZN']": "", + "['AAPL', 'MSFT', 'GOOGL']": "" + }, + "common_mistakes": [ + "Making separate API calls for each company instead of batching", + "Using incorrect ticker symbols", + "Not using array format for single identifiers", + "Expecting this to work for companies without segment reporting" + ], + "related_parameters": ["segment_type", "period_type", "start_year", "end_year"] + }, + { + "parameter_name": "segment_type", + "tool_name": "get_segments_from_identifiers", + "description": "The type of segment breakdown to retrieve. 
Must match available segment types in the system.", + "examples": { + "business": "Business unit or division segments (e.g., iPhone, Mac, Services for Apple)", + "geographic": "Geographic or regional segments (e.g., Americas, Europe, Asia-Pacific)", + "product": "Product line segments (e.g., different product categories or brands)", + "service": "Service-based segments (e.g., cloud services, consulting, support)", + "revenue": "Revenue-based segment breakdown (e.g., subscription vs one-time revenue)", + "operating": "Operating segment divisions (e.g., reportable operating segments per GAAP)" + }, + "common_mistakes": [ + "Using 'geographical' instead of 'geographic'", + "Using 'business_segment' instead of 'business'", + "Using 'product_segment' instead of 'product'", + "Using 'regional' instead of 'geographic'", + "Not specifying segment_type (this parameter is required)", + "Using segment types not available for the specific company" + ], + "related_parameters": ["identifiers", "period_type"] + }, + { + "parameter_name": "period_type", + "tool_name": "get_segments_from_identifiers", + "description": "Specifies whether to retrieve annual or quarterly segment data. Required when requesting quarterly data.", + "examples": { + "annual": "", + "quarterly": "" + }, + "common_mistakes": [ + "Forgetting to set period_type to 'quarterly' when requesting quarterly data", + "Using 'quarter' instead of 'quarterly'", + "Using 'yearly' instead of 'annual'", + "Not specifying period_type when using quarter parameters" + ], + "related_parameters": ["start_quarter", "end_quarter", "start_year", "end_year"] + }, + { + "parameter_name": "start_year", + "tool_name": "get_segments_from_identifiers", + "description": "Starting year for the segment data range. Use calendar years. 
Required whenever the question references a specific year or range.", + "examples": { + "2020": "", + "2022": "", + "2023": "" + }, + "common_mistakes": [ + "Using fiscal years instead of calendar years", + "Using string format instead of integer", + "Not specifying when requesting historical ranges", + "Setting start_year without end_year for ranges", + "Leaving start_year null when the user asks for a single explicit year" + ], + "related_parameters": ["end_year", "period_type", "start_quarter"] + }, + { + "parameter_name": "end_year", + "tool_name": "get_segments_from_identifiers", + "description": "Ending year for the segment data range. Use calendar years. Must be >= start_year.", + "examples": { + "2023": "", + "2024": "" + }, + "common_mistakes": [ + "Using fiscal years instead of calendar years", + "Setting end_year before start_year", + "Using string format instead of integer", + "Not specifying end_year when start_year is provided", + "Leaving end_year null when the user asks for an ending year" + ], + "related_parameters": ["start_year", "period_type", "end_quarter"] + }, + { + "parameter_name": "start_quarter", + "tool_name": "get_segments_from_identifiers", + "description": "Starting quarter (1-4) when requesting quarterly segment data. Only used with period_type='quarterly' and required whenever the question references a specific start quarter.", + "examples": { + "1": "", + "2": "", + "3": "", + "4": "" + }, + "common_mistakes": [ + "Using quarter numbers outside 1-4 range", + "Forgetting to set period_type to 'quarterly'", + "Using string values like 'Q1' instead of integer 1", + "Using 0-based indexing (0-3) instead of 1-based (1-4)", + "Leaving start_quarter empty when the query mentions a specific quarter" + ], + "related_parameters": ["end_quarter", "period_type", "start_year"] + }, + { + "parameter_name": "end_quarter", + "tool_name": "get_segments_from_identifiers", + "description": "Ending quarter (1-4) when requesting quarterly segment data. 
Only used with period_type='quarterly'.", + "examples": { + "1": "", + "2": "", + "3": "", + "4": "" + }, + "common_mistakes": [ + "Using quarter numbers outside 1-4 range", + "Setting end_quarter before start_quarter in same year", + "Using string values like 'Q4' instead of integer 4", + "Not specifying end_quarter when start_quarter is provided", + "Leaving end_quarter null when the question references an ending quarter" + ], + "related_parameters": ["start_quarter", "period_type", "end_year"] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/statements_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/statements_params.json new file mode 100644 index 0000000..c6f2b8d --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/statements_params.json @@ -0,0 +1,122 @@ +{ + "description": "Parameter descriptors for financial statements tools (excluding line items)", + "tools": [ + { + "tool_name": "get_financial_statement_from_identifiers", + "parameters": [ + { + "parameter_name": "statement", + "tool_name": "get_financial_statement_from_identifiers", + "description": "The type of financial statement to retrieve. 
Must be one of the three main financial statements.", + "examples": { + "balance_sheet": "Statement of financial position showing assets, liabilities, and equity at a point in time", + "income_statement": "Statement of earnings showing revenues, expenses, and profit over a period", + "cash_flow_statement": "Statement showing cash inflows and outflows from operating, investing, and financing activities" + }, + "common_mistakes": [ + "Using 'balance_sheet_statement' instead of 'balance_sheet'", + "Using 'profit_and_loss' or 'p_and_l' instead of 'income_statement'", + "Using 'cash_flows' or 'statement_of_cash_flows' instead of 'cash_flow_statement'" + ], + "related_parameters": ["period_type", "start_year", "end_year"] + }, + { + "parameter_name": "identifiers", + "tool_name": "get_financial_statement_from_identifiers", + "description": "List of company identifiers (ticker symbols, ISINs, CUSIPs, or company_ids). Always batch multiple companies in a single call when possible.", + "examples": { + "['AAPL']": "", + "['MSFT', 'GOOGL']": "", + "['JPM', 'BAC', 'WFC']": "" + }, + "common_mistakes": [ + "Making separate API calls for each company instead of batching", + "Using incorrect ticker symbols", + "Not using array format for single identifiers" + ], + "related_parameters": ["statement"] + }, + { + "parameter_name": "period_type", + "tool_name": "get_financial_statement_from_identifiers", + "description": "Specifies whether to retrieve annual or quarterly statements. 
Required when requesting quarterly data.", + "examples": { + "annual": "", + "quarterly": "" + }, + "common_mistakes": [ + "Forgetting to set period_type to 'quarterly' when requesting quarterly statements", + "Using 'quarter' instead of 'quarterly'", + "Using 'yearly' instead of 'annual'" + ], + "related_parameters": ["start_quarter", "end_quarter", "start_year", "end_year"] + }, + { + "parameter_name": "start_year", + "tool_name": "get_financial_statement_from_identifiers", + "description": "Starting year for the data range. Use calendar years. Leave empty for most recent data.", + "examples": { + "2020": "", + "2022": "", + "2023": "" + }, + "common_mistakes": [ + "Using fiscal years instead of calendar years", + "Using string format instead of integer", + "Not specifying when requesting historical ranges" + ], + "related_parameters": ["end_year", "period_type"] + }, + { + "parameter_name": "end_year", + "tool_name": "get_financial_statement_from_identifiers", + "description": "Ending year for the data range. Use calendar years. Must be >= start_year.", + "examples": { + "2023": "", + "2024": "" + }, + "common_mistakes": [ + "Using fiscal years instead of calendar years", + "Setting end_year before start_year", + "Using string format instead of integer" + ], + "related_parameters": ["start_year", "period_type"] + }, + { + "parameter_name": "start_quarter", + "tool_name": "get_financial_statement_from_identifiers", + "description": "Starting quarter (1-4) when requesting quarterly statements. 
Only used with period_type='quarterly'.", + "examples": { + "1": "", + "2": "", + "3": "", + "4": "" + }, + "common_mistakes": [ + "Using quarter numbers outside 1-4 range", + "Forgetting to set period_type to 'quarterly'", + "Using string values like 'Q1' instead of integer 1" + ], + "related_parameters": ["end_quarter", "period_type", "start_year"] + }, + { + "parameter_name": "end_quarter", + "tool_name": "get_financial_statement_from_identifiers", + "description": "Ending quarter (1-4) when requesting quarterly statements. Only used with period_type='quarterly'.", + "examples": { + "1": "", + "2": "", + "3": "", + "4": "" + }, + "common_mistakes": [ + "Using quarter numbers outside 1-4 range", + "Setting end_quarter before start_quarter in same year", + "Using string values like 'Q4' instead of integer 4" + ], + "related_parameters": ["start_quarter", "period_type", "end_year"] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/transcripts_params.json b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/transcripts_params.json new file mode 100644 index 0000000..bfbdd21 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/parameter_descriptors/transcripts_params.json @@ -0,0 +1,29 @@ +{ + "description": "Parameter descriptors for transcript access tools", + "tools": [ + { + "tool_name": "get_transcript_from_key_dev_id", + "parameters": [ + { + "parameter_name": "key_dev_id", + "tool_name": "get_transcript_from_key_dev_id", + "description": "The key development ID for the earnings call transcript. 
Must be obtained from earnings tools first (get_earnings_from_identifiers, get_latest_earnings_from_identifiers, or get_next_earnings_from_identifiers).", + "examples": { + "12345": "", + "67890": "", + "54321": "" + }, + "common_mistakes": [ + "Using company identifier instead of key_dev_id", + "Using ticker symbol instead of key_dev_id", + "Not obtaining key_dev_id from earnings tools first", + "Guessing key_dev_id values instead of looking them up", + "Using string format instead of integer for key_dev_id", + "Expecting this to work without a valid key_dev_id" + ], + "related_parameters": [] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/processing/__init__.py b/kfinance/integrations/tool_calling/dynamic_prompts/processing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/processing/entities.py b/kfinance/integrations/tool_calling/dynamic_prompts/processing/entities.py new file mode 100644 index 0000000..82d560f --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/processing/entities.py @@ -0,0 +1,748 @@ +# -*- coding: utf-8 -*- +"""Unified entity processing system combining NER detection and normalization.""" + +from dataclasses import dataclass +import logging +import re +from typing import Any, Dict, List, Optional, Set, Tuple + + +logger = logging.getLogger(__name__) + +# Try to import spaCy for NER, with fallback +try: + import spacy + + SPACY_AVAILABLE = True +except ImportError: + SPACY_AVAILABLE = False + logger.warning("spaCy not available - entity normalization disabled") + + +@dataclass +class EntityMatch: + """Represents a detected entity match.""" + + text: str + start: int + end: int + entity_type: str + placeholder: str + + +class EntityProcessor: + """Unified entity detection and normalization processor.""" + + def __init__(self) -> None: + """Initialize the entity processor.""" + # Initialize spaCy NER + self.nlp: 
Optional[Any] = None + self._init_spacy_model() + + # Initialize legacy patterns for backward compatibility + self._init_legacy_patterns() + + def _init_spacy_model(self) -> None: + """Initialize spaCy NER model if available.""" + if not SPACY_AVAILABLE: + return + + try: + # Try to load English model + self.nlp = spacy.load("en_core_web_sm") + logger.debug("Loaded spaCy English model for NER") + except OSError: + try: + # Fall back to the medium model if the small one is not installed + self.nlp = spacy.load("en_core_web_md") + logger.debug("Loaded spaCy medium English model for NER") + except OSError: + logger.warning("No spaCy English model found - entity normalization disabled") + self.nlp = None + + def _init_legacy_patterns(self) -> None: + """Initialize legacy company patterns for backward compatibility.""" + self.legacy_company_patterns = { + # Tech companies + "apple": ["apple", "aapl", "apple inc", "apple computer"], + "microsoft": ["microsoft", "msft", "microsoft corp", "microsoft corporation"], + "google": ["google", "googl", "alphabet", "alphabet inc"], + "amazon": ["amazon", "amzn", "amazon.com", "amazon inc"], + "meta": ["meta", "facebook", "fb", "meta platforms"], + "tesla": ["tesla", "tsla", "tesla inc", "tesla motors"], + "netflix": ["netflix", "nflx", "netflix inc"], + # Financial companies + "jpmorgan": ["jpmorgan", "jpm", "jpmorgan chase", "jp morgan"], + "goldman_sachs": ["goldman sachs", "gs", "goldman sachs group"], + "morgan_stanley": ["morgan stanley", "ms", "morgan stanley & co"], + "bank_of_america": ["bank of america", "bac", "bofa"], + "wells_fargo": ["wells fargo", "wfc", "wells fargo & company"], + "citigroup": ["citigroup", "c", "citi", "citicorp"], + # Other major companies + "berkshire_hathaway": ["berkshire hathaway", "brk.a", "brk.b", "berkshire"], + "coca_cola": ["coca cola", "ko", "coca-cola", "coke"], + "walmart": ["walmart", "wmt", "wal-mart"], + "disney": ["disney", "dis", "walt disney"], + "general_electric": ["general electric", "ge", "ge company"], + 
"exxon": ["exxon", "xom", "exxon mobil", "exxonmobil"], + "pfizer": ["pfizer", "pfe", "pfizer inc"], + # International companies + "toyota": ["toyota", "tm", "toyota motor"], + "samsung": ["samsung", "005930.ks", "samsung electronics"], + "nestle": ["nestle", "nsrgy", "nestle sa"], + } + + # Create reverse mapping for legacy patterns + self.legacy_entity_to_placeholder = {} + self.legacy_placeholder_to_entities = {} + + # Create generic placeholders (COMPANY_A, COMPANY_B, etc.) for legacy patterns + placeholder_letters = [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + ] + + for i, (placeholder, variations) in enumerate(self.legacy_company_patterns.items()): + if i < len(placeholder_letters): + placeholder_key = f"" + self.legacy_placeholder_to_entities[placeholder_key] = variations + for variation in variations: + self.legacy_entity_to_placeholder[variation.lower()] = placeholder_key + + def process_query(self, query: str) -> Tuple[str, Dict[str, str]]: + """Main entry point: detect and normalize entities in query. 
+ + Args: + query: Original query string + + Returns: + Tuple of (normalized_query, entity_mapping) + """ + # First, handle existing old-style placeholders (convert them to new format) + normalized_query = query.lower() + entity_mapping = {} + + old_placeholder_pattern = r"" + old_placeholders = re.findall(old_placeholder_pattern, normalized_query) + for old_company_name in old_placeholders: + # Find the new placeholder for this company using legacy patterns + if old_company_name in self.legacy_entity_to_placeholder: + new_placeholder = self.legacy_entity_to_placeholder[old_company_name] + old_pattern = f"" + normalized_query = normalized_query.replace(old_pattern, new_placeholder.lower()) + entity_mapping[new_placeholder] = old_company_name + + # If no old placeholders found, use the advanced entity detection + if not old_placeholders: + try: + normalized_query, entity_mapping = self._detect_and_normalize(query) + except (RuntimeError, ValueError, AttributeError) as e: + logger.error("Error in entity detection, falling back to legacy method: %s", e) + # Fallback to legacy method + return self._normalize_query_legacy(query) + + return normalized_query, entity_mapping + + def normalize_query_for_search(self, query: str) -> Tuple[str, Dict[str, str]]: + """Normalize a query for search by replacing entities with placeholders. + + This is an alias for process_query to maintain API compatibility. + + Args: + query: Input query text + + Returns: + Tuple of (normalized_query, entity_mapping) + """ + return self.process_query(query) + + def _detect_and_normalize(self, text: str) -> Tuple[str, Dict[str, str]]: + """Detect and normalize entities using spaCy NER. + + If spaCy is not available, returns text as-is with no entity masking. 
+ + Args: + text: Input text + + Returns: + Tuple of (normalized_text, entity_mapping) + """ + # If no spaCy model available, return text as-is (no entity masking) + if not self.nlp: + return text.lower(), {} + + entities = self._detect_entities(text) + + if not entities: + return text.lower(), {} + + # Sort entities by start position (reverse order for replacement) + entities.sort(key=lambda x: x.start, reverse=True) + + normalized_text = text + entity_mapping = {} + + # Replace entities with placeholders (reverse order to preserve positions) + for entity in entities: + normalized_text = ( + normalized_text[: entity.start] + + entity.placeholder.lower() + + normalized_text[entity.end :] + ) + entity_mapping[entity.placeholder] = entity.text.lower() + + return normalized_text.lower(), entity_mapping + + def _detect_entities(self, text: str) -> List[EntityMatch]: + """Detect entities using spaCy NER. + + Args: + text: Input text to analyze + + Returns: + List of detected entity matches (empty if spaCy not available) + """ + entities: List[EntityMatch] = [] + + # Only use spaCy NER if available + if self.nlp: + entities = self._detect_with_spacy(text) + # Remove duplicates and overlaps + entities = self._deduplicate_entities(entities) + + return entities + + def _detect_with_spacy(self, text: str) -> List[EntityMatch]: + """Detect entities using spaCy NER.""" + entities: List[EntityMatch] = [] + + if not self.nlp: + return entities + + try: + doc = self.nlp(text) + + # Count entities by type for placeholder numbering + entity_counts = {"COMPANY": 0, "GPE": 0, "PERSON": 0} + + for ent in doc.ents: + cleaned_text = self._clean_entity_text(ent.text) + if not cleaned_text: + continue + + placeholder = None + entity_type = None + + # Handle different entity types + if ent.label_ == "ORG": + # Organizations - filter for likely companies and exclude geographic terms + if self._is_likely_company(cleaned_text) and not self._is_geographic_entity( + cleaned_text + ): + 
entity_counts["COMPANY"] += 1 + placeholder = ( + f"" # A, B, C, etc. + ) + entity_type = "COMPANY_NER" + + elif ent.label_ == "GPE": + # Geo-political entities (countries, cities, states) + entity_counts["GPE"] += 1 + placeholder = f"" # A, B, C, etc. + entity_type = "GPE_NER" + + elif ent.label_ == "PERSON": + # Person names (CEOs, executives, etc.) + entity_counts["PERSON"] += 1 + placeholder = f"" # A, B, C, etc. + entity_type = "PERSON_NER" + + # Add entity if we created a placeholder + if placeholder and entity_type: + entities.append( + EntityMatch( + text=cleaned_text, + start=ent.start_char, + end=ent.end_char, + entity_type=entity_type, + placeholder=placeholder, + ) + ) + except (RuntimeError, ValueError, AttributeError) as e: + logger.error("Error in spaCy NER: %s", e) + + return entities + + def _clean_entity_text(self, text: str) -> str: + """Clean entity text by removing common prefixes and normalizing.""" + cleaned = text.strip() + + # Remove common prefixes that shouldn't be part of company names + prefixes_to_remove = [ + "compare ", + "get ", + "show ", + "what ", + "how ", + "when ", + "where ", + "why ", + "find ", + "search ", + "look ", + "see ", + "view ", + "display ", + "list ", + "tell ", + "give ", + "provide ", + "fetch ", + "retrieve ", + "obtain ", + ] + + cleaned_lower = cleaned.lower() + for prefix in prefixes_to_remove: + if cleaned_lower.startswith(prefix): + cleaned = cleaned[len(prefix) :].strip() + break + + # Remove trailing punctuation + cleaned = cleaned.rstrip(".,!?;:") + + return cleaned + + def _is_likely_company(self, text: str) -> bool: + """Determine if detected entity is likely a company.""" + text_lower = text.lower() + + # Company indicators + company_indicators = [ + "inc", + "corp", + "corporation", + "company", + "ltd", + "limited", + "llc", + "plc", + "technologies", + "systems", + "solutions", + "services", + "group", + "holdings", + "bank", + "financial", + "capital", + "investment", + "fund", + "insurance", + 
"pharmaceutical", + "biotech", + "energy", + "oil", + "gas", + "mining", + "automotive", + "motors", + "airlines", + "airways", + "communications", + "media", + "entertainment", + "studios", + "networks", + "broadcasting", + ] + + # Check if contains company indicators + for indicator in company_indicators: + if indicator in text_lower: + return True + + # Check if it's a known ticker pattern (2-5 uppercase letters) + if re.match(r"^[A-Z]{2,5}$", text): + return True + + # Check if it's a well-known company name (expanded list) + known_companies = { + # Tech companies + "apple", + "microsoft", + "google", + "alphabet", + "amazon", + "meta", + "facebook", + "tesla", + "netflix", + "nvidia", + "intel", + "cisco", + "oracle", + "salesforce", + "adobe", + "paypal", + "uber", + "lyft", + "airbnb", + "zoom", + "slack", + "shopify", + "spotify", + "twitter", + "snap", + "pinterest", + "reddit", + "palantir", + "snowflake", + # Traditional companies + "walmart", + "disney", + "nike", + "mcdonald", + "starbucks", + "boeing", + "caterpillar", + "ford", + "general motors", + "general electric", + "ibm", + "hp", + "dell", + "xerox", + # Financial companies + "jpmorgan", + "goldman sachs", + "morgan stanley", + "bank of america", + "wells fargo", + "citigroup", + "visa", + "mastercard", + "american express", + "berkshire hathaway", + # International companies + "samsung", + "toyota", + "sony", + "nintendo", + "softbank", + "alibaba", + "tencent", + "tsmc", + "asml", + "nestle", + "unilever", + "lvmh", + "sap", + "siemens", + "volkswagen", + # Healthcare & Pharma + "johnson & johnson", + "pfizer", + "merck", + "abbott", + "bristol myers squibb", + "eli lilly", + "novartis", + "roche", + "astrazeneca", + "glaxosmithkline", + # Energy & Commodities + "exxon mobil", + "chevron", + "conocophillips", + "bp", + "shell", + "total", + "saudi aramco", + "gazprom", + "petrobras", + } + + return any(company in text_lower for company in known_companies) + + def _is_geographic_entity(self, 
# NOTE(review): every def below that takes ``self`` reads instance attributes
# and is presumably a method of the entity-processor class whose header
# precedes this chunk -- re-indent under that class when applying. TODO confirm.


def _is_geographic_entity(self, text: str) -> bool:
    """Return True when *text* names a place rather than a company.

    The text is lower-cased and stripped, then accepted if it is a known
    geographic term, the possessive form of one ("US's", "Japan’s"), or ends
    in a generic place word such as " city" or " province".

    Args:
        text: Surface text of a detected entity.

    Returns:
        True for geographic entities, False otherwise.
    """
    candidate = text.lower().strip()

    # Common geographic terms that should not be treated as companies.
    geographic_terms = {
        # Countries
        "us", "usa", "united states", "america", "uk", "united kingdom",
        "canada", "china", "japan", "germany", "france", "italy", "spain",
        "russia", "india", "brazil", "mexico", "australia", "south korea",
        "netherlands", "switzerland",
        # US States
        "california", "new york", "texas", "florida", "illinois",
        "pennsylvania", "ohio", "georgia", "north carolina", "michigan",
        "new jersey", "virginia", "washington", "arizona", "massachusetts",
        "tennessee", "indiana", "missouri", "maryland", "wisconsin",
        "colorado", "minnesota", "south carolina", "alabama",
        # Major cities
        "new york city", "los angeles", "chicago", "houston", "phoenix",
        "philadelphia", "san antonio", "san diego", "dallas", "san jose",
        "austin", "jacksonville", "san francisco", "columbus", "charlotte",
        "fort worth", "detroit", "el paso", "memphis", "seattle", "denver",
        "washington dc", "boston", "nashville", "baltimore", "oklahoma city",
        "portland", "las vegas", "milwaukee", "albuquerque",
        # International cities
        "london", "paris", "tokyo", "beijing", "shanghai", "mumbai", "delhi",
        "sydney", "toronto", "vancouver", "berlin", "munich", "zurich",
        "geneva", "amsterdam", "stockholm", "copenhagen", "oslo", "helsinki",
        "dublin", "madrid", "barcelona", "rome", "milan", "moscow",
        "st petersburg", "hong kong", "singapore", "seoul",
        # Regions/Continents
        "europe", "asia", "north america", "south america", "africa",
        "oceania", "middle east", "latin america", "caribbean", "scandinavia",
        "balkans", "eastern europe", "western europe", "southeast asia",
        "east asia", "south asia",
    }

    if candidate in geographic_terms:
        return True

    # Possessive forms ("US's" -> "us"); the typographic apostrophe is
    # accepted too because user queries are often pasted from rich text.
    for marker in ("'s", "\u2019s"):
        if candidate.endswith(marker) and candidate[: -len(marker)] in geographic_terms:
            return True

    # Generic place words that indicate locations, not companies.
    return candidate.endswith(
        (" city", " state", " country", " region", " province", " territory")
    )


def _deduplicate_entities(self, entities: List[EntityMatch]) -> List[EntityMatch]:
    """Drop overlapping entity matches and renumber their placeholders.

    Matches are visited in order of ``start``. When a match overlaps one that
    was already kept, the one with the longer ``text`` wins (the earlier match
    wins ties). Survivors of type ``COMPANY_NER``, ``GPE_NER`` and
    ``PERSON_NER`` then receive sequential, per-type placeholders; other
    entity types keep whatever placeholder they already had.

    Args:
        entities: Candidate matches, in any order. The list itself is not
            reordered; the surviving match objects have ``placeholder``
            reassigned.

    Returns:
        The surviving matches ordered by start position. An empty input is
        returned as is.
    """
    if not entities:
        return entities

    # sorted() instead of list.sort(): do not reorder the caller's list.
    ordered = sorted(entities, key=lambda match: match.start)

    kept: List[EntityMatch] = []
    for candidate in ordered:
        clash = next(
            (
                prior
                for prior in kept
                if candidate.start < prior.end and candidate.end > prior.start
            ),
            None,
        )
        if clash is None:
            kept.append(candidate)
        elif len(candidate.text) > len(clash.text):
            # Overlap: the longer surface form replaces the shorter one.
            kept.remove(clash)
            kept.append(candidate)
        # Otherwise the candidate is the shorter (or equal) match: drop it.

    # Reassign placeholder numbers sequentially by type. Previously every
    # placeholder was set to an empty string, so entities could not be told
    # apart and the counters were unused.
    # NOTE(review): the "<LABEL_n>" text is reconstructed from the counter
    # keys -- confirm it matches the placeholder convention used by the
    # stored example queries.
    labels = {"COMPANY_NER": "COMPANY", "GPE_NER": "GPE", "PERSON_NER": "PERSON"}
    counts = dict.fromkeys(labels.values(), 0)
    for entity in kept:
        label = labels.get(entity.entity_type)
        if label is None:
            continue
        counts[label] += 1
        entity.placeholder = f"<{label}_{counts[label]}>"

    return kept


def _normalize_query_legacy(self, query: str) -> Tuple[str, Dict[str, str]]:
    """Normalize *query* with the legacy pattern table (fallback path).

    The query is lower-cased, then each whole-word occurrence of a key of
    ``self.legacy_entity_to_placeholder`` is replaced by the lower-cased
    placeholder. Longer entity strings are tried first so that "apple inc"
    wins over "apple".

    Args:
        query: Raw user query.

    Returns:
        ``(normalized_query, entity_mapping)`` where ``entity_mapping`` maps
        each placeholder that was used to the entity string that matched.
    """
    normalized_query = query.lower()
    entity_mapping: Dict[str, str] = {}

    longest_first = sorted(
        self.legacy_entity_to_placeholder.items(),
        key=lambda item: len(item[0]),
        reverse=True,
    )

    for entity, placeholder in longest_first:
        pattern = r"\b" + re.escape(entity) + r"\b"
        token = placeholder.lower()
        # Callable replacement: the token is inserted literally, with no
        # backslash / group-reference expansion.
        rewritten, hits = re.subn(
            pattern, lambda _match, _token=token: _token, normalized_query, flags=re.IGNORECASE
        )
        if hits:
            normalized_query = rewritten
            entity_mapping[placeholder] = entity

    return normalized_query, entity_mapping


def denormalize_query(self, normalized_query: str, entity_mapping: Dict[str, str]) -> str:
    """Convert normalized query back to original entities.

    Placeholders are matched case-insensitively and substituted in a single
    pass, so text inserted for one placeholder is never rewritten by another.
    Empty placeholder keys are ignored: replacing "" would insert the entity
    between every character of the query.

    Args:
        normalized_query: Query with placeholders
        entity_mapping: Mapping from placeholders to original entities

    Returns:
        Query with original entity names
    """
    lookup = {
        placeholder.lower(): entity
        for placeholder, entity in entity_mapping.items()
        if placeholder
    }
    if not lookup:
        return normalized_query

    # Longest alternative first so a placeholder that is a prefix of another
    # cannot shadow it.
    alternatives = sorted(lookup, key=len, reverse=True)
    matcher = re.compile("|".join(re.escape(key) for key in alternatives), re.IGNORECASE)
    return matcher.sub(
        lambda match: lookup.get(match.group(0).lower(), match.group(0)),
        normalized_query,
    )


def get_common_entities(self) -> Set[str]:
    """Get set of all common entity variations for testing.

    Returns:
        The union of every collection stored as a value of
        ``self.legacy_company_patterns``.
    """
    return set().union(*self.legacy_company_patterns.values())


def get_placeholders(self) -> Set[str]:
    """Get set of all placeholders (keys of ``self.legacy_placeholder_to_entities``)."""
    return set(self.legacy_placeholder_to_entities)


# Convenience functions for backward compatibility
def detect_companies(text: str) -> List[EntityMatch]:
    """Detect company entities in text.

    Runs ``EntityProcessor.process_query`` and converts the returned
    placeholder -> text mapping back into ``EntityMatch`` objects.

    Args:
        text: Query text to scan.

    Returns:
        One ``EntityMatch`` per mapping entry. Position information is not
        available from the mapping, so ``start`` is always 0 and ``end`` is
        the length of the matched text.
    """
    processor = EntityProcessor()
    _, entity_mapping = processor.process_query(text)
    # NOTE(review): every mapping entry is labelled "COMPANY", whatever kind
    # of placeholder it came from -- confirm callers expect that.
    return [
        EntityMatch(
            text=original_text,
            start=0,  # Position info not available from mapping
            end=len(original_text),
            entity_type="COMPANY",
            placeholder=placeholder,
        )
        for placeholder, original_text in entity_mapping.items()
    ]
return entities + + +def normalize_query(text: str) -> Tuple[str, Dict[str, str]]: + """Normalize query by replacing entities with placeholders.""" + processor = EntityProcessor() + return processor.process_query(text) diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/company_financials_examples.json b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/company_financials_examples.json new file mode 100644 index 0000000..ecc425f --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/company_financials_examples.json @@ -0,0 +1,460 @@ +{ + "dataset": "company_financials", + "description": "Examples for company financials tools - specifically financial line items", + "tools": [ + { + "tool_name": "get_financial_line_item_from_identifiers", + "examples": [ + { + "query": "what is the preferred stock additional paid in capital for ?", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ], + "line_item": "additional_paid_in_capital_preferred_stock" + }, + "context": "use 'additional_paid_in_capital_preferred_stock' for the amount of capital received from preferred stock issuance above par value.", + "permissions_required": [ + "StatementsPermission", + "PrivateCompanyFinancialsPermission" + ], + "disambiguation_note": "key difference: 'additional_paid_in_capital_preferred_stock' vs 'preferred_stock_additional_paid_in_capital' - the first follows standard accounting terminology.", + "tags": [ + "preferred_stock", + "capital", + "disambiguation" + ] + }, + { + "query": "show me the convertible preferred stock for ", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": [ + "TSLA" + ], + "line_item": "preferred_stock_convertible" + }, + "context": "use 'preferred_stock_convertible' for preferred stock that can be converted to common stock.", + "permissions_required": [ + "StatementsPermission", + 
"PrivateCompanyFinancialsPermission" + ], + "disambiguation_note": "use 'preferred_stock_convertible' not 'convertible_preferred_stock' - the first is the correct parameter name.", + "tags": [ + "preferred_stock", + "convertible", + "disambiguation" + ] + }, + { + "query": "get the total revenue for and ", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": [ + "GOOGL", + "AMZN" + ], + "line_item": "total_revenue" + }, + "context": "use 'total_revenue' for the complete revenue figure including all revenue streams.", + "permissions_required": [ + "StatementsPermission", + "PrivateCompanyFinancialsPermission" + ], + "disambiguation_note": "use 'total_revenue' not 'revenue' when you specifically want the total. 'revenue' refers to normal/regular revenue only.", + "tags": [ + "revenue", + "total", + "multiple_companies" + ] + }, + { + "query": "what is the net income for ?", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": [ + "MSFT" + ], + "line_item": "net_income" + }, + "context": "use 'net_income' for the bottom-line profit after all expenses and taxes.", + "permissions_required": [ + "StatementsPermission", + "PrivateCompanyFinancialsPermission" + ], + "disambiguation_note": "use 'net_income' not 'net_profit' or 'earnings' - net_income is the standard parameter name.", + "tags": [ + "net_income", + "profit", + "earnings" + ] + }, + { + "query": "show me the total debt to equity ratio for ", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": [ + "JPM" + ], + "line_item": "total_debt_to_equity" + }, + "context": "use 'total_debt_to_equity' for the debt-to-equity ratio calculation.", + "permissions_required": [ + "StatementsPermission", + "PrivateCompanyFinancialsPermission" + ], + "disambiguation_note": "use 'total_debt_to_equity' not 'total_debt_to_equity_ratio' - the parameter name doesn't include 'ratio'.", + "tags": [ + "debt", + 
"equity", + "ratio", + "disambiguation" + ] + }, + { + "query": "get quarterly revenue for in q1 2023", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ], + "line_item": "revenue", + "period_type": "quarterly", + "start_year": 2023, + "start_quarter": 1, + "end_year": 2023, + "end_quarter": 1 + }, + "context": "for quarterly data, specify period_type as 'quarterly' and include quarter parameters.", + "permissions_required": [ + "StatementsPermission", + "PrivateCompanyFinancialsPermission" + ], + "disambiguation_note": "when requesting quarterly data, always set period_type to 'quarterly' and specify both start_quarter and end_quarter.", + "tags": [ + "quarterly", + "revenue", + "specific_period" + ] + }, + { + "query": "what is the free cash flow for ?", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ], + "line_item": "free_cash_flow" + }, + "context": "use 'free_cash_flow' for operating cash flow minus capital expenditures.", + "permissions_required": [ + "StatementsPermission", + "PrivateCompanyFinancialsPermission" + ], + "disambiguation_note": "use 'free_cash_flow' not 'fcf' or 'unlevered_free_cash_flow' - free_cash_flow is the standard parameter.", + "tags": [ + "cash_flow", + "free", + "capex" + ] + }, + { + "query": "show me the earnings per share for ", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": [ + "NFLX" + ], + "line_item": "earnings_per_share" + }, + "context": "use 'earnings_per_share' for net income divided by shares outstanding.", + "permissions_required": [ + "StatementsPermission", + "PrivateCompanyFinancialsPermission" + ], + "disambiguation_note": "use 'earnings_per_share' not 'eps' or 'net_income_per_share' - use the full parameter name.", + "tags": [ + "eps", + "earnings", + "per_share" + ] + }, + { + "query": "get the depreciation and amortization for for 2024", + "tool_name": 
"get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "depreciation_and_amortization", + "start_year": 2024, + "end_year": 2024 + }, + "context": "use 'depreciation_and_amortization' when you need both depreciation and amortization values combined.", + "disambiguation_note": "use 'depreciation_and_amortization' not just 'depreciation' when you need the combined value.", + "tags": ["depreciation", "amortization", "disambiguation"] + }, + { + "query": "pull accumulated depreciation for from 2022 through 2024", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "accumulated_depreciation", + "start_year": 2022, + "end_year": 2024 + }, + "context": "balance sheet roll-forward requests must include both start_year and end_year to cover the full range.", + "disambiguation_note": "do not substitute 'depreciation_and_amortization' for balance sheet accounts like 'accumulated_depreciation'.", + "tags": ["balance_sheet", "accumulated_depreciation", "date_range"] + }, + { + "query": "show me the total debt to equity for in 2024", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "total_debt_to_equity", + "start_year": 2024, + "end_year": 2024 + }, + "context": "Use 'total_debt_to_equity' to get the company's financial leverage ratio, which compares total debt to shareholders' equity.", + "disambiguation_note": "use 'total_debt_to_equity' not 'total_debt_to_equity_ratio' - the parameter name doesn't include 'ratio'.", + "tags": ["debt", "equity", "ratio", "disambiguation"] + }, + { + "query": "what is the preferred stock dividend for in 2023", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "preferred_stock_dividend", + "start_year": 2023, + "end_year": 2023 + }, + "context": "Use 'preferred_stock_dividend' to 
retrieve the amount of dividends paid to preferred shareholders, which typically have priority over common stock dividends.", + "disambiguation_note": "use 'preferred_stock_dividend' not 'preferred_dividends_paid' - the first is the correct parameter name.", + "tags": ["preferred", "dividend", "disambiguation"] + }, + { + "query": "show me the total receivables for in 2023 Q3", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "total_receivable", + "period_type": "quarterly", + "start_year": 2023, + "start_quarter": 3, + "end_year": 2023, + "end_quarter": 3 + }, + "context": "Use 'total_receivable' to get the sum of all amounts owed to the company, including both current and non-current receivables.", + "disambiguation_note": "use 'total_receivable' (singular) not 'total_receivables' (plural) - the parameter name is singular.", + "tags": ["receivables", "disambiguation", "quarterly"] + }, + { + "query": "what was 's net interest expense for q2 2025?", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "net_interest_expense", + "period_type": "quarterly", + "start_year": 2025, + "start_quarter": 2, + "end_year": 2025, + "end_quarter": 2 + }, + "context": "for quarter-specific queries, include period_type='quarterly' and both start/end quarter fields.", + "disambiguation_note": "never use 'ltm' when the user asks for an explicit quarter.", + "tags": ["interest_expense", "quarterly", "period_params"] + }, + { + "query": "get the current accounts receivable for ", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "accounts_receivable" + }, + "context": "Use 'accounts_receivable' to get the amount of money owed to the company by customers for goods or services provided on credit.", + "disambiguation_note": "use 'accounts_receivable' not 
'current_accounts_receivable' - the parameter name doesn't include 'current'.", + "tags": ["receivables", "current_assets", "disambiguation"] + }, + { + "query": "show me the net income for from 2022 to 2024", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "net_income", + "start_year": 2022, + "end_year": 2024 + }, + "context": "Use 'net_income' to retrieve the company's bottom-line profit after all expenses, taxes, and interest have been deducted from total revenue.", + "disambiguation_note": "use 'net_income' not 'net_income_to_company' - the first is the standard parameter name.", + "tags": ["net_income", "disambiguation", "time_series"] + }, + { + "query": "get the quarterly revenue for in Q2 2025", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "total_revenue", + "period_type": "quarterly", + "start_year": 2025, + "start_quarter": 2, + "end_year": 2025, + "end_quarter": 2 + }, + "context": "Use 'total_revenue' with 'period_type' set to 'quarterly' to get the company's top-line earnings for a specific quarter.", + "disambiguation_note": "when requesting quarterly data, always specify both start_quarter and end_quarter, and set period_type to 'quarterly'.", + "tags": ["revenue", "quarterly", "specific_period"] + }, + { + "query": "retrieve foreign exchange adjustments ytd 2025 for ", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "foreign_exchange_adjustments", + "period_type": "ytd", + "start_year": 2025 + }, + "context": "use period_type='ytd' when the user explicitly asks for year-to-date amounts.", + "disambiguation_note": "the correct line item is 'foreign_exchange_adjustments', not 'foreign_exchange_rate_adjustments'.", + "tags": ["cta", "ytd", "fx"] + }, + { + "query": "what were 's divestitures from 2021 to 2024?", + "tool_name": 
"get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "divestitures", + "start_year": 2021, + "end_year": 2024 + }, + "context": "range queries must include both start_year and end_year for bounded periods.", + "disambiguation_note": "do not substitute 'sale_of_real_estate' unless the user explicitly states real estate only.", + "tags": ["divestitures", "multi_year", "boundaries"] + }, + { + "query": "show me the finance division debt non-current portion for ", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "finance_division_debt_non_current_portion" + }, + "context": "Use 'finance_division_debt_non_current_portion' to get the long-term debt obligations specifically related to the company's financing division.", + "disambiguation_note": "use 'finance_division_debt_non_current_portion' not 'finance_division_debt_long_term_portion' - the first is the correct parameter name.", + "tags": ["finance_division", "debt", "non_current", "disambiguation"] + }, + { + "query": "what is the short term deferred tax asset for ", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "short_term_deferred_tax_asset" + }, + "context": "Use 'short_term_deferred_tax_asset' to get the portion of deferred tax assets expected to be realized within one year.", + "disambiguation_note": "use 'short_term_deferred_tax_asset' not 'deferred_tax_asset_current_portion' - the first is the correct parameter name.", + "tags": ["deferred_tax", "current_assets", "disambiguation"] + } + , + { + "query": "provide total unusual items for from 2019 through 2024", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "total_unusual_items", + "start_year": 2019, + "end_year": 2024 + }, + "context": "use both start_year and end_year for multi-year 
roll-ups of unusual items.", + "disambiguation_note": "ensure 'end_year' is populated; leaving it null will truncate the range.", + "tags": ["unusual_items", "multi_year", "range"] + }, + { + "query": "get basic eps for in 2024", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "basic_eps", + "start_year": 2024, + "end_year": 2024 + }, + "context": "even when period_type defaults to annual, include explicit start/end years for clarity.", + "disambiguation_note": "do not shift years; both start_year and end_year should match the requested year.", + "tags": ["eps", "annual", "year_alignment"] + }, + { + "query": "fetch current income taxes payable quarterly from q2 2022 to q2 2023 for ", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "current_income_taxes_payable", + "period_type": "quarterly", + "start_year": 2022, + "start_quarter": 2, + "end_year": 2023, + "end_quarter": 2 + }, + "context": "boundary quarters must be provided for both start and end when querying multiple quarters.", + "disambiguation_note": "ensure period_type='quarterly' whenever quarter fields are present.", + "tags": ["taxes", "quarterly", "bounded_range"] + }, + { + "query": "report the cash and equivalents for at year end 2024", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "cash_and_equivalents", + "start_year": 2024, + "end_year": 2024 + }, + "context": "Balance sheet cash positions must use the canonical 'cash_and_equivalents' line item to include cash plus near-cash investments.", + "disambiguation_note": "Avoid the shorter 'cash' alias when a more appropriate line item is available.", + "tags": ["cash", "balance_sheet", "disambiguation"] + }, + { + "query": "measure finance division interest expense for over the last three reported quarters", + "tool_name": 
"get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "interest_expense_finance_division", + "period_type": "quarterly", + "start_year": 2024, + "start_quarter": 4, + "end_year": 2025, + "end_quarter": 2 + }, + "context": "The finance-division interest expense metric places `finance_division` at the end of the name ('interest_expense_finance_division'), not as a prefix. Populate all quarter boundaries when covering multiple sequential quarters.", + "permissions_required": [ + "StatementsPermission", + "PrivateCompanyFinancialsPermission" + ], + "disambiguation_note": "Use 'interest_expense_finance_division', not 'finance_division_interest_expense'. Failures 60 and 142 came from swapping the order.", + "tags": ["finance_division", "interest_expense", "quarterly", "disambiguation"] + }, + { + "query": "show the diluted eps excluding extraordinary items for in 2023 and 2024", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "diluted_earning_per_share_excluding_extra_items", + "start_year": 2023, + "end_year": 2024 + }, + "context": "Explicit multi-year EPS comparisons require both start_year and end_year.", + "disambiguation_note": "The canonical parameter is 'diluted_earning_per_share_excluding_extra_items'.", + "tags": ["eps", "multi_year", "disambiguation"] + }, + { + "query": "provide the net change in cash for across fiscal 2022 and 2023", + "tool_name": "get_financial_line_item_from_identifiers", + "parameters": { + "identifiers": ["TICKER"], + "line_item": "net_change_in_cash", + "start_year": 2022, + "end_year": 2023 + }, + "context": "Multi-year cash flow roll-ups must include both boundary years to avoid null parameters.", + "disambiguation_note": "Use the canonical 'net_change_in_cash'.", + "tags": ["cash_flow", "multi_year", "boundaries", "disambiguation"] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/company_intelligence_examples.json 
b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/company_intelligence_examples.json new file mode 100644 index 0000000..96c8839 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/company_intelligence_examples.json @@ -0,0 +1,208 @@ +{ + "description": "Examples for all company information tools", + "tools": [ + { + "tool_name": "get_info_from_identifiers", + "examples": [ + { + "query": "get basic information about ", + "tool_name": "get_info_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ] + }, + "context": "use get_info_from_identifiers for basic company details like name, status, industry, employees, founding date, and headquarters.", + "permissions_required": [ + "CompanyIntelligencePermission" + ], + "disambiguation_note": "this tool provides core company facts - use it for questions about company basics, not detailed descriptions.", + "tags": [ + "company_info", + "basic_info", + "headquarters", + "industry" + ] + }, + { + "query": "what are the basic details for , , and ?", + "tool_name": "get_info_from_identifiers", + "parameters": { + "identifiers": [ + "MSFT", + "GOOGL", + "AMZN" + ] + }, + "context": "batch multiple companies to get their basic information efficiently.", + "permissions_required": [ + "CompanyIntelligencePermission" + ], + "disambiguation_note": "always batch multiple companies in one call rather than making separate requests.", + "tags": [ + "company_info", + "multiple_companies", + "batching", + "basic_info" + ] + }, + { + "query": "where is headquartered and when was it founded?", + "tool_name": "get_info_from_identifiers", + "parameters": { + "identifiers": [ + "TSLA" + ] + }, + "context": "this tool provides headquarters location and founding date information.", + "permissions_required": [ + "CompanyIntelligencePermission" + ], + "disambiguation_note": "use get_info_from_identifiers for headquarters and founding date questions, not get_company_description.", + 
"tags": [ + "headquarters", + "founding_date", + "location", + "company_info" + ] + } + ] + }, + { + "tool_name": "get_company_summary_from_identifiers", + "examples": [ + { + "query": "give me a summary of what does", + "tool_name": "get_company_summary_from_identifiers", + "parameters": { + "identifiers": [ + "NFLX" + ] + }, + "context": "use get_company_summary_from_identifiers for one-paragraph business summaries including products, services, and markets.", + "permissions_required": [ + "CompanyIntelligencePermission" + ], + "disambiguation_note": "use summary for concise business overviews, description for detailed multi-section information.", + "tags": [ + "summary", + "business_overview", + "products", + "services" + ] + }, + { + "query": "what do and do?", + "tool_name": "get_company_summary_from_identifiers", + "parameters": { + "identifiers": [ + "JPM", + "BAC" + ] + }, + "context": "get concise business summaries for multiple companies in one call.", + "permissions_required": [ + "CompanyIntelligencePermission" + ], + "disambiguation_note": "summary provides a single paragraph overview, while description provides detailed sections.", + "tags": [ + "summary", + "banking", + "multiple_companies", + "business_model" + ] + } + ] + }, + { + "tool_name": "get_company_description_from_identifiers", + "examples": [ + { + "query": "give me a detailed description of 's business segments and competition", + "tool_name": "get_company_description_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ] + }, + "context": "use get_company_description_from_identifiers for detailed, multi-section descriptions including segments, competition, and history.", + "permissions_required": [ + "CompanyIntelligencePermission" + ], + "disambiguation_note": "description provides detailed sections (segments, competition, history), summary provides one paragraph.", + "tags": [ + "description", + "detailed", + "segments", + "competition", + "history" + ] + }, + { + 
"query": "what are 's main business segments and competitive landscape?", + "tool_name": "get_company_description_from_identifiers", + "parameters": { + "identifiers": [ + "AMZN" + ] + }, + "context": "description includes detailed segment breakdowns and competitive analysis.", + "permissions_required": [ + "CompanyIntelligencePermission" + ], + "disambiguation_note": "use description for detailed segment and competition analysis, not summary.", + "tags": [ + "segments", + "competition", + "detailed_analysis", + "business_units" + ] + } + ] + }, + { + "tool_name": "get_company_other_names_from_identifiers", + "examples": [ + { + "query": "what other names does go by?", + "tool_name": "get_company_other_names_from_identifiers", + "parameters": { + "identifiers": [ + "GOOGL" + ] + }, + "context": "use get_company_other_names_from_identifiers for alternate names, historical names, and native language names.", + "permissions_required": [ + "CompanyIntelligencePermission" + ], + "disambiguation_note": "this tool is specifically for name variations, not basic company information.", + "tags": [ + "alternate_names", + "historical_names", + "name_variations" + ] + }, + { + "query": "what was called before and what other names does it use?", + "tool_name": "get_company_other_names_from_identifiers", + "parameters": { + "identifiers": [ + "META" + ] + }, + "context": "get historical names (previous company names) and alternate names (current variations).", + "permissions_required": [ + "CompanyIntelligencePermission" + ], + "disambiguation_note": "use this for name history questions, not current business information.", + "tags": [ + "historical_names", + "name_changes", + "alternate_names", + "rebranding" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/competitors_examples.json b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/competitors_examples.json new file mode 100644 index 0000000..a2449ad --- 
/dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/competitors_examples.json @@ -0,0 +1,130 @@ +{ + "description": "Comprehensive examples for competitor queries covering all competitor sources", + "tools": [ + { + "tool_name": "get_competitors_from_identifiers", + "examples": [ + { + "query": "who are 's competitors according to sec filings?", + "tool_name": "get_competitors_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ], + "competitor_source": "filing" + }, + "context": "use get_competitors_from_identifiers with specific sources to find competitor relationships.", + "permissions_required": [ + "CompetitorsPermission" + ], + "disambiguation_note": "competitor_source must be exact: 'filing', 'key_dev', 'contact', 'third_party', 'self_identified', 'named_by_competitor'.", + "tags": [ + "competitors", + "sec_filings", + "competitive_landscape" + ] + }, + { + "query": "what companies does consider as competitors?", + "tool_name": "get_competitors_from_identifiers", + "parameters": { + "identifiers": [ + "TSLA" + ], + "competitor_source": "self_identified" + }, + "context": "use 'self_identified' for competitors that the company itself identifies.", + "permissions_required": [ + "CompetitorsPermission" + ], + "disambiguation_note": "different sources provide different perspectives on competitive relationships.", + "tags": [ + "self_identified_competitors", + "company_perspective", + "competitive_analysis" + ] + }, + { + "query": "who are 's competitors from key developments?", + "tool_name": "get_competitors_from_identifiers", + "parameters": { + "identifiers": [ + "MSFT" + ], + "competitor_source": "key_dev" + }, + "context": "use 'key_dev' for competitors identified through key business developments and news.", + "permissions_required": [ + "CompetitorsPermission" + ], + "disambiguation_note": "use 'key_dev' not 'key_development' - the parameter is abbreviated.", + "tags": [ + "key_development_competitors", + 
"news_based", + "market_intelligence" + ] + }, + { + "query": "what competitors does have through contact relationships?", + "tool_name": "get_competitors_from_identifiers", + "parameters": { + "identifiers": [ + "GOOGL" + ], + "competitor_source": "contact" + }, + "context": "use 'contact' for competitors identified through business contact relationships.", + "permissions_required": [ + "CompetitorsPermission" + ], + "disambiguation_note": "use 'contact' for relationship-based competitor identification.", + "tags": [ + "contact_competitors", + "relationship_based", + "business_networks" + ] + }, + { + "query": "who are 's competitors according to third-party sources?", + "tool_name": "get_competitors_from_identifiers", + "parameters": { + "identifiers": [ + "AMZN" + ], + "competitor_source": "third_party" + }, + "context": "use 'third_party' for competitors identified by external research and analysis firms.", + "permissions_required": [ + "CompetitorsPermission" + ], + "disambiguation_note": "use 'third_party' not 'third_party_source' - the parameter is just 'third_party'.", + "tags": [ + "third_party_competitors", + "external_research", + "analyst_identified" + ] + }, + { + "query": "what companies name as a competitor?", + "tool_name": "get_competitors_from_identifiers", + "parameters": { + "identifiers": [ + "NFLX" + ], + "competitor_source": "named_by_competitor" + }, + "context": "use 'named_by_competitor' for reverse competitive relationships where other companies identify this company as their competitor.", + "permissions_required": [ + "CompetitorsPermission" + ], + "disambiguation_note": "use 'named_by_competitor' for reverse competitive analysis - who considers this company a competitor.", + "tags": [ + "named_by_competitor", + "reverse_competitive_analysis", + "market_position" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/earnings_examples.json 
b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/earnings_examples.json new file mode 100644 index 0000000..05189a1 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/earnings_examples.json @@ -0,0 +1,189 @@ +{ + "description": "Examples for earnings-related tools", + "tools": [ + { + "tool_name": "get_earnings_from_identifiers", + "examples": [ + { + "query": "get all earnings announcements for ", + "tool_name": "get_earnings_from_identifiers", + "parameters": { + "identifiers": [ + "TICKER" + ] + }, + "context": "use get_earnings_from_identifiers to get the complete list of all earnings announcements for companies.", + "permissions_required": [ + "EarningsPermission" + ], + "disambiguation_note": "this returns all earnings (historical list), not just the latest or next earnings.", + "tags": [ + "earnings", + "all_earnings", + "historical", + "announcements" + ] + }, + { + "query": "show me all earnings dates for and ", + "tool_name": "get_earnings_from_identifiers", + "parameters": { + "identifiers": [ + "TICKER", + "TICKER" + ] + }, + "context": "get comprehensive earnings history for multiple companies in one call.", + "permissions_required": [ + "EarningsPermission" + ], + "disambiguation_note": "use this for complete earnings history, not just recent or upcoming earnings.", + "tags": [ + "earnings", + "multiple_companies", + "earnings_history", + "batching" + ] + } + ] + }, + { + "tool_name": "get_latest_earnings_from_identifiers", + "examples": [ + { + "query": "when was 's most recent earnings announcement?", + "tool_name": "get_latest_earnings_from_identifiers", + "parameters": { + "identifiers": [ + "TICKER" + ] + }, + "context": "use get_latest_earnings_from_identifiers for the most recent earnings announcement only.", + "permissions_required": [ + "EarningsPermission" + ], + "disambiguation_note": "this returns only the latest earnings, not the complete history or future earnings.", + "tags": [ + 
"latest_earnings", + "most_recent", + "recent_announcement" + ] + }, + { + "query": "when did last report earnings?", + "tool_name": "get_latest_earnings_from_identifiers", + "parameters": { + "identifiers": [ + "TICKER" + ] + }, + "context": "get the date and details of the most recent earnings report.", + "permissions_required": [ + "EarningsPermission" + ], + "disambiguation_note": "use 'latest' for past earnings, 'next' for future earnings.", + "tags": [ + "last_reported", + "latest_earnings", + "recent_report" + ] + } + ] + }, + { + "tool_name": "get_next_earnings_from_identifiers", + "examples": [ + { + "query": "when is 's next earnings announcement?", + "tool_name": "get_next_earnings_from_identifiers", + "parameters": { + "identifiers": [ + "TICKER" + ] + }, + "context": "use get_next_earnings_from_identifiers for upcoming/future earnings announcements.", + "permissions_required": [ + "EarningsPermission" + ], + "disambiguation_note": "this returns future earnings only, not past or current earnings.", + "tags": [ + "next_earnings", + "upcoming", + "future", + "scheduled" + ] + }, + { + "query": "when will report their next earnings?", + "tool_name": "get_next_earnings_from_identifiers", + "parameters": { + "identifiers": [ + "TICKER" + ] + }, + "context": "get the scheduled date for the next earnings announcement.", + "permissions_required": [ + "EarningsPermission" + ], + "disambiguation_note": "use 'next' for future earnings, 'latest' for past earnings.", + "tags": [ + "next_earnings", + "future_report", + "scheduled_announcement" + ] + } + ] + }, + { + "tool_name": "get_earnings_call_datetimes_from_identifiers", + "examples": [ + { + "query": "when did and host their Q4 2023 earnings calls?", + "tool_name": "get_earnings_call_datetimes_from_identifiers", + "parameters": { + "identifiers": [ + "TICKER", + "TICKER" + ], + "call_type": "earnings_call", + "start_date": "2023-10-01", + "end_date": "2023-12-31" + }, + "context": "Use this tool to retrieve 
scheduled or completed earnings call timestamps. Provide a bounded date window when the question references a specific quarter.", + "permissions_required": [ + "EarningsPermission", + "TranscriptsPermission" + ], + "disambiguation_note": "Include both start_date and end_date to cover the entire quarter. The tool expects 'call_type'='earnings_call' for standard earnings call timestamps.", + "tags": [ + "earnings_calls", + "timestamps", + "bounded_range", + "multi_company" + ] + }, + { + "query": "what time is 's upcoming earnings call?", + "tool_name": "get_earnings_call_datetimes_from_identifiers", + "parameters": { + "identifiers": [ + "TICKER" + ], + "call_type": "earnings_call" + }, + "context": "If the user asks for the scheduled time of the next call without specifying a range, omit date filters to receive the next known call.", + "permissions_required": [ + "EarningsPermission", + "TranscriptsPermission" + ], + "disambiguation_note": "Leave dates blank for the next upcoming call. Add date bounds to avoid tool_description_not_found errors when targeting historical windows.", + "tags": [ + "earnings_calls", + "upcoming", + "scheduling" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/id_examples.json b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/id_examples.json new file mode 100644 index 0000000..ff73fb3 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/id_examples.json @@ -0,0 +1,242 @@ +{ + "description": "Examples for ID-related tools: capitalization, security identifiers, and utility tools", + "tools": [ + { + "tool_name": "get_capitalization_from_identifiers", + "examples": [ + { + "query": "what is the market cap of and ?", + "tool_name": "get_capitalization_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL", + "MSFT" + ], + "capitalization": "market_cap" + }, + "context": "use get_capitalization_from_identifiers for market cap, 
enterprise value, and shares outstanding.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "capitalization parameter must be exact: 'market_cap', 'tev', or 'shares_outstanding'.", + "tags": [ + "market_cap", + "valuation", + "capitalization" + ] + }, + { + "query": "get the enterprise value for ", + "tool_name": "get_capitalization_from_identifiers", + "parameters": { + "identifiers": [ + "TSLA" + ], + "capitalization": "tev" + }, + "context": "use 'tev' for total enterprise value calculations.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "use 'tev' not 'enterprise_value' or 'ev' - the parameter is 'tev'.", + "tags": [ + "enterprise_value", + "tev", + "valuation_metrics" + ] + }, + { + "query": "what was 's market cap at the end of 2022?", + "tool_name": "get_capitalization_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ], + "capitalization": "market_cap", + "start_date": "2022-12-31", + "end_date": "2022-12-31" + }, + "context": "get historical market cap for specific dates using date parameters.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "use same start_date and end_date for specific point-in-time values.", + "tags": [ + "historical_market_cap", + "specific_date", + "point_in_time" + ] + } + ] + }, + { + "tool_name": "get_cusip_from_identifiers", + "examples": [ + { + "query": "what is the cusip for ?", + "tool_name": "get_cusip_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ] + }, + "context": "use get_cusip_from_identifiers to get cusip identifiers for companies.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "this tool only returns cusip identifiers, not other identifier types.", + "tags": [ + "cusip", + "identifiers", + "security_identifiers" + ] + }, + { + "query": "get cusip identifiers for , , and ", + "tool_name": "get_cusip_from_identifiers", + "parameters": { + "identifiers": [ + "MSFT", + 
"GOOGL", + "AMZN" + ] + }, + "context": "batch multiple companies to get their cusip identifiers efficiently.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "always batch multiple companies in one call rather than making separate requests.", + "tags": [ + "cusip", + "multiple_companies", + "batching", + "us_securities" + ] + } + ] + }, + { + "tool_name": "get_isin_from_identifiers", + "examples": [ + { + "query": "what is the isin for ?", + "tool_name": "get_isin_from_identifiers", + "parameters": { + "identifiers": [ + "MSFT" + ] + }, + "context": "use get_isin_from_identifiers to get isin identifiers for companies.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "this tool only returns isin identifiers, not other identifier types.", + "tags": [ + "isin", + "identifiers", + "international_identifiers" + ] + }, + { + "query": "get isin codes for , , and ", + "tool_name": "get_isin_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL", + "TSLA", + "NFLX" + ] + }, + "context": "batch multiple companies to get their isin identifiers efficiently.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "always batch multiple companies in one call for better performance.", + "tags": [ + "isin", + "multiple_companies", + "batching", + "international_securities" + ] + } + ] + }, + { + "tool_name": "get_latest", + "examples": [ + { + "query": "what is the current date and latest reporting periods?", + "tool_name": "get_latest", + "parameters": {}, + "context": "use get_latest to get current date, latest annual reporting year, and latest quarterly reporting period.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "this tool takes no parameters and returns current system date and reporting periods.", + "tags": [ + "current_date", + "reporting_periods", + "latest_data", + "system_info" + ] + }, + { + "query": "what is today's date in the user's timezone?", + 
"tool_name": "get_latest", + "parameters": { + "use_local_timezone": true + }, + "context": "use use_local_timezone=true to get date in user's local timezone instead of utc.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "set use_local_timezone=true for user-specific date, leave false/empty for utc.", + "tags": [ + "local_timezone", + "user_timezone", + "current_date" + ] + } + ] + }, + { + "tool_name": "get_n_quarters_ago", + "examples": [ + { + "query": "what quarter was it 2 quarters ago?", + "tool_name": "get_n_quarters_ago", + "parameters": { + "n": 2 + }, + "context": "use get_n_quarters_ago to calculate quarters relative to the current quarter.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "returns the year and quarter that was n quarters before the current quarter.", + "tags": [ + "quarter_calculation", + "relative_quarters", + "time_calculation" + ] + }, + { + "query": "what was the quarter 4 quarters back?", + "tool_name": "get_n_quarters_ago", + "parameters": { + "n": 4 + }, + "context": "calculate quarters going back in time from the current quarter.", + "permissions_required": [ + "IDPermission" + ], + "disambiguation_note": "use this for relative quarter calculations rather than hardcoding dates.", + "tags": [ + "year_back", + "quarter_calculation", + "relative_time" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/mergers_examples.json b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/mergers_examples.json new file mode 100644 index 0000000..7236e29 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/mergers_examples.json @@ -0,0 +1,150 @@ +{ + "description": "Comprehensive examples for mergers and acquisitions tools", + "tools": [ + { + "tool_name": "get_mergers_from_identifiers", + "examples": [ + { + "query": "which companies did acquire?", + "tool_name": "get_mergers_from_identifiers", 
+ "parameters": { + "identifiers": [ + "MSFT" + ] + }, + "context": "use get_mergers_from_identifiers to find all m&a transactions where a company was involved as buyer, seller, or target.", + "permissions_required": [ + "MergersPermission" + ], + "disambiguation_note": "this returns transaction_ids that can be used with other m&a tools for detailed information.", + "tags": [ + "acquisitions", + "ma_history", + "buyer_role", + "transaction_ids" + ] + }, + { + "query": "who acquired ben & jerry's?", + "tool_name": "get_mergers_from_identifiers", + "parameters": { + "identifiers": [ + "BJ" + ] + }, + "context": "find m&a transactions where the company was a target (acquired).", + "permissions_required": [ + "MergersPermission" + ], + "disambiguation_note": "results show the company's role in each transaction (target, buyer, seller).", + "tags": [ + "acquisition_target", + "who_acquired", + "target_role" + ] + }, + { + "query": "what m&a activity has been involved in?", + "tool_name": "get_mergers_from_identifiers", + "parameters": { + "identifiers": [ + "PFE" + ] + }, + "context": "get comprehensive m&a history including all roles (buyer, seller, target).", + "permissions_required": [ + "MergersPermission" + ], + "disambiguation_note": "use this first to get transaction_ids, then use other m&a tools for details.", + "tags": [ + "ma_activity", + "comprehensive_history", + "all_roles" + ] + } + ] + }, + { + "tool_name": "get_merger_info_from_transaction_id", + "examples": [ + { + "query": "when was the acquisition of ben & jerry's announced?", + "tool_name": "get_merger_info_from_transaction_id", + "parameters": { + "transaction_id": "67890" + }, + "context": "use this tool for comprehensive m&a transaction details including timeline, participants, and financial terms.", + "permissions_required": [ + "MergersPermission" + ], + "disambiguation_note": "this tool requires transaction_id - use get_mergers_from_identifiers to find transaction ids first.", + "tags": [ + 
"ma_timeline", + "announcement_date", + "transaction_details" + ] + }, + { + "query": "how much did s&p purchase kensho for?", + "tool_name": "get_merger_info_from_transaction_id", + "parameters": { + "transaction_id": "12345" + }, + "context": "get purchase price and financial details of m&a transactions.", + "permissions_required": [ + "MergersPermission" + ], + "disambiguation_note": "this provides detailed financial terms when transaction_id is known.", + "tags": [ + "purchase_price", + "acquisition_cost", + "deal_value" + ] + } + ] + }, + { + "tool_name": "get_advisors_for_company_in_transaction_from_identifier", + "examples": [ + { + "query": "who advised s&p global during their purchase of kensho?", + "tool_name": "get_advisors_for_company_in_transaction_from_identifier", + "parameters": { + "identifier": "SPGI", + "transaction_id": "12345" + }, + "context": "use this tool to find advisory firms that provided services during m&a transactions.", + "permissions_required": [ + "MergersPermission" + ], + "disambiguation_note": "this tool requires both company identifier and transaction_id - use get_mergers_from_identifiers first to find transaction_id.", + "tags": [ + "advisors", + "ma", + "investment_banking", + "transaction_services" + ] + }, + { + "query": "which firms advised ben & jerry's in their acquisition?", + "tool_name": "get_advisors_for_company_in_transaction_from_identifier", + "parameters": { + "identifier": "BJ", + "transaction_id": "67890" + }, + "context": "get the list of advisory companies for acquisition transactions.", + "permissions_required": [ + "MergersPermission" + ], + "disambiguation_note": "you need the specific transaction_id - this tool won't work without it.", + "tags": [ + "acquisition_advisors", + "ma", + "advisory_services", + "transaction_id_required" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/pricing_examples.json 
b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/pricing_examples.json new file mode 100644 index 0000000..7812c14 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/pricing_examples.json @@ -0,0 +1,144 @@ +{ + "description": "Examples for stock price queries with focus on date ranges and periodicity", + "tools": [ + { + "tool_name": "get_prices_from_identifiers", + "examples": [ + { + "query": "what are the current stock prices for and ?", + "tool_name": "get_prices_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL", + "MSFT" + ] + }, + "context": "for current/most recent prices, omit start_date and end_date parameters.", + "permissions_required": [ + "PricingPermission" + ], + "disambiguation_note": "leave date parameters empty to get the most recent available prices automatically.", + "tags": [ + "current_prices", + "most_recent", + "multiple_companies" + ] + }, + { + "query": "get 's stock price for the last month", + "tool_name": "get_prices_from_identifiers", + "parameters": { + "identifiers": [ + "TSLA" + ], + "start_date": "2023-10-01", + "end_date": "2023-10-31" + }, + "context": "for specific date ranges, provide both start_date and end_date in yyyy-mm-dd format.", + "permissions_required": [ + "PricingPermission" + ], + "disambiguation_note": "use specific dates rather than relative terms like 'last month' - calculate the actual dates.", + "tags": [ + "date_range", + "monthly_data", + "historical_prices" + ] + }, + { + "query": "show me 's daily stock prices for q3 2023", + "tool_name": "get_prices_from_identifiers", + "parameters": { + "identifiers": [ + "AMZN" + ], + "start_date": "2023-07-01", + "end_date": "2023-09-30", + "periodicity": "daily" + }, + "context": "use periodicity parameter to specify the frequency of data points (daily, weekly, monthly).", + "permissions_required": [ + "PricingPermission" + ], + "disambiguation_note": "periodicity controls data frequency, not the date 
range. default is daily if not specified.", + "tags": [ + "daily_prices", + "quarterly_range", + "periodicity" + ] + }, + { + "query": "what were 's adjusted stock prices in 2022?", + "tool_name": "get_prices_from_identifiers", + "parameters": { + "identifiers": [ + "NFLX" + ], + "start_date": "2022-01-01", + "end_date": "2022-12-31", + "adjusted": true + }, + "context": "use adjusted=true to get prices adjusted for splits, dividends, and other corporate actions.", + "permissions_required": [ + "PricingPermission" + ], + "disambiguation_note": "adjusted prices account for corporate actions - use for historical analysis and comparisons.", + "tags": [ + "adjusted_prices", + "yearly_data", + "corporate_actions" + ] + }, + { + "query": "get the latest stock prices for all major tech companies", + "tool_name": "get_prices_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL", + "MSFT", + "GOOGL", + "AMZN", + "META", + "TSLA", + "NFLX" + ] + }, + "context": "batch multiple companies efficiently rather than making separate calls.", + "permissions_required": [ + "PricingPermission" + ], + "disambiguation_note": "always batch multiple companies in one call for better performance.", + "tags": [ + "multiple_companies", + "tech_stocks", + "batching", + "current_prices" + ] + }, + { + "query": "analyze META's stock performance across 2020 earnings and regulatory events", + "tool_name": "get_prices_from_identifiers", + "parameters": { + "identifiers": [ + "META" + ], + "start_date": "2020-01-01", + "end_date": "2020-12-31", + "periodicity": "daily" + }, + "context": "When a question references multiple events across a calendar year, cover the entire period with explicit start and end dates.", + "permissions_required": [ + "PricingPermission" + ], + "disambiguation_note": "Use the full-year window instead of narrowing to a single month. 
Failure 208 resulted from truncating the date range.", + "tags": [ + "event_window", + "daily_prices", + "inclusive_range", + "regulatory_events" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/relationship_examples.json b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/relationship_examples.json new file mode 100644 index 0000000..9b17cf8 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/relationship_examples.json @@ -0,0 +1,170 @@ +{ + "description": "Comprehensive examples for business relationship queries covering all relationship types", + "tools": [ + { + "tool_name": "get_business_relationship_from_identifiers", + "examples": [ + { + "query": "what are the previous borrowers of ?", + "tool_name": "get_business_relationship_from_identifiers", + "parameters": { + "identifiers": [ + "JPM" + ], + "business_relationship": "borrower" + }, + "context": "use get_business_relationship_from_identifiers to find companies with specific business relationships.", + "permissions_required": [ + "RelationshipPermission" + ], + "disambiguation_note": "the business_relationship parameter must match exact relationship types from the system.", + "tags": [ + "borrowers", + "lending_relationships", + "business_relationships" + ] + }, + { + "query": "who are the suppliers for ?", + "tool_name": "get_business_relationship_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ], + "business_relationship": "supplier" + }, + "context": "find supplier relationships for companies.", + "permissions_required": [ + "RelationshipPermission" + ], + "disambiguation_note": "use exact relationship type names - 'supplier' not 'suppliers' or 'vendor'.", + "tags": [ + "suppliers", + "supply_chain", + "vendor_relationships" + ] + }, + { + "query": "what companies are customers of ?", + "tool_name": "get_business_relationship_from_identifiers", + "parameters": { + "identifiers": [ + 
"MSFT" + ], + "business_relationship": "customer" + }, + "context": "find customer relationships and business partnerships.", + "permissions_required": [ + "RelationshipPermission" + ], + "disambiguation_note": "relationship types are predefined - use exact parameter values.", + "tags": [ + "customers", + "client_relationships", + "business_partnerships" + ] + }, + { + "query": "who are the lenders for ?", + "tool_name": "get_business_relationship_from_identifiers", + "parameters": { + "identifiers": [ + "TSLA" + ], + "business_relationship": "lender" + }, + "context": "find lending relationships where companies provide financing.", + "permissions_required": [ + "RelationshipPermission" + ], + "disambiguation_note": "use 'lender' for companies that provide loans, 'borrower' for companies that receive loans.", + "tags": [ + "lenders", + "financing_relationships", + "credit_relationships" + ] + }, + { + "query": "what partners does work with?", + "tool_name": "get_business_relationship_from_identifiers", + "parameters": { + "identifiers": [ + "GOOGL" + ], + "business_relationship": "partner" + }, + "context": "find strategic partnerships and business alliances.", + "permissions_required": [ + "RelationshipPermission" + ], + "disambiguation_note": "use 'partner' not 'business_partner' or 'strategic_partner' - use the exact parameter name.", + "tags": [ + "partners", + "strategic_alliances", + "business_partnerships" + ] + }, + { + "query": "who are the distributors for ?", + "tool_name": "get_business_relationship_from_identifiers", + "parameters": { + "identifiers": [ + "KO" + ], + "business_relationship": "distributor" + }, + "context": "find distribution channel relationships.", + "permissions_required": [ + "RelationshipPermission" + ], + "disambiguation_note": "use 'distributor' for companies that distribute products, different from 'supplier'.", + "tags": [ + "distributors", + "distribution_channels", + "sales_relationships" + ] + }, + { + "query": "what vendors 
does use?", + "tool_name": "get_business_relationship_from_identifiers", + "parameters": { + "identifiers": [ + "WMT" + ], + "business_relationship": "vendor" + }, + "context": "find vendor relationships for procurement and services.", + "permissions_required": [ + "RelationshipPermission" + ], + "disambiguation_note": "use 'vendor' for service providers, 'supplier' for goods providers - they may overlap but are distinct categories.", + "tags": [ + "vendors", + "procurement", + "service_relationships" + ] + }, + { + "query": "who are the clients of ?", + "tool_name": "get_business_relationship_from_identifiers", + "parameters": { + "identifiers": [ + "JPM" + ], + "business_relationship": "client" + }, + "context": "find client relationships, particularly for service-oriented businesses.", + "permissions_required": [ + "RelationshipPermission" + ], + "disambiguation_note": "use 'client' for service relationships, 'customer' for product relationships - context matters.", + "tags": [ + "clients", + "service_relationships", + "professional_services" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/segments_examples.json b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/segments_examples.json new file mode 100644 index 0000000..9302bde --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/segments_examples.json @@ -0,0 +1,160 @@ +{ + "description": "Comprehensive examples for company segment queries covering all segment types and time periods", + "tools": [ + { + "tool_name": "get_segments_from_identifiers", + "examples": [ + { + "query": "what are 's business segments?", + "tool_name": "get_segments_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ], + "segment_type": "business" + }, + "context": "use get_segments_from_identifiers to get business segment breakdowns for companies.", + "permissions_required": [ + "SegmentsPermission" + ], + 
"disambiguation_note": "segment_type must match available segment types - 'business', 'geographic', etc.", + "tags": [ + "business_segments", + "segment_breakdown", + "business_units" + ] + }, + { + "query": "show me the geographic segments for ", + "tool_name": "get_segments_from_identifiers", + "parameters": { + "identifiers": [ + "KO" + ], + "segment_type": "geographic" + }, + "context": "get geographic/regional segment information for global companies.", + "permissions_required": [ + "SegmentsPermission" + ], + "disambiguation_note": "use 'geographic' for regional breakdowns, 'business' for product/service segments.", + "tags": [ + "geographic_segments", + "regional_breakdown", + "global_operations" + ] + }, + { + "query": "get quarterly business segments for in 2023", + "tool_name": "get_segments_from_identifiers", + "parameters": { + "identifiers": [ + "AMZN" + ], + "segment_type": "business", + "period_type": "quarterly", + "start_year": 2023, + "end_year": 2023 + }, + "context": "segment data can be retrieved for specific time periods and frequencies.", + "permissions_required": [ + "SegmentsPermission" + ], + "disambiguation_note": "use same date parameters as financial statement tools for consistency.", + "tags": [ + "quarterly_segments", + "business_segments", + "time_specific" + ] + }, + { + "query": "what are 's product segments?", + "tool_name": "get_segments_from_identifiers", + "parameters": { + "identifiers": [ + "MSFT" + ], + "segment_type": "product" + }, + "context": "get product-based segment breakdowns for companies with diverse product lines.", + "permissions_required": [ + "SegmentsPermission" + ], + "disambiguation_note": "use 'product' for product-based segments, 'business' for broader business unit segments.", + "tags": [ + "product_segments", + "product_lines", + "segment_analysis" + ] + }, + { + "query": "show me the service segments for ", + "tool_name": "get_segments_from_identifiers", + "parameters": { + "identifiers": [ + "JPM" + ], 
+ "segment_type": "service" + }, + "context": "get service-based segment information for service-oriented companies.", + "permissions_required": [ + "SegmentsPermission" + ], + "disambiguation_note": "use 'service' for service-based segments, particularly for financial services and consulting companies.", + "tags": [ + "service_segments", + "service_lines", + "financial_services" + ] + }, + { + "query": "get revenue segments for over the last 2 years", + "tool_name": "get_segments_from_identifiers", + "parameters": { + "identifiers": [ + "GOOGL" + ], + "segment_type": "revenue", + "start_year": 2022, + "end_year": 2023 + }, + "context": "get revenue-based segment breakdowns showing how revenue is distributed across different areas.", + "permissions_required": [ + "SegmentsPermission" + ], + "disambiguation_note": "use 'revenue' for revenue-based segments, useful for understanding revenue composition.", + "tags": [ + "revenue_segments", + "revenue_breakdown", + "multi_year" + ] + }, + { + "query": "what are the operating segments for in q4 2023?", + "tool_name": "get_segments_from_identifiers", + "parameters": { + "identifiers": [ + "TSLA" + ], + "segment_type": "operating", + "period_type": "quarterly", + "start_year": 2023, + "start_quarter": 4, + "end_year": 2023, + "end_quarter": 4 + }, + "context": "get operating segment information for specific quarters.", + "permissions_required": [ + "SegmentsPermission" + ], + "disambiguation_note": "use 'operating' for operational segment breakdowns, with quarterly parameters for specific periods.", + "tags": [ + "operating_segments", + "quarterly_data", + "operational_breakdown" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/statements_examples.json b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/statements_examples.json new file mode 100644 index 0000000..d28216c --- /dev/null +++ 
b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/statements_examples.json @@ -0,0 +1,80 @@ +{ + "description": "Examples for financial statements tools (excluding line items)", + "tools": [ + { + "tool_name": "get_financial_statement_from_identifiers", + "examples": [ + { + "query": "get the balance sheet for ", + "tool_name": "get_financial_statement_from_identifiers", + "parameters": { + "identifiers": [ + "AAPL" + ], + "statement": "balance_sheet" + }, + "context": "use 'balance_sheet' to get assets, liabilities, and equity information.", + "permissions_required": [ + "StatementsPermission" + ], + "disambiguation_note": "use 'balance_sheet' not 'balance_sheet_statement' - the parameter name is just 'balance_sheet'.", + "tags": [ + "balance_sheet", + "assets", + "liabilities", + "equity" + ] + }, + { + "query": "show me the income statement for and ", + "tool_name": "get_financial_statement_from_identifiers", + "parameters": { + "identifiers": [ + "MSFT", + "GOOGL" + ], + "statement": "income_statement" + }, + "context": "use 'income_statement' to get revenue, expenses, and profit information.", + "permissions_required": [ + "StatementsPermission" + ], + "disambiguation_note": "use 'income_statement' not 'profit_and_loss' or 'p_and_l' - income_statement is the standard parameter.", + "tags": [ + "income_statement", + "revenue", + "expenses", + "profit", + "multiple_companies" + ] + }, + { + "query": "get quarterly income statements for from q1 2022 to q4 2023", + "tool_name": "get_financial_statement_from_identifiers", + "parameters": { + "identifiers": [ + "AMZN" + ], + "statement": "income_statement", + "period_type": "quarterly", + "start_year": 2022, + "start_quarter": 1, + "end_year": 2023, + "end_quarter": 4 + }, + "context": "for quarterly statements, set period_type to 'quarterly' and specify quarter ranges.", + "permissions_required": [ + "StatementsPermission" + ], + "disambiguation_note": "always set period_type to 'quarterly' when 
requesting quarterly data and include both start and end quarters.", + "tags": [ + "income_statement", + "quarterly", + "multi_period", + "time_range" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/transcripts_examples.json b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/transcripts_examples.json new file mode 100644 index 0000000..d9208c1 --- /dev/null +++ b/kfinance/integrations/tool_calling/dynamic_prompts/tool_examples/transcripts_examples.json @@ -0,0 +1,61 @@ +{ + "description": "Examples for transcript access tools", + "tools": [ + { + "tool_name": "get_transcript_from_key_dev_id", + "examples": [ + { + "query": "get the earnings call transcript for key dev id 12345", + "tool_name": "get_transcript_from_key_dev_id", + "parameters": { + "key_dev_id": 12345 + }, + "context": "use get_transcript_from_key_dev_id to get raw transcript text for earnings calls using the key development id.", + "permissions_required": [ + "TranscriptsPermission" + ], + "disambiguation_note": "requires the specific key_dev_id from earnings tools - cannot work with company identifiers.", + "tags": [ + "earnings_transcript", + "key_dev_id", + "transcript_text" + ] + }, + { + "query": "show me the transcript for earnings call 67890", + "tool_name": "get_transcript_from_key_dev_id", + "parameters": { + "key_dev_id": 67890 + }, + "context": "get the full text content of earnings call transcripts.", + "permissions_required": [ + "TranscriptsPermission" + ], + "disambiguation_note": "the key_dev_id must be obtained from earnings tools first - it's not the same as company id.", + "tags": [ + "transcript_content", + "earnings_call", + "full_text" + ] + }, + { + "query": "retrieve transcript text for key development 54321", + "tool_name": "get_transcript_from_key_dev_id", + "parameters": { + "key_dev_id": 54321 + }, + "context": "access detailed transcript content for analysis and research.", + "permissions_required": [ + 
"TranscriptsPermission" + ], + "disambiguation_note": "this is a two-step process: first get key_dev_id from earnings tools, then get transcript.", + "tags": [ + "transcript_retrieval", + "detailed_content", + "research_access" + ] + } + ] + } + ] +} diff --git a/kfinance/integrations/tool_calling/prompts.py b/kfinance/integrations/tool_calling/prompts.py index 4a5319d..0d4e879 100644 --- a/kfinance/integrations/tool_calling/prompts.py +++ b/kfinance/integrations/tool_calling/prompts.py @@ -1,8 +1,9 @@ BASE_PROMPT = f""" You are an LLM designed to help financial analysts. Use the supplied tools to assist the user. + CRITICAL RULES FOR TOOL USAGE -Time Handling: +1. Time Handling: - Always select the most recent complete period when the user does not specify a time. - Use the get_latest function to determine the latest annual year, latest completed quarter, and current date. - For annual data, use the latest completed year. For quarterly data, use the latest completed quarter and year. @@ -12,13 +13,40 @@ - "Last year" or "last quarter" refers to the previous completed period from the current date. - For quarterly data requests without specific quarters, assume the most recent completed quarter. -Tool Selection: +2. Parameter Mapping: +- Always use the exact enum values provided in the tool description for line items and other parameters. +- If the user provides an alias or synonym, map it to the canonical enum using the alias table. Never use a close but non-canonical value. +- Common mappings include: + - "EPS" → "basic_eps" or "diluted_eps" (based on context) + - "revenue" → "total_revenue" + - "depreciation" → "depreciation_and_amortization" + - "cash flow from operations" → "cash_from_operations" +- If the user specifies a time period, use it exactly as provided. If not specified, resolve using get_latest. +- For all tools that accept multiple IDs, always include all mentioned IDs in a single call. Never drop or add IDs unless explicitly instructed.
+ +3. Multiple Entities: +- Always include all mentioned companies, securities, or trading items in a single call when possible. +- Do not make separate calls for each entity when they can be grouped. +- When multiple entities are mentioned, include all their IDs in the relevant array parameter. + +4. Required Parameters: +- For all tools, ensure all required parameters are present and match the user's intent exactly. + +5. Tool Selection: - Use get_latest before any other tool when dates are ambiguous, unspecified, or when you need to determine the most recent period. - Use get_n_quarters_ago for relative quarter references such as "3 quarters ago". - Always make tool calls when financial data is requested—never skip them. - For identifier resolution, use the exact identifiers provided by the user. Do not add or modify suffixes unless explicitly required. -Identifier Handling: +6. Identifier Handling: - Use the exact identifiers provided by the user. Do not add or modify suffixes such as ".PA" or ".DE" unless the user specifies the exchange or market. - Never invent or guess identifiers. Only use those explicitly provided. + +7. Company Reference Preference: +- When resolving company identifiers, always prefer the full company, group, or subsidiary name as provided by the user, rather than splitting into individual tickers. + +8. No Hallucination of Data: +- Never generate, estimate, or infer financial figures, dates, or other factual answers. +- Only provide numbers, data, or conclusions that are directly grounded in tool responses or explicitly supplied information. +- If tool data is unavailable or incomplete, state this clearly and do not fabricate or guess any values. 
""" diff --git a/pyproject.toml b/pyproject.toml index b713aee..b34e583 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,14 +23,18 @@ dependencies = [ "fastmcp>=2.11", "langchain-core>=0.3.15", "langchain-google-genai>=2.1.5,<3", - "numpy>=1.22.4", + "numpy>=1.22.4,<2.0.0", # Constrain to numpy 1.x for spaCy compatibility "pandas>=2.0.0", "pillow>=10", "pydantic>=2.10.0,<3", "pyjwt>=2.8.0", "python-dateutil>=2.8.2,<2.9", + "sentence-transformers>=2.2.0,<3", # For dynamic prompt construction + "spacy>=3.4.0,<4", # For named entity recognition in dynamic prompts + "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl", # spaCy English model for NER + "torch>=2.0.0,<2.3.0", # Constrain torch version for compatibility "strenum>=0.4.15", - "tabulate>=0.9.0", # required for turning dataframes into markdown + "tabulate>=0.9.0", # required for turning dataframes into markdown "types-requests>=2.22.0,<3", "requests>=2.22.0,<3", "urllib3>=2.5", @@ -45,12 +49,13 @@ dev = [ "nbconvert>=7.16,<8", "nbformat>5.10,<6", "nbqa>1.9,<2", - "pytest>=6.1.2,<7", "pytest-cov>=6.0.0,<7", "requests_mock>=1.12,<2", "ruff>=0.9.4,<1", "time_machine>=2.1,<3", - "types-cachetools>=5.5,<6" + "types-cachetools>=5.5,<6", + "pytest>=8.1.1,<9", + "python-dotenv>=1.1.0,<2", ] @@ -152,3 +157,6 @@ warn_redundant_casts = true warn_unused_ignores = true warn_unreachable = true exclude = "tests" + +[tool.uv.sources] +en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }