From 91c76d4152c52355679632fd5f31c57e732d89e0 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Thu, 19 Sep 2024 12:58:22 -0700
Subject: [PATCH] Intelligently initialize a decent default set of chat model options

Given the LLM landscape is rapidly changing, providing a good default
set of options should help reduce decision fatigue when getting started.

Improve initialization flow during first run:
- Set Google and Anthropic chat models too.
  Previously, only Offline and OpenAI chat models could be set during init.
- Add multiple chat models for each LLM provider.
  Interactively set a comma-separated list of models for each provider.
- Auto add default chat models for each provider in non-interactive
  mode if the {OPENAI,GEMINI,ANTHROPIC}_API_KEY env var is set.
- Do not ask for max_tokens or tokenizer for offline models during
  initialization. Use better defaults inferred in code instead.
- Explicitly set the default chat model to use.
  If unset, it implicitly defaulted to the first chat model. Making it
  explicit reduces this confusion.

Resolves #882
---
 docker-compose.yml                       |  13 +-
 src/khoj/processor/conversation/utils.py |  13 +-
 src/khoj/utils/constants.py              |  11 +-
 src/khoj/utils/initialization.py         | 215 ++++++++++++++++-------
 tests/test_offline_chat_actors.py        |   4 +-
 5 files changed, 184 insertions(+), 72 deletions(-)
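As a rough illustration of the new non-interactive flow (a minimal standalone
sketch, not part of the applied diff; the env var names and default chat model
lists mirror the constants.py and initialization.py changes below):

    import os

    # A provider is auto-configured on first run iff its API key env var is set.
    # Default chat model lists mirror src/khoj/utils/constants.py in this patch.
    providers = {
        "OpenAI": ("OPENAI_API_KEY", ["gpt-4o-mini", "gpt-4o"]),
        "Gemini": ("GEMINI_API_KEY", ["gemini-1.5-flash", "gemini-1.5-pro"]),
        "Anthropic": ("ANTHROPIC_API_KEY", ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"]),
    }
    for provider, (env_var, default_models) in providers.items():
        if os.getenv(env_var):
            print(f"{provider}: would add chat models {default_models}")

Interactive runs instead prompt per provider and accept a comma-separated
list of model names, falling back to the same defaults on empty input.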
diff --git a/docker-compose.yml b/docker-compose.yml
index 4007b47f2..3c5fe9f4d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -44,10 +44,19 @@ services:
       - KHOJ_DEBUG=False
       - KHOJ_ADMIN_EMAIL=username@example.com
       - KHOJ_ADMIN_PASSWORD=password
-      # Uncomment the following lines to make your instance publicly accessible.
-      # Replace the domain with your domain. Proceed with caution, especially if you are using anonymous mode.
+      # Uncomment the lines below to use chat models from each provider.
+      # Ensure you set your provider-specific API keys.
+      # ---
+      # - OPENAI_API_KEY=your_openai_api_key
+      # - GEMINI_API_KEY=your_gemini_api_key
+      # - ANTHROPIC_API_KEY=your_anthropic_api_key
+      # Uncomment the necessary lines below to make your instance publicly accessible.
+      # Replace KHOJ_DOMAIN with your domain or IP address (no http/https prefix).
+      # Proceed with caution, especially if you are using anonymous mode.
+      # ---
       # - KHOJ_NO_HTTPS=True
       # - KHOJ_DOMAIN=192.168.0.104
+      # - KHOJ_DOMAIN=khoj.example.com
     command: --host="0.0.0.0" --port=42110 -vv --anonymous-mode --non-interactive
diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index b7794361f..3f3977986 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -18,13 +18,20 @@ logger = logging.getLogger(__name__)
 
 model_to_prompt_size = {
+    # OpenAI Models
     "gpt-3.5-turbo": 12000,
-    "gpt-3.5-turbo-0125": 12000,
-    "gpt-4-0125-preview": 20000,
     "gpt-4-turbo-preview": 20000,
+    "gpt-4o": 20000,
     "gpt-4o-mini": 20000,
     "o1-preview": 20000,
     "o1-mini": 20000,
+    # Google Models
+    "gemini-1.5-flash": 20000,
+    "gemini-1.5-pro": 20000,
+    # Anthropic Models
+    "claude-3-5-sonnet-20240620": 20000,
+    "claude-3-opus-20240229": 20000,
+    # Offline Models
     "TheBloke/Mistral-7B-Instruct-v0.2-GGUF": 3500,
     "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF": 3500,
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
@@ -163,7 +170,7 @@ def generate_chatml_messages_with_context(
     if loaded_model:
         max_prompt_size = infer_max_tokens(loaded_model.n_ctx(), model_to_prompt_size.get(model_name, math.inf))
     else:
-        max_prompt_size = model_to_prompt_size.get(model_name, 2000)
+        max_prompt_size = model_to_prompt_size.get(model_name, 10000)
 
     # Scale lookback turns proportional to max prompt size supported by model
     lookback_turns = max_prompt_size // 750
diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py
index bbbbd0007..d91cc84ab 100644
--- a/src/khoj/utils/constants.py
+++ b/src/khoj/utils/constants.py
@@ -8,8 +8,15 @@ app_env_filepath = "~/.khoj/env"
 telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
 content_directory = "~/.khoj/content/"
 
-default_offline_chat_model = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
-default_online_chat_model = "gpt-4o-mini"
+default_offline_chat_models = [
+    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+    "bartowski/gemma-2-9b-it-GGUF",
+    "bartowski/gemma-2-2b-it-GGUF",
+    "bartowski/Phi-3.5-mini-instruct-GGUF",
+]
+default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
+default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
+default_anthropic_chat_models = ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"]
 
 empty_config = {
     "search-type": {
diff --git a/src/khoj/utils/initialization.py b/src/khoj/utils/initialization.py
index 3791af3fb..9df6d46e1 100644
--- a/src/khoj/utils/initialization.py
+++ b/src/khoj/utils/initialization.py
@@ -6,11 +6,17 @@
     ChatModelOptions,
     KhojUser,
     OpenAIProcessorConversationConfig,
+    ServerChatSettings,
     SpeechToTextModelOptions,
     TextToImageModelConfig,
 )
 from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
-from khoj.utils.constants import default_offline_chat_model, default_online_chat_model
+from khoj.utils.constants import (
+    default_anthropic_chat_models,
+    default_gemini_chat_models,
+    default_offline_chat_models,
+    default_openai_chat_models,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -32,78 +38,44 @@ def _create_admin_user():
 
 def _create_chat_configuration():
     logger.info(
-        "🗣️  Configure chat models available to your server. You can always update these at /server/admin using your admin account"
+        "🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
     )
 
-    try:
-        use_offline_model = "y" if not interactive else input("Use offline chat model? (y/n): ")
(y/n): ") - if use_offline_model == "y": - logger.info("🗣️ Setting up offline chat model") - - if interactive: - offline_chat_model = input( - f"Enter the offline chat model you want to use. See HuggingFace for available GGUF models (default: {default_offline_chat_model}): " - ) - else: - offline_chat_model = "" - if offline_chat_model == "": - ChatModelOptions.objects.create( - chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE - ) - else: - default_max_tokens = model_to_prompt_size.get(offline_chat_model, 4000) - max_tokens = input( - f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):" - ) - max_tokens = max_tokens or default_max_tokens - - default_tokenizer = model_to_tokenizer.get( - offline_chat_model, "hf-internal-testing/llama-tokenizer" - ) - tokenizer = input( - f"Enter the tokenizer to use for the offline chat model (default: {default_tokenizer}):" - ) - tokenizer = tokenizer or default_tokenizer - - ChatModelOptions.objects.create( - chat_model=offline_chat_model, - model_type=ChatModelOptions.ModelType.OFFLINE, - max_prompt_size=max_tokens, - tokenizer=tokenizer, - ) - except ModuleNotFoundError as e: - logger.warning("Offline models are not supported on this device.") - + # Set up OpenAI's online models default_openai_api_key = os.getenv("OPENAI_API_KEY") default_use_openai_model = {True: "y", False: "n"}[default_openai_api_key != None] - use_openai_model = default_use_openai_model if not interactive else input("Use OpenAI models? (y/n): ") - if use_openai_model == "y": - logger.info("🗣️ Setting up your OpenAI configuration") + use_model_provider = default_use_openai_model if not interactive else input("Add OpenAI models? (y/n): ") + if use_model_provider == "y": + logger.info("️💬 Setting up your OpenAI configuration") if interactive: - api_key = input(f"Enter your OpenAI API key (default: {default_openai_api_key}): ") + user_api_key = input(f"Enter your OpenAI API key (default: {default_openai_api_key}): ") + api_key = user_api_key if user_api_key != "" else default_openai_api_key else: api_key = default_openai_api_key - OpenAIProcessorConversationConfig.objects.create(api_key=api_key) + chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="OpenAI") if interactive: - openai_chat_model = input( - f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): " + chat_model_names = input( + f"Enter the OpenAI chat models you want to use (default: {','.join(default_openai_chat_models)}): " ) - openai_chat_model = openai_chat_model or default_online_chat_model + chat_models = chat_model_names.split(",") if chat_model_names != "" else default_openai_chat_models + chat_models = [model.strip() for model in chat_models] else: - openai_chat_model = default_online_chat_model - default_max_tokens = model_to_prompt_size.get(openai_chat_model, 10000) - if interactive: - max_tokens = input( - f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): " + chat_models = default_openai_chat_models + + # Add OpenAI chat models + for chat_model in chat_models: + vision_enabled = chat_model in ["gpt-4o-mini", "gpt-4o"] + default_max_tokens = model_to_prompt_size.get(chat_model) + ChatModelOptions.objects.create( + chat_model=chat_model, + model_type=ChatModelOptions.ModelType.OPENAI, + max_prompt_size=default_max_tokens, + openai_config=chat_model_provider, + vision_enabled=vision_enabled, ) - max_tokens = 
-        else:
-            max_tokens = default_max_tokens
-        ChatModelOptions.objects.create(
-            chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens
-        )
+        # Add OpenAI speech to text model
         default_speech2text_model = "whisper-1"
         if interactive:
             openai_speech2text_model = input(
@@ -116,6 +88,7 @@ def _create_chat_configuration():
             model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI
         )
 
+        # Add OpenAI text to image model
        default_text_to_image_model = "dall-e-3"
        if interactive:
            openai_text_to_image_model = input(
@@ -128,9 +101,124 @@ def _create_chat_configuration():
             model_name=openai_text_to_image_model, model_type=TextToImageModelConfig.ModelType.OPENAI
         )
 
-    if use_offline_model == "y" or use_openai_model == "y":
-        logger.info("🗣️ Chat model configuration complete")
+    # Set up Google's Gemini online chat models
+    default_gemini_api_key = os.getenv("GEMINI_API_KEY")
+    default_use_gemini_model = {True: "y", False: "n"}[default_gemini_api_key is not None]
+    use_model_provider = default_use_gemini_model if not interactive else input("Add Google's chat models? (y/n): ")
+    if use_model_provider == "y":
+        logger.info("💬 Setting up your Google Gemini configuration")
+        if interactive:
+            user_api_key = input(f"Enter your Gemini API key (default: {default_gemini_api_key}): ")
+            api_key = user_api_key if user_api_key != "" else default_gemini_api_key
+        else:
+            api_key = default_gemini_api_key
+        chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="Gemini")
+
+        if interactive:
+            chat_model_names = input(
+                f"Enter the Gemini chat models you want to use (default: {','.join(default_gemini_chat_models)}): "
+            )
+            chat_models = chat_model_names.split(",") if chat_model_names != "" else default_gemini_chat_models
+            chat_models = [model.strip() for model in chat_models]
+        else:
+            chat_models = default_gemini_chat_models
+
+        # Add Gemini chat models
+        for chat_model in chat_models:
+            default_max_tokens = model_to_prompt_size.get(chat_model)
+            # Vision is not enabled for Gemini models here
+            ChatModelOptions.objects.create(
+                chat_model=chat_model,
+                model_type=ChatModelOptions.ModelType.GOOGLE,
+                max_prompt_size=default_max_tokens,
+                openai_config=chat_model_provider,
+                vision_enabled=False,
+            )
+
+    # Set up Anthropic's online chat models
+    default_anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
+    default_use_anthropic_model = {True: "y", False: "n"}[default_anthropic_api_key is not None]
+    use_model_provider = (
+        default_use_anthropic_model if not interactive else input("Add Anthropic's chat models? (y/n): ")
+    )
(y/n): ") + ) + if use_model_provider == "y": + logger.info("️💬 Setting up your Anthropic configuration") + if interactive: + user_api_key = input(f"Enter your Anthropic API key (default: {default_anthropic_api_key}): ") + api_key = user_api_key if user_api_key != "" else default_anthropic_api_key + else: + api_key = default_anthropic_api_key + chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="Anthropic") + + if interactive: + chat_model_names = input( + f"Enter the Anthropic chat models you want to use (default: {','.join(default_anthropic_chat_models)}): " + ) + chat_models = chat_model_names.split(",") if chat_model_names != "" else default_anthropic_chat_models + chat_models = [model.strip() for model in chat_models] + else: + chat_models = default_anthropic_chat_models + + # Add Anthropic chat models + for chat_model in chat_models: + vision_enabled = False + default_max_tokens = model_to_prompt_size.get(chat_model) + ChatModelOptions.objects.create( + chat_model=chat_model, + model_type=ChatModelOptions.ModelType.ANTHROPIC, + max_prompt_size=default_max_tokens, + openai_config=chat_model_provider, + vision_enabled=False, + ) + + # Set up offline chat models + use_model_provider = "y" if not interactive else input("Add Offline chat models? (y/n): ") + if use_model_provider == "y": + logger.info("️💬 Setting up Offline chat models") + + if interactive: + chat_model_names = input( + f"Enter the offline chat models you want to use. See HuggingFace for available GGUF models (default: {','.join(default_offline_chat_models)}): " + ) + chat_models = chat_model_names.split(",") if chat_model_names != "" else default_offline_chat_models + chat_models = [model.strip() for model in chat_models] + else: + chat_models = default_offline_chat_models + + # Add chat models + for chat_model in chat_models: + default_max_tokens = model_to_prompt_size.get(chat_model) + default_tokenizer = model_to_tokenizer.get(chat_model) + ChatModelOptions.objects.create( + chat_model=chat_model, + model_type=ChatModelOptions.ModelType.OFFLINE, + max_prompt_size=default_max_tokens, + tokenizer=default_tokenizer, + ) + + chat_models_configured = ChatModelOptions.objects.count() + + # Explicitly set default chat model + if chat_models_configured > 0: + default_chat_model_name = ChatModelOptions.objects.first().chat_model + # If there are multiple chat models, ask the user to choose the default chat model + if chat_models_configured > 1 and interactive: + user_chat_model_name = input( + f"Enter the default chat model to use (default: {default_chat_model_name}): " + ) + else: + user_chat_model_name = None + + # If the user's choice is valid, set it as the default chat model + if user_chat_model_name and ChatModelOptions.objects.filter(chat_model=user_chat_model_name).exists(): + default_chat_model_name = user_chat_model_name + + # Create a server chat settings object with the default chat model + default_chat_model = ChatModelOptions.objects.filter(chat_model=default_chat_model_name).first() + ServerChatSettings.objects.create(chat_default=default_chat_model) + logger.info("🗣️ Chat model configuration complete") + # Set up offline speech to text model use_offline_speech2text_model = "n" if not interactive else input("Use offline speech to text model? 
(y/n): ") if use_offline_speech2text_model == "y": logger.info("🗣️ Setting up offline speech to text model") @@ -163,7 +251,8 @@ def _create_chat_configuration(): try: _create_chat_configuration() break - # Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page. + # Some environments don't support interactive input. We catch the exception and return if that's the case. + # The admin can still configure their settings from the admin page. except EOFError: return except Exception as e: diff --git a/tests/test_offline_chat_actors.py b/tests/test_offline_chat_actors.py index ed17cac9c..2a0383f11 100644 --- a/tests/test_offline_chat_actors.py +++ b/tests/test_offline_chat_actors.py @@ -19,12 +19,12 @@ from khoj.processor.conversation.offline.utils import download_model from khoj.processor.conversation.utils import message_to_log from khoj.routers.helpers import aget_relevant_output_modes -from khoj.utils.constants import default_offline_chat_model +from khoj.utils.constants import default_offline_chat_models @pytest.fixture(scope="session") def loaded_model(): - return download_model(default_offline_chat_model, max_tokens=5000) + return download_model(default_offline_chat_models[0], max_tokens=5000) freezegun.configure(extend_ignore_list=["transformers"])