diff --git a/.gitignore b/.gitignore
index d6b80b4..6a1a878 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,4 +161,4 @@ cython_debug/
 #.idea/
 
 # local envrionment variables
-local.env
\ No newline at end of file
+.env
\ No newline at end of file
diff --git a/README.md b/README.md
index cbeb387..a97e337 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,19 @@ BabbleBeaver aims to democratize conversational AI, offering a plug-and-play sol
 
 ## Installation
 
+### Creating a .env file
+
+To configure environment variables for BabbleBeaver, you need to create a `.env` file from the provided `example.env` file. Follow these steps:
+
+1. Navigate to the project root directory where `example.env` is located.
+2. Copy the `example.env` file to create a new `.env` file:
+   ```bash
+   cp example.env .env
+   ```
+3. Open the `.env` file in a text editor and update the values as needed. This file contains environment-specific variables such as API keys and configuration settings.
+
+Make sure to keep the `.env` file secure and do not expose it publicly, as it may contain sensitive information.
+
 ### Running the FastAPI application locally
 
 - Make sure you have Python installed on your machine. You can download and install Python from the official website: https://www.python.org/downloads/
diff --git a/ai_configurator.py b/ai_configurator.py
index 995a598..86ce5af 100644
--- a/ai_configurator.py
+++ b/ai_configurator.py
@@ -3,11 +3,7 @@ from dotenv import load_dotenv
 
 from model_config.model_config import ModelConfig
 
-if os.path.exists('local.env'):
-    load_dotenv('local.env')
-else:
-    load_dotenv()
-
+load_dotenv()
 
 class AIConfigurator:
     def __init__(self):
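
For context, a minimal sketch (not part of the patch) of how values from the new `.env` file become visible to the application once `load_dotenv()` has run; the variable names are taken from `example.env`:

```python
# python-dotenv looks for a file named ".env" (searching upward from the calling
# module) and merges its entries into the process environment.
import os
from dotenv import load_dotenv

load_dotenv()

openai_key = os.getenv("OPENAI_API_KEY")
allowed_domains = os.getenv("CORS_ALLOWED_DOMAINS", "").split(",")
```
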
diff --git a/ai_retrainer.py b/ai_retrainer.py
index 90af968..5e56853 100644
--- a/ai_retrainer.py
+++ b/ai_retrainer.py
@@ -1,10 +1,124 @@
+import requests
+from google.cloud import aiplatform
+import os
+import json
+import PyPDF2
+import docx
+import openai
+
 # ai_retrainer.py
 
 class AIRetrainer:
-    def retrain_with_api(self, data):
-        # Implementation for retraining with third-party API calls
-        pass
+    def retrain_with_api(self, api_endpoint, model_type, api_key):
+        # Fetch training data from a third-party API and dispatch to the right fine-tuner
+        headers = {
+            'Authorization': f'Bearer {api_key}',
+            'Content-Type': 'application/json'
+        }
+
+        response = requests.get(api_endpoint, headers=headers)
+
+        if response.status_code == 200:
+            data = response.json()
+            if model_type == 'gemini':
+                self.fine_tune_gemini(data)
+            elif model_type == 'chatgpt':
+                self.fine_tune_chatgpt(data)
+            else:
+                raise ValueError("Unsupported model type")
+        else:
+            raise Exception(f"Failed to retrieve data from API. Status code: {response.status_code}")
+
+    def fine_tune_gemini(self, data):
+        # Implementation for fine-tuning the Gemini model hosted on Google Cloud
+        import google.auth
+
+        # Authenticate with Google Cloud
+        credentials, project = google.auth.default()
+
+        # Initialize the AI Platform client for the target region
+        client = aiplatform.gapic.JobServiceClient(
+            credentials=credentials,
+            client_options={"api_endpoint": "us-central1-aiplatform.googleapis.com"}
+        )
+
+        # Define the fine-tuning job
+        job = {
+            "display_name": "fine_tune_gemini",
+            "job_spec": {
+                "worker_pool_specs": [
+                    {
+                        "machine_spec": {
+                            "machine_type": "n1-standard-4"
+                        },
+                        "replica_count": 1,
+                        "python_package_spec": {
+                            "executor_image_uri": "gcr.io/cloud-aiplatform/training/tf-cpu.2-3:latest",
+                            "package_uris": ["gs://your-bucket/path/to/your/package"],
+                            "python_module": "trainer.task",
+                            "args": ["--data", json.dumps(data)]
+                        }
+                    }
+                ]
+            }
+        }
+
+        # Submit the job to AI Platform
+        parent = f"projects/{project}/locations/us-central1"
+        response = client.create_custom_job(parent=parent, custom_job=job)
+
+        print(f"Job submitted. Job name: {response.name}")
+
+    def fine_tune_chatgpt(self, data):
+        # Implementation for fine-tuning the ChatGPT model
+
+        # Set your OpenAI API key
+        openai.api_key = os.getenv("OPENAI_API_KEY")
+
+        # Prepare the data for fine-tuning as a JSONL file of prompt/completion pairs
+        training_file_path = "training_data.jsonl"
+        with open(training_file_path, "w") as f:
+            for item in data:
+                f.write(json.dumps({
+                    "prompt": item["prompt"],
+                    "completion": item["completion"]
+                }) + "\n")
+
+        # Upload the training file and create a fine-tuning job
+        with open(training_file_path, "rb") as f:
+            training_file = openai.File.create(file=f, purpose="fine-tune")
+
+        response = openai.FineTune.create(
+            training_file=training_file["id"],
+            model="davinci",
+            n_epochs=4
+        )
+
+        print(f"Fine-tuning job created. Job ID: {response['id']}")
 
-    def retrain_with_documents(self, document_path):
-        # Implementation for retraining with document uploads
-        pass
+    def retrain_with_documents(self, document_path, model_type):
+        # Extract text from an uploaded document and dispatch to the right fine-tuner
+        if not os.path.exists(document_path):
+            raise FileNotFoundError(f"The document at {document_path} does not exist.")
+
+        # The extracted text is expected to contain JSON training data
+        if document_path.endswith('.pdf'):
+            with open(document_path, 'rb') as file:
+                reader = PyPDF2.PdfReader(file)
+                document_data = ""
+                for page in reader.pages:
+                    document_data += page.extract_text()
+        elif document_path.endswith('.docx'):
+            doc = docx.Document(document_path)
+            document_data = "\n".join([para.text for para in doc.paragraphs])
+        elif document_path.endswith('.json'):
+            with open(document_path, 'r') as file:
+                document_data = file.read()
+        else:
+            raise ValueError("Unsupported document format. Only JSON, PDF and DOCX are supported.")
+
+        data = json.loads(document_data)
+
+        # Call the appropriate fine-tune method
+        if model_type == 'gemini':
+            self.fine_tune_gemini(data)
+        elif model_type == 'chatgpt':
+            self.fine_tune_chatgpt(data)
+        else:
+            raise ValueError("Unsupported model type")
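
A hypothetical usage sketch of the new `AIRetrainer` class (not part of the patch); the endpoint URL and document path below are placeholders:

```python
import os

from ai_retrainer import AIRetrainer

retrainer = AIRetrainer()

# Pull prompt/completion pairs from a third-party API and fine-tune ChatGPT
retrainer.retrain_with_api(
    api_endpoint="https://example.com/training-data",
    model_type="chatgpt",
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Or extract training data from an uploaded document (JSON, PDF, or DOCX)
retrainer.retrain_with_documents("data/training_examples.json", model_type="gemini")
```
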
diff --git a/docker-compose.yml b/docker-compose.yml
index 7f78e4b..cc33b9e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,10 +1,8 @@
-version: '3.8'
+version: '3.7'
 services:
   web:
     build: .
     command: uvicorn main:app --host 0.0.0.0 --reload
-    volumes:
-      - .:/app
     ports:
       - "8000:8000"
     environment:
@@ -12,4 +10,3 @@ services:
       - GOOGLE_API_KEY=${GOOGLE_API_KEY}
       - INITIAL_PROMPT_FILE_PATH=${INITIAL_PROMPT_FILE_PATH}
       - HUGGINGFACE_AUTH_TOKEN=${HUGGINGFACE_AUTH_TOKEN}
-      # Set other environment variables as needed
\ No newline at end of file
diff --git a/example.env b/example.env
new file mode 100644
index 0000000..71cc542
--- /dev/null
+++ b/example.env
@@ -0,0 +1,5 @@
+CORS_ALLOWED_DOMAINS=example.com,anotherdomain.com
+OPENAI_API_KEY=
+GOOGLE_API_KEY=
+HUGGINGFACE_AUTH_TOKEN=
+INITIAL_PROMPT_FILE_PATH=""
\ No newline at end of file
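
With `example.env` copied to `.env` (which Docker Compose reads from the project root for the `${...}` substitutions above), a local run might look like the following; this assumes Docker Compose v2:

```bash
cp example.env .env          # fill in the API keys first
docker compose up --build    # the API is then served on http://localhost:8000
```
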
Here is also the conversation that has taken +place so far between the user and you so make sure to take all that context also into account when +responding to the users questions or helping them in any regard if appropriate. +I would also like you to keep in mind that there is no need on your end to summarize the conversation thus far in your responses. \ No newline at end of file diff --git a/main.py b/main.py index 3ad7abd..d529c4d 100644 --- a/main.py +++ b/main.py @@ -96,6 +96,8 @@ async def chatbot(request: Request): provider = "gemini" # specify the provider for this model tokenizer = tiktoken.get_encoding("cl100k_base") # specify the tokenizer to use for this model tokenizer_function = lambda text: len(tokenizer.encode(text)) # specify the tokenizing function to use + with open("initial-prompt.txt", "r") as prompt_file: + initial_prompt = prompt_file.read().strip() # specify the completion function you'd like to use def completion_function(api_key: str, @@ -125,7 +127,6 @@ def completion_function(api_key: str, except Exception as e: raise e else: - print("Using GenerativeAI") import google.generativeai as genai model = genai.GenerativeModel(model_name) diff --git a/model_config/model_config.py b/model_config/model_config.py index d94f55f..5f6b9ab 100644 --- a/model_config/model_config.py +++ b/model_config/model_config.py @@ -3,6 +3,7 @@ from configparser import ConfigParser load_dotenv() + parser = ConfigParser() class ModelConfig(): diff --git a/requirements.txt b/requirements.txt index 81f71b6..2d7d727 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,4 +27,13 @@ google-generativeai IPython tiktoken==0.6.0 google-cloud-aiplatform[tokenization]==1.57.0 -tokenizers==0.19.0 \ No newline at end of file +tokenizers==0.19.0 + +PyPDF2 + +docx2txt +pandas +numpy +scikit-learn +scipy +matplotlib