open-build · glind · Mar 20, 2025 · Mar 20, 2025 · Mar 20, 2025
diff --git a/.gitignore b/.gitignore
@@ -161,4 +161,4 @@ cython_debug/
 #.idea/
 
 # local envrionment variables
-local.env
+.env
diff --git a/README.md b/README.md
@@ -8,6 +8,19 @@ BabbleBeaver aims to democratize conversational AI, offering a plug-and-play sol
 
 ## Installation
 
+### Creating a .env file
+
+To configure environment variables for BabbleBeaver, you need to create a `.env` file from the provided `example.env` file. Follow these steps:
+
+1. Navigate to the project root directory where `example.env` is located.
+2. Copy the `example.env` file to create a new `.env` file:
+    ```bash
+    cp example.env .env
+    ```
+3. Open the `.env` file in a text editor and update the values as needed. This file contains environment-specific variables such as API keys and configuration settings.
+
+Make sure to keep the `.env` file secure and do not expose it publicly, as it may contain sensitive information.
+
 ### Running the FastAPI application locally
 
 - Make sure you have Python installed on your machine. You can download and install Python from the official website: https://www.python.org/downloads/

diff --git a/ai_configurator.py b/ai_configurator.py
@@ -3,11 +3,7 @@
 from dotenv import load_dotenv
 from model_config.model_config import ModelConfig
 
-if os.path.exists('local.env'):
-    load_dotenv('local.env')
-else:
-    load_dotenv()
-
+load_dotenv()
 
 class AIConfigurator:
     def __init__(self):

diff --git a/ai_retrainer.py b/ai_retrainer.py
@@ -1,10 +1,124 @@
+import requests
+from google.cloud import aiplatform
+import os
+import json
+import PyPDF2
+import docx
+import openai
+
 # ai_retrainer.py
 
 class AIRetrainer:
-    def retrain_with_api(self, data):
-        # Implementation for retraining with third-party API calls
-        pass
+    def retrain_with_api(self, api_endpoint, model_type, api_key):
+        """Fetch training data from api_endpoint and fine-tune model_type.
+
+        Raises ValueError for an unsupported model_type (checked before any
+        network call) and Exception when the endpoint does not return 200.
+        """
+        if model_type not in ('gemini', 'chatgpt'):
+            raise ValueError("Unsupported model type")
+        headers = {
+            'Authorization': f'Bearer {api_key}',
+            'Content-Type': 'application/json'
+        }
+        # A timeout keeps a stalled endpoint from blocking the caller forever.
+        response = requests.get(api_endpoint, headers=headers, timeout=30)
+        if response.status_code != 200:
+            raise Exception(f"Failed to retrieve data from API. Status code: {response.status_code}")
+        data = response.json()
+        fine_tune = self.fine_tune_gemini if model_type == 'gemini' else self.fine_tune_chatgpt
+        fine_tune(data)
+
+    def fine_tune_gemini(self, data):
+        """Submit a custom training job named "fine_tune_gemini" for data.
+
+        data may be a JSON string or an already-parsed JSON value; job
+        arguments must be strings, so parsed values are serialized first.
+        """
+        import google.auth
+
+        # google.auth.default() supplies both the credentials and the project.
+        credentials, project = google.auth.default()
+        client = aiplatform.gapic.JobServiceClient(credentials=credentials)
+        serialized = data if isinstance(data, str) else json.dumps(data)
+
+        job = {
+            "display_name": "fine_tune_gemini",
+            "job_spec": {
+                "worker_pool_specs": [
+                    {
+                        "machine_spec": {
+                            "machine_type": "n1-standard-4"
+                        },
+                        "replica_count": 1,
+                        "python_package_spec": {
+                            "executor_image_uri": "gcr.io/cloud-aiplatform/training/tf-cpu.2-3:latest",
+                            "package_uris": ["gs://your-bucket/path/to/your/package"],
+                            "python_module": "trainer.task",
+                            "args": ["--data", serialized]
+                        }
+                    }
+                ]
+            }
+        }
+
+        parent = f"projects/{project}/locations/us-central1"
+        response = client.create_custom_job(parent=parent, custom_job=job)
+        print(f"Job submitted. Job name: {response.name}")
+
+    def fine_tune_chatgpt(self, data):
+        """Upload prompt/completion pairs and start an OpenAI fine-tune job.
+
+        data is an iterable of mappings with "prompt" and "completion" keys.
+        """
+        import io
+
+        openai.api_key = os.getenv("OPENAI_API_KEY")
+
+        # The fine-tune endpoint takes the id of an uploaded JSONL file, not
+        # in-memory records, so serialize one JSON object per line and upload.
+        records = [
+            {"prompt": item["prompt"], "completion": item["completion"]}
+            for item in data
+        ]
+        jsonl = "\n".join(json.dumps(record) for record in records)
+        upload = openai.File.create(file=io.BytesIO(jsonl.encode("utf-8")), purpose="fine-tune")
+
+        # NOTE(review): confirm "davinci-codex" is fine-tunable for this account.
+        response = openai.FineTune.create(training_file=upload["id"], model="davinci-codex", n_epochs=4)
+
+        print(f"Fine-tuning job created. Job ID: {response['id']}")
 
-    def retrain_with_documents(self, document_path):
-        # Implementation for retraining with document uploads
-        pass
+    def retrain_with_documents(self, document_path, model_type):
+        """Load JSON training data from a JSON, PDF or DOCX file and fine-tune.
+
+        Raises FileNotFoundError for a missing path and ValueError for an
+        unsupported document format or model type.
+        """
+        if not os.path.exists(document_path):
+            raise FileNotFoundError(f"The document at {document_path} does not exist.")
+
+        # Pick the reader from the extension first: PDF and DOCX are binary,
+        # so opening them in text mode fails before any parser can run.
+        if document_path.endswith('.pdf'):
+            with open(document_path, 'rb') as file:
+                # PdfReader/pages replace PdfFileReader/getPage, removed in PyPDF2 3.0.
+                reader = PyPDF2.PdfReader(file)
+                document_data = "".join(page.extract_text() or "" for page in reader.pages)
+        elif document_path.endswith('.docx'):
+            doc = docx.Document(document_path)
+            document_data = "\n".join([para.text for para in doc.paragraphs])
+        elif document_path.endswith('.json'):
+            with open(document_path, 'r', encoding='utf-8') as file:
+                document_data = file.read()
+        else:
+            raise ValueError("Unsupported document format. Only JSON, PDF and DOCX are supported.")
+
+        # The extracted text itself is expected to be JSON.
+        data = json.loads(document_data)
+        if model_type == 'gemini':
+            self.fine_tune_gemini(data)
+        elif model_type == 'chatgpt':
+            self.fine_tune_chatgpt(data)
+        else:
+            raise ValueError("Unsupported model type")
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,15 +1,12 @@
-version: '3.8'
+version: '3.7'
 services:
   web:
     build: .
     command: uvicorn main:app --host 0.0.0.0 --reload
-    volumes:
-      - .:/app
     ports:
       - "8000:8000"
     environment:
       - OPENAI_API_KEY=${OPENAI_API_KEY}
       - GOOGLE_API_KEY=${GOOGLE_API_KEY}
       - INITIAL_PROMPT_FILE_PATH=${INITIAL_PROMPT_FILE_PATH}
       - HUGGINGFACE_AUTH_TOKEN=${HUGGINGFACE_AUTH_TOKEN}
-      # Set other environment variables as needed
diff --git a/example.env b/example.env
@@ -0,0 +1,5 @@
+CORS_ALLOWED_DOMAINS=example.com,anotherdomain.com
+OPENAI_API_KEY=
+GOOGLE_API_KEY=
+HUGGINGFACE_AUTH_TOKEN=
+INITIAL_PROMPT_FILE_PATH=""
diff --git a/initial-prompt.txt b/initial-prompt.txt
@@ -1,14 +1,13 @@
-You are an all-in-one, helpful, and friendly assistant that is capable of serving users' needs in the following areas:
+You are an all-in-one, helpful, and friendly assistant that is capable of serving users' needs in the following areas:
 
-1. Information retrieval - Finding and summarizing information on various topics.
-2. Writing assistance - Helping with writing, editing, and proofreading content.
-3. Programming help - Assisting with coding, debugging, and programming concepts.
-4. Language translation - Translating text between different languages.
-5. Educational support - Providing explanations, tutoring, and help with academic subjects.
-6. Brainstorming ideas - Generating ideas and solutions for projects, problems, or creative endeavors.
-7. Simulating characters and dialogues - Creating and role-playing characters or scenarios.
-8. Content recommendation - Suggesting books, movies, articles, or other content based on preferences.
-9. Entertainment and companionship - Engaging in casual conversation, games, and activities.
-10. Therapy and mental health support - Offering supportive dialogue and coping strategies. (NOTE: You are simply a resource for this and not a substitute for professional mental health services.)
+1. Information retrieval - Finding and summarizing information on various topics about health and nutrition.
+2. Writing assistance - Helping with writing meal plans and configuring them for optimal health, as well as nutrition-related content and goals.
+3. Fitness help - Assisting with training and fitness plans for varying ages and fitness levels.
+4. Educational support - Providing explanations, tutoring, and help with academic subjects as they relate to health and fitness.
+5. Content recommendation - Suggesting books, movies, articles, or other content based on health and fitness goals.
 
-If the user asks a certain question and you are not sure about how to proceed, ask follow-up questions until you're confident you can provide a relevant and helpful response. Here is also the conversation that has taken place so far between the user and you so make sure to take all that context also into account when responding to the users questions or helping them in any regard if appropriate. I'd also like you to keep in mind that there is no need on your end to summarize the conversation thus far in your responses.
+If the user asks a certain question and you are not sure about how to proceed, ask follow-up questions until 
+you are confident you can provide a relevant and helpful response. Here is also the conversation that has taken 
+place so far between the user and you so make sure to take all that context also into account when 
+responding to the user's questions or helping them in any regard if appropriate.
+I would also like you to keep in mind that there is no need on your end to summarize the conversation thus far in your responses.
diff --git a/main.py b/main.py
@@ -96,6 +96,8 @@ async def chatbot(request: Request):
     provider = "gemini" # specify the provider for this model
     tokenizer = tiktoken.get_encoding("cl100k_base") # specify the tokenizer to use for this model
     tokenizer_function = lambda text: len(tokenizer.encode(text)) # specify the tokenizing function to use
+    with open("initial-prompt.txt", "r") as prompt_file:
+        initial_prompt = prompt_file.read().strip()
 
     # specify the completion function you'd like to use
     def completion_function(api_key: str, 
@@ -125,7 +127,6 @@ def completion_function(api_key: str,
             except Exception as e:
                 raise e
         else:
-            print("Using GenerativeAI")
             import google.generativeai as genai
 
             model = genai.GenerativeModel(model_name)

diff --git a/model_config/model_config.py b/model_config/model_config.py
@@ -3,6 +3,7 @@
 from configparser import ConfigParser
 
 load_dotenv()
+
 parser = ConfigParser()
 
 class ModelConfig():

diff --git a/requirements.txt b/requirements.txt
@@ -27,4 +27,13 @@ google-generativeai
 IPython
 tiktoken==0.6.0
 google-cloud-aiplatform[tokenization]==1.57.0
-tokenizers==0.19.0
+tokenizers==0.19.0
+
+PyPDF2
+python-docx  # provides the `docx` module imported by ai_retrainer.py
+docx2txt
+pandas
+numpy
+scikit-learn
+scipy
+matplotlib
-Original file line number
+Diff line change
@@ Expand Up / @@ -161,4 +161,4 @@ cython_debug/ @@
     #.idea/
     # local envrionment variables
-    local.env
+    .env