Skip to content

Start on fine tune functions #31

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,4 @@ cython_debug/
#.idea/

# local environment variables
local.env
.env
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,19 @@ BabbleBeaver aims to democratize conversational AI, offering a plug-and-play sol

## Installation

### Creating a .env file

To configure environment variables for BabbleBeaver, you need to create a `.env` file from the provided `example.env` file. Follow these steps:

1. Navigate to the project root directory where `example.env` is located.
2. Copy the `example.env` file to create a new `.env` file:
```bash
cp example.env .env
```
3. Open the `.env` file in a text editor and update the values as needed. This file contains environment-specific variables such as API keys and configuration settings.

Make sure to keep the `.env` file secure and do not expose it publicly, as it may contain sensitive information.

### Running the FastAPI application locally

- Make sure you have Python installed on your machine. You can download and install Python from the official website: https://www.python.org/downloads/
Expand Down
6 changes: 1 addition & 5 deletions ai_configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,7 @@
from dotenv import load_dotenv
from model_config.model_config import ModelConfig

if os.path.exists('local.env'):
load_dotenv('local.env')
else:
load_dotenv()

load_dotenv()

class AIConfigurator:
def __init__(self):
Expand Down
126 changes: 120 additions & 6 deletions ai_retrainer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,124 @@
import requests
from google.cloud import aiplatform
import os
import json
import PyPDF2
import docx
import openai

# ai_retrainer.py

class AIRetrainer:
    """Load training data and hand it to a model-specific fine-tuning backend.

    Training data comes either from an HTTP endpoint (``retrain_with_api``)
    or from a local JSON/PDF/DOCX document (``retrain_with_documents``).
    Supported ``model_type`` values are ``'gemini'`` and ``'chatgpt'``.
    """

    # ``requests`` applies no timeout unless one is passed, so a stalled
    # endpoint would otherwise block the caller indefinitely.
    REQUEST_TIMEOUT_SECONDS = 30

    # Region used both for the Vertex AI endpoint and the job's parent path.
    GEMINI_LOCATION = "us-central1"

    def retrain_with_api(self, api_endpoint, model_type, api_key):
        """Download training data from ``api_endpoint`` and fine-tune a model.

        Args:
            api_endpoint: URL that returns the training data as JSON.
            model_type: ``'gemini'`` or ``'chatgpt'``.
            api_key: Token sent as ``Authorization: Bearer <api_key>``.

        Raises:
            ValueError: If ``model_type`` is not supported.
            Exception: If the endpoint answers with a status other than 200.
        """
        # Resolve the backend first so an unsupported model type fails fast,
        # before any network traffic is generated.
        fine_tune = self._resolve_fine_tuner(model_type)

        headers = {
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json'
        }

        response = requests.get(
            api_endpoint,
            headers=headers,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )

        if response.status_code != 200:
            raise Exception(f"Failed to retrieve data from API. Status code: {response.status_code}")

        fine_tune(response.json())

    def fine_tune_gemini(self, data):
        """Submit a custom training job for ``data`` to Google Cloud.

        Args:
            data: Training payload. It is forwarded to the trainer module as
                the value of its ``--data`` argument; anything that is not
                already a string is serialised to JSON first.
        """
        import google.auth

        # Authenticate with Google Cloud using application-default credentials.
        credentials, project = google.auth.default()

        location = self.GEMINI_LOCATION

        # The job service is regional, so point the client at the same region
        # that is used in the parent resource path below.
        client = aiplatform.gapic.JobServiceClient(
            credentials=credentials,
            client_options={"api_endpoint": f"{location}-aiplatform.googleapis.com"},
        )

        # Command-line args must be strings; ``data`` is usually parsed JSON
        # (a list or dict), so serialise it before placing it in the spec.
        data_arg = data if isinstance(data, str) else json.dumps(data)

        # NOTE(review): the package URI below is a placeholder copied from the
        # original code — it must be replaced with a real training package.
        job = {
            "display_name": "fine_tune_gemini",
            "job_spec": {
                "worker_pool_specs": [
                    {
                        "machine_spec": {
                            "machine_type": "n1-standard-4"
                        },
                        "replica_count": 1,
                        "python_package_spec": {
                            "executor_image_uri": "gcr.io/cloud-aiplatform/training/tf-cpu.2-3:latest",
                            "package_uris": ["gs://your-bucket/path/to/your/package"],
                            "python_module": "trainer.task",
                            "args": ["--data", data_arg]
                        }
                    }
                ]
            }
        }

        parent = f"projects/{project}/locations/{location}"
        response = client.create_custom_job(parent=parent, custom_job=job)

        print(f"Job submitted. Job name: {response.name}")

    def fine_tune_chatgpt(self, data):
        """Create an OpenAI fine-tuning job from prompt/completion pairs.

        Args:
            data: Iterable of mappings, each with ``"prompt"`` and
                ``"completion"`` keys. Any other keys are ignored.

        Raises:
            KeyError: If an item lacks ``"prompt"`` or ``"completion"``.
        """
        import tempfile

        # The API key is read from the environment at call time.
        openai.api_key = os.getenv("OPENAI_API_KEY")

        training_data = [
            {"prompt": item["prompt"], "completion": item["completion"]}
            for item in data
        ]

        # ``training_file`` must be the ID of an uploaded file, not the
        # records themselves, so write them out as JSONL and upload first.
        # NOTE(review): ``openai.File`` / ``openai.FineTune`` are the pre-1.0
        # client interfaces — confirm the pinned openai version supports them.
        with tempfile.NamedTemporaryFile(mode="w+b", suffix=".jsonl") as jsonl_file:
            for record in training_data:
                jsonl_file.write((json.dumps(record) + "\n").encode("utf-8"))
            jsonl_file.flush()
            jsonl_file.seek(0)
            uploaded = openai.File.create(file=jsonl_file, purpose="fine-tune")

        # NOTE(review): "davinci-codex" is kept from the original code;
        # verify that it is actually a fine-tunable base model.
        response = openai.FineTune.create(
            training_file=uploaded["id"],
            model="davinci-codex",
            n_epochs=4
        )

        print(f"Fine-tuning job created. Job ID: {response['id']}")

    def retrain_with_documents(self, document_path, model_type):
        """Fine-tune a model from the contents of a local document.

        The text extracted from the document is parsed as JSON, so PDF and
        DOCX files are expected to contain JSON text as well.

        Args:
            document_path: Path to a ``.json``, ``.pdf`` or ``.docx`` file.
            model_type: ``'gemini'`` or ``'chatgpt'``.

        Raises:
            FileNotFoundError: If ``document_path`` does not exist.
            ValueError: If the file extension or ``model_type`` is not
                supported, or the extracted text is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError``).
        """
        if not os.path.exists(document_path):
            raise FileNotFoundError(f"The document at {document_path} does not exist.")

        document_data = self._read_document_text(document_path)
        data = json.loads(document_data)

        self._resolve_fine_tuner(model_type)(data)

    def _resolve_fine_tuner(self, model_type):
        """Return the bound fine-tune method for ``model_type`` or raise ValueError."""
        if model_type == 'gemini':
            return self.fine_tune_gemini
        if model_type == 'chatgpt':
            return self.fine_tune_chatgpt
        raise ValueError("Unsupported model type")

    @staticmethod
    def _read_document_text(document_path):
        """Return the text of a JSON, PDF or DOCX file, chosen by extension."""
        # Decide by extension BEFORE opening the file: PDF and DOCX are binary
        # formats and cannot be read in text mode.
        lowered = document_path.lower()

        if lowered.endswith('.pdf'):
            with open(document_path, 'rb') as file:
                # PdfReader / .pages replace the PdfFileReader / numPages /
                # getPage names that were removed in PyPDF2 3.x.
                reader = PyPDF2.PdfReader(file)
                return "".join(page.extract_text() or "" for page in reader.pages)

        if lowered.endswith('.docx'):
            # NOTE(review): the ``docx`` module is provided by the python-docx
            # package — confirm it is listed in requirements.txt.
            doc = docx.Document(document_path)
            return "\n".join(para.text for para in doc.paragraphs)

        if lowered.endswith('.json'):
            with open(document_path, 'r', encoding='utf-8') as file:
                return file.read()

        raise ValueError("Unsupported document format. Only JSON, PDF and DOCX are supported.")
5 changes: 1 addition & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
version: '3.8'
version: '3.7'
services:
web:
build: .
command: uvicorn main:app --host 0.0.0.0 --reload
volumes:
- .:/app
ports:
- "8000:8000"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- GOOGLE_API_KEY=${GOOGLE_API_KEY}
- INITIAL_PROMPT_FILE_PATH=${INITIAL_PROMPT_FILE_PATH}
- HUGGINGFACE_AUTH_TOKEN=${HUGGINGFACE_AUTH_TOKEN}
# Set other environment variables as needed
5 changes: 5 additions & 0 deletions example.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
CORS_ALLOWED_DOMAINS=example.com,anotherdomain.com
OPENAI_API_KEY=
GOOGLE_API_KEY=
HUGGINGFACE_AUTH_TOKEN=
INITIAL_PROMPT_FILE_PATH=""
23 changes: 11 additions & 12 deletions initial-prompt.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
You are an all-in-one, helpful, and friendly assistant that is capable of serving users' needs in the following areas:
You are an all-in-one, helpful, and friendly assistant that is capable of serving users' needs in the following areas:

1. Information retrieval - Finding and summarizing information on various topics.
2. Writing assistance - Helping with writing, editing, and proofreading content.
3. Programming help - Assisting with coding, debugging, and programming concepts.
4. Language translation - Translating text between different languages.
5. Educational support - Providing explanations, tutoring, and help with academic subjects.
6. Brainstorming ideas - Generating ideas and solutions for projects, problems, or creative endeavors.
7. Simulating characters and dialogues - Creating and role-playing characters or scenarios.
8. Content recommendation - Suggesting books, movies, articles, or other content based on preferences.
9. Entertainment and companionship - Engaging in casual conversation, games, and activities.
10. Therapy and mental health support - Offering supportive dialogue and coping strategies. (NOTE: You are simply a resource for this and not a substitute for professional mental health services.)
1. Information retrieval - Finding and summarizing information on various topics about Health and Nutrition
2. Writing assistance - Helping with writing, meal plans and configuring them for optimal health as well as nutrition-related content and goals.
3. Fitness help - Assisting with training and fitness plans for varying ages and fitness levels.
4. Educational support - Providing explanations, tutoring, and help with academic subjects as they relate to health and fitness.
5. Content recommendation - Suggesting books, movies, articles, or other content based on health and fitness goals

If the user asks a certain question and you are not sure about how to proceed, ask follow-up questions until you're confident you can provide a relevant and helpful response. Here is also the conversation that has taken place so far between the user and you so make sure to take all that context also into account when responding to the users questions or helping them in any regard if appropriate. I'd also like you to keep in mind that there is no need on your end to summarize the conversation thus far in your responses.
If the user asks a certain question and you are not sure about how to proceed, ask follow-up questions until
you are confident you can provide a relevant and helpful response. Here is also the conversation that has taken
place so far between the user and you so make sure to take all that context also into account when
responding to the user's questions or helping them in any regard if appropriate.
I would also like you to keep in mind that there is no need on your end to summarize the conversation thus far in your responses.
3 changes: 2 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ async def chatbot(request: Request):
provider = "gemini" # specify the provider for this model
tokenizer = tiktoken.get_encoding("cl100k_base") # specify the tokenizer to use for this model
tokenizer_function = lambda text: len(tokenizer.encode(text)) # specify the tokenizing function to use
with open("initial-prompt.txt", "r") as prompt_file:
initial_prompt = prompt_file.read().strip()

# specify the completion function you'd like to use
def completion_function(api_key: str,
Expand Down Expand Up @@ -125,7 +127,6 @@ def completion_function(api_key: str,
except Exception as e:
raise e
else:
print("Using GenerativeAI")
import google.generativeai as genai

model = genai.GenerativeModel(model_name)
Expand Down
1 change: 1 addition & 0 deletions model_config/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from configparser import ConfigParser

load_dotenv()

parser = ConfigParser()

class ModelConfig():
Expand Down
11 changes: 10 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,13 @@ google-generativeai
IPython
tiktoken==0.6.0
google-cloud-aiplatform[tokenization]==1.57.0
tokenizers==0.19.0
tokenizers==0.19.0

PyPDF2

docx2txt
pandas
numpy
scikit-learn
scipy
matplotlib