pixegami · Erfanm83 · Apr 30, 2025 · Apr 30, 2025 · Apr 30, 2025 · May 3, 2025
diff --git a/.env.example b/.env.example
@@ -0,0 +1,14 @@
+# Application settings
+APP_HOST=127.0.0.1
+APP_PORT=8000
+APP_RELOAD=True
+
+# OpenAI API key - Replace with your actual API key
+OPENAI_API_KEY=your_openai_api_key_here
+
+# Rate limiting settings
+RATE_LIMIT=60
+TIME_WINDOW=60
+
+# Token retrieval secret for developers
+TOKEN_RETRIEVAL_SECRET=your_secret_key_here
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,6 @@
 .env
+/venv/
 chroma_*
 chroma
-.DS_Store
+.DS_Store
+__pycache__
diff --git a/README.md b/README.md
@@ -1,47 +1,155 @@
-# Langchain RAG Tutorial
+# Knowledge Base Chatbot
 
-## Install dependencies
+A simple chatbot that can answer questions based on a knowledge base of markdown documents using Retrieval Augmented Generation (RAG).
 
-1. Do the following before installing the dependencies found in `requirements.txt` file because of current challenges installing `onnxruntime` through `pip install onnxruntime`. 
+## Features
 
-    - For MacOS users, a workaround is to first install `onnxruntime` dependency for `chromadb` using:
+- FastAPI backend with token-based authentication
+- Streamlit frontend for easy interaction
+- LangChain + OpenAI for RAG implementation
+- Vector storage using Chroma
+- Rate limiting for API protection
 
-    ```python
-     conda install onnxruntime -c conda-forge
-    ```
-    See this [thread](https://github.com/microsoft/onnxruntime/issues/11037) for additonal help if needed. 
+## Setup
 
-     - For Windows users, follow the guide [here](https://github.com/bycloudai/InstallVSBuildToolsWindows?tab=readme-ov-file) to install the Microsoft C++ Build Tools. Be sure to follow through to the last step to set the enviroment variable path.
+### Prerequisites
 
+- Python 3.9+
+- OpenAI API key
+- Markdown files for training data
 
-2. Now run this command to install dependenies in the `requirements.txt` file. 
+### Installation
 
-```python
-pip install -r requirements.txt
-```
+1. Clone the repository:
+   ```
+   git clone <repository-url>
+   cd <repository-directory>
+   ```
+
+2. Install dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+3. Create a `.env` file based on the template:
+   ```
+   cp .env.example .env
+   ```
+
+4. Edit the `.env` file to add your OpenAI API key and other settings.
+
+### Training the Bot
+
+1. Place your markdown (.md) files in the `data/prototype` directory.
+
+2. Run the training script:
+   ```
+   python train.py
+   ```
+
+   This will process all markdown files and create a vector database in the `chroma` directory.
+
+### Running the API Server
+
+1. Start the FastAPI server:
+   ```
+   python run.py
+   ```
+
+   The API will be available at `http://localhost:8000`.
+
+### Using the Chat UI
 
-3. Install markdown depenendies with: 
+1. Start the Streamlit UI:
+   ```
+   streamlit run client/chat_ui.py
+   ```
 
-```python
-pip install "unstructured[md]"
+2. Open your browser at `http://localhost:8501`.
+
+3. Enter your API secret key in the sidebar.
+
+4. Start chatting with the bot!
+
+## API Endpoints
+
+### GET /docs
+OpenAPI documentation for the API.
+
+### POST /get_token/
+Get a one-time token for authentication.
+
+Request body:
+```json
+{
+  "secret": "your_secret_key_here"
+}
 ```
 
-## Create database
+Response:
+```json
+{
+  "token": "uuid-token-here"
+}
+```
 
-Create the Chroma DB.
+### POST /chat
+Send a question to the chatbot.
 
-```python
-python create_database.py
+Request body:
+```json
+{
+  "question": "Your question here?",
+  "token": "your-one-time-token"
+}
 ```
 
-## Query the database
+Response:
+```json
+{
+  "answer": "The answer based on knowledge base."
+}
+```
 
-Query the Chroma DB.
+## Project Structure
 
-```python
-python query_data.py "How does Alice meet the Mad Hatter?"
 ```
+.
+├── client
+│   └── chat_ui.py
+├── config
+│   ├── config.py
+│   └── logging_config.py
+├── run.py
+├── src
+│   ├── main.py
+│   ├── query_data.py
+│   ├── rag.py
+│   └── security.py
+└── train
+    ├── __pycache__
+    │   └── create_database.cpython-39.pyc
+    ├── compare_embeddings.py
+    ├── create_database.py
+    ├── data
+    │   ├── books
+    │   │   └── alice_in_wonderland.md
+    │   ├── prototype
+    │   │   └── test.md
+    │   └── sample_chats
+    │       └── sample_chat_cleaned.md
+    └── train.py
+
+9 directories, 15 files
+```
+
+## License
+
+[Your License Here]
 
-> You'll also need to set up an OpenAI account (and set the OpenAI key in your environment variable) for this to work.
+## Credits
 
-Here is a step-by-step tutorial video: [RAG+Langchain Python Project: Easy AI/Chat For Your Docs](https://www.youtube.com/watch?v=tcqEUSNCn8I&ab_channel=pixegami).
+- [OpenAI](https://openai.com/)
+- [LangChain](https://langchain.com/)
+- [FastAPI](https://fastapi.tiangolo.com/)
+- [Streamlit](https://streamlit.io/)
diff --git a/ragchatbot/client/chat_ui.py b/ragchatbot/client/chat_ui.py
@@ -0,0 +1,157 @@
+"""Streamlit chat client for the knowledge-base chatbot API.
+
+On each submitted question the script posts the sidebar secret to
+``/get_token/``, then posts the question to ``/chat`` with the returned token
+in the ``x-api-key`` header and shows the ``answer`` field of the reply.
+"""
+import html
+import time
+
+import requests
+import streamlit as st
+
+# Seconds to wait on each API call; requests has no default timeout, so
+# without this a stalled server would hang the page indefinitely.
+REQUEST_TIMEOUT = 60
+# Fallback text shown when an error response carries no usable detail.
+UNKNOWN_ERROR = "خطای نامشخص"
+
+
+def render_message(role, content):
+    """Render one chat bubble for ``role`` ("user" or anything else = bot).
+
+    ``content`` is HTML-escaped because it is interpolated into markup that
+    is rendered with ``unsafe_allow_html=True``; both the user's text and the
+    API's answer are untrusted input.
+    """
+    if role == "user":
+        css_class, avatar = "user-message", "👤"
+    else:
+        css_class, avatar = "bot-message", "🤖"
+    # Escape first, then turn newlines into <br> so line breaks stay visible.
+    safe_content = html.escape(str(content)).replace("\n", "<br>")
+    st.markdown(
+        f'<div class="chat-message {css_class}">'
+        f"<div>{avatar}</div>"
+        f'<div class="message-content">{safe_content}</div>'
+        "</div>",
+        unsafe_allow_html=True,
+    )
+
+
+def error_detail(response):
+    """Return a displayable error message for a non-200 API response.
+
+    Handles a body that is not JSON, a ``detail`` that is a dict with a
+    ``msg`` key, and a ``detail`` that is a plain value such as a string.
+    """
+    try:
+        detail = response.json().get("detail")
+    except (ValueError, AttributeError):
+        # Not JSON, or JSON that is not an object.
+        return response.text or UNKNOWN_ERROR
+    if isinstance(detail, dict):
+        return detail.get("msg", UNKNOWN_ERROR)
+    return str(detail) if detail else UNKNOWN_ERROR
+
+
+# Set page title and favicon
+st.set_page_config(page_title="Sales ChatBot", page_icon="🤖")
+
+# Custom CSS to improve appearance
+st.markdown("""
+    <style>
+    .main {
+        padding: 1rem;
+    }
+    .chat-message {
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin-bottom: 1rem;
+        display: flex;
+        align-items: center;
+    }
+    .user-message {
+        background-color: #fb8200;
+    }
+    .bot-message {
+        background-color: #2533cc;
+    }
+    .message-content {
+        margin-left: 1rem;
+    }
+    </style>
+""", unsafe_allow_html=True)
+
+# App title
+st.title("🤖 سروریار")
+st.markdown("دستیار فروش هوشمند ایران سرور")
+
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+# Display chat history
+for message in st.session_state.messages:
+    render_message(message["role"], message["content"])
+
+# Configuration in sidebar
+with st.sidebar:
+    st.header("Configuration")
+    # Default matches the server's default APP_PORT (8000).
+    api_url = st.text_input("API URL", value="http://127.0.0.1:8000", help="The URL of your FastAPI server")
+    secret = st.text_input("🔑 API Secret Key", type="password", help="Secret key for authentication")
+
+# Input form
+with st.form(key="chat_form", clear_on_submit=True):
+    user_input = st.text_area("💬 سوال خود را بپرسید:", key="user_question", height=100)
+    submit_button = st.form_submit_button("ارسال")
+
+if submit_button and user_input:
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": user_input})
+
+    # Show "thinking" message
+    thinking_placeholder = st.empty()
+    thinking_placeholder.markdown("🤖 در حال فکر کردن...")
+
+    # Tolerate a trailing slash in the configured URL.
+    base_url = api_url.rstrip("/")
+    bot_response = None
+    try:
+        # Step 1: Get token
+        token_resp = requests.post(
+            f"{base_url}/get_token/",
+            json={"secret": secret},
+            timeout=REQUEST_TIMEOUT,
+        )
+        if token_resp.status_code != 200:
+            st.error(f"خطای توکن: {error_detail(token_resp)}")
+        else:
+            token = token_resp.json()["token"]
+
+            # Step 2: Send question with token in header
+            chat_resp = requests.post(
+                f"{base_url}/chat",
+                json={"query": user_input},
+                headers={"x-api-key": token},
+                timeout=REQUEST_TIMEOUT,
+            )
+            if chat_resp.status_code == 200:
+                bot_response = chat_resp.json()["answer"]
+            else:
+                st.error(f"خطای گفتگو: {error_detail(chat_resp)}")
+    except Exception as e:  # UI boundary: show the failure instead of crashing
+        st.error(f"خطا: {str(e)}")
+    finally:
+        # Clear the placeholder on every path, including a rejected token.
+        thinking_placeholder.empty()
+
+    if bot_response is not None:
+        # Add bot response to chat history and display it
+        st.session_state.messages.append({"role": "assistant", "content": bot_response})
+        render_message("assistant", bot_response)
+
+# Add a small credit at the bottom
+st.markdown("---")
+st.markdown("Made with ❤️ by Erfan Mahmoudi")
diff --git a/ragchatbot/config/__init__.py b/ragchatbot/config/__init__.py
diff --git a/ragchatbot/config/__pycache__/__init__.cpython-39.pyc b/ragchatbot/config/__pycache__/__init__.cpython-39.pyc
diff --git a/ragchatbot/config/__pycache__/config.cpython-39.pyc b/ragchatbot/config/__pycache__/config.cpython-39.pyc
diff --git a/ragchatbot/config/__pycache__/logging_config.cpython-39.pyc b/ragchatbot/config/__pycache__/logging_config.cpython-39.pyc
diff --git a/ragchatbot/config/config.py b/ragchatbot/config/config.py
@@ -0,0 +1,62 @@
+"""Runtime configuration for the chatbot, read from the environment / .env.
+
+Importing this module raises ``RuntimeError`` when a required setting is
+missing or malformed.
+"""
+import os
+from dotenv import load_dotenv
+
+# Load the .env file from the root directory
+load_dotenv()
+
+
+def _env_int(name: str, default: int) -> int:
+    """Return environment variable ``name`` as an int, or ``default`` if unset.
+
+    ``os.getenv`` returns a string whenever the variable is set, so the value
+    is converted here to give every numeric setting a single type.
+
+    Raises:
+        RuntimeError: if the variable is set but empty or not an integer.
+    """
+    raw = os.getenv(name)
+    if raw is None:
+        return default
+    if not raw.strip():
+        raise RuntimeError(f"{name} not found in environment variables.")
+    try:
+        return int(raw)
+    except ValueError as err:
+        raise RuntimeError(f"{name} must be an integer, got {raw!r}.") from err
+
+
+APP_HOST = os.getenv("APP_HOST", "127.0.0.1")
+APP_PORT = _env_int("APP_PORT", 8000)
+# NOTE(review): kept as the raw string, so "False" is still truthy; confirm
+# that the consumer parses it rather than passing it on as a boolean.
+APP_RELOAD = os.getenv("APP_RELOAD", "True")
+
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+TOKEN_RETRIEVAL_SECRET = os.getenv("TOKEN_RETRIEVAL_SECRET")
+
+# Converted to int so they have the same type whether they come from the
+# environment or from the default.
+RATE_LIMIT = _env_int("RATE_LIMIT", 60)
+TIME_WINDOW = _env_int("TIME_WINDOW", 60)
+TEMP_BAN_DURATION = 300
+THROTTLE_TIME = 2
+MAX_THREADS = 10
+
+# Sanity checks (integer settings are validated by _env_int above)
+if not APP_HOST:
+    raise RuntimeError("APP_HOST not found in environment variables.")
+if not APP_PORT:
+    raise RuntimeError("APP_PORT not found in environment variables.")
+if not APP_RELOAD:
+    raise RuntimeError("APP_RELOAD not found in environment variables.")
+
+if not OPENAI_API_KEY:
+    raise RuntimeError("OPENAI_API_KEY not found in environment variables.")
+
+if not TOKEN_RETRIEVAL_SECRET:
+    raise RuntimeError("TOKEN_RETRIEVAL_SECRET not found in environment variables.")
diff --git a/ragchatbot/config/logging_config.py b/ragchatbot/config/logging_config.py
@@ -0,0 +1,34 @@
+# Colorized log formatter: prints "<LEVEL>:" in a per-level color before the message.
+from colorama import Fore, Style
+import logging
+
+
+class CustomFormatter(logging.Formatter):
+    """Format records as ``LEVEL:     message`` with the level name colored.
+
+    Only the level prefix is colored; the color is reset before the message.
+    Any format string passed to the constructor is not used for the main line.
+    """
+
+    # Built once at class creation instead of on every format() call.
+    LEVEL_COLORS = {
+        "INFO": Fore.GREEN,
+        "WARNING": Fore.YELLOW,
+        "ERROR": Fore.RED,
+        "CRITICAL": Fore.MAGENTA,
+        "DEBUG": Fore.CYAN,
+    }
+
+    def format(self, record):
+        """Return the colored line for ``record``, plus any traceback/stack.
+
+        Unknown level names fall back to white.
+        """
+        color = self.LEVEL_COLORS.get(record.levelname, Fore.WHITE)
+        line = f"{color}{record.levelname}:{Style.RESET_ALL}     {record.getMessage()}"  # Reset after level
+        # Keep exception and stack text so logger.exception() output is not lost.
+        if record.exc_info:
+            line = f"{line}\n{self.formatException(record.exc_info)}"
+        if record.stack_info:
+            line = f"{line}\n{self.formatStack(record.stack_info)}"
+        return line