-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrag_system.py
More file actions
432 lines (360 loc) · 16.6 KB
/
rag_system.py
File metadata and controls
432 lines (360 loc) · 16.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
import os
import groq
import chromadb
import requests
from googleapiclient.discovery import build
from sentence_transformers import SentenceTransformer
from urllib.parse import urlparse
import json
import time
from datetime import datetime
from dotenv import load_dotenv
# Load variables from a .env file (python-dotenv) into the process environment
# before the credentials below are read.
load_dotenv()
# API credentials are read once at import time; os.getenv returns None for any
# variable that is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Passed as the `cx` parameter of the Custom Search request in google_search().
GOOGLE_CSE_ID = os.getenv("GOOGLE_CSE_ID")
# Passed to groq.Client when MedicalRAG is constructed.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
class MedicalRAG:
    """Answer medical questions from a ChromaDB store, with web fallback and caching.

    A query is served, in order of preference, from:

    1. the query cache (a second ChromaDB collection keyed by query embedding),
    2. the main document collection, if the hits pass a relevance check,
    3. a live Google Custom Search, whose snippets are also persisted.

    The selected context is handed to a Groq-hosted LLM to write the answer.
    """

    # Largest embedding distance at which a cached query is treated as the same
    # question as the incoming one.
    CACHE_DISTANCE_THRESHOLD = 0.05
    # The Custom Search JSON API only accepts `num` between 1 and 10.
    GOOGLE_MAX_RESULTS = 10
    # Google snippets shorter than this are not persisted in the main collection.
    MIN_SNIPPET_LENGTH = 50
    # Placeholder returned by generate_answer() when the LLM call fails; answers
    # equal to it are never cached.
    LLM_ERROR_ANSWER = "I encountered an error while generating the answer. Please try again later."

    def __init__(self,
                 db_path="./chroma_db/",
                 collection_name="medical_docs",
                 embedding_model_name="abhinand/MedEmbed-small-v0.1",
                 llm_model="llama3-70b-8192",
                 cache_expiry_days=7):
        """Load the embedding model, open both collections and create the LLM client.

        Args:
            db_path: Directory of the persistent ChromaDB store.
            collection_name: Name of the main document collection; the cache
                collection is named ``f"{collection_name}_cache"``.
            embedding_model_name: SentenceTransformer model used for all embeddings.
            llm_model: Groq model identifier used for answers and validation.
            cache_expiry_days: Age in days after which a cached response is stale.
        """
        # Setup embedding model
        self.embedding_model = SentenceTransformer(embedding_model_name)

        # One client serves both collections; there is no need to rebuild it
        # when a collection turns out to be missing.
        self.chroma_client = chromadb.PersistentClient(path=db_path)

        # Connect to existing ChromaDB collection or create a new one
        try:
            self.collection = self.chroma_client.get_collection(name=collection_name)
            print(f"Connected to existing ChromaDB collection: {collection_name}")
        except Exception as e:
            print(f"Error connecting to ChromaDB: {e}")
            print("Creating a new collection instead")
            self.collection = self.chroma_client.create_collection(
                name=collection_name,
                metadata={"hnsw:space": "cosine"}
            )

        # Setup cache collection
        cache_name = f"{collection_name}_cache"
        try:
            self.cache_collection = self.chroma_client.get_collection(name=cache_name)
            print(f"Connected to existing cache collection: {cache_name}")
        except Exception:
            print("Creating a new cache collection")
            self.cache_collection = self.chroma_client.create_collection(
                name=cache_name,
                metadata={"hnsw:space": "cosine"}
            )

        # Setup LLM client
        self.groq_client = groq.Client(api_key=GROQ_API_KEY)
        self.llm_model = llm_model
        self.cache_expiry_days = cache_expiry_days

    def google_search(self, query, max_results=5):
        """Run a Google Custom Search and return simplified result dicts.

        Args:
            query: Search string.
            max_results: Desired number of results; clamped to 1..10 because
                the API rejects values outside that range.

        Returns:
            A list of dicts with keys ``title``, ``link``, ``snippet`` and
            ``domain``. Empty on any request, HTTP, JSON or API error.
        """
        url = "https://customsearch.googleapis.com/customsearch/v1"
        params = {
            'q': query,
            'cx': GOOGLE_CSE_ID,
            'key': GOOGLE_API_KEY,
            'num': max(1, min(max_results, self.GOOGLE_MAX_RESULTS)),
            'safe': 'active'
        }
        try:
            resp = requests.get(url, params=params, timeout=10)
            resp.raise_for_status()
            data = resp.json()
            if 'error' in data:
                print(f"API Error: {data['error']['message']}")
                return []
        except requests.exceptions.RequestException as e:
            print(f"Error during search request: {e}")
            return []
        except ValueError as e:
            print(f"Error parsing JSON response: {e}")
            return []

        return [
            {
                "title": item.get("title"),
                "link": item.get("link"),
                "snippet": item.get("snippet", ""),
                "domain": urlparse(item.get("link", "")).netloc
            }
            for item in data.get("items", [])
        ]

    def _query_raw(self, query, top_k, collection):
        """Embed *query* and return the collection's raw nearest-neighbour result."""
        query_embedding = self.embedding_model.encode(query)
        return collection.query(
            query_embeddings=[query_embedding.tolist()],
            n_results=top_k
        )

    @staticmethod
    def _format_query_results(results):
        """Convert a raw query result for a single query into (documents, distances).

        Returns ``([], None)`` when the result holds no documents. A metadata
        entry that is missing or ``None`` falls back to placeholder values
        instead of failing the whole lookup.
        """
        documents = results.get("documents") or []
        if not documents or not documents[0]:
            return [], None

        metadata_lists = results.get("metadatas") or [[]]
        metadatas = metadata_lists[0] or []

        formatted_results = []
        for i, doc in enumerate(documents[0]):
            metadata = (metadatas[i] if i < len(metadatas) else None) or {}
            formatted_results.append({
                "content": doc,
                "title": metadata.get("title", "Untitled document"),
                "source": metadata.get("source", "Unknown source"),
                "url": metadata.get("url", "#")
            })

        distances = results.get("distances")
        return formatted_results, (distances[0] if distances else None)

    def search_chroma_db(self, query, top_k=3, collection=None):
        """Find the documents closest to *query* in a ChromaDB collection.

        Args:
            query: Text to embed and search for.
            top_k: Number of neighbours to request.
            collection: Collection to search; defaults to the main collection.

        Returns:
            ``(documents, distances)``. Each document is a dict with keys
            ``content``, ``title``, ``source`` and ``url``. ``distances`` is the
            list of distances for those documents, or ``None`` when unavailable.
            ``([], None)`` is returned when nothing is found or the lookup fails.
        """
        if collection is None:
            collection = self.collection
        try:
            return self._format_query_results(self._query_raw(query, top_k, collection))
        except Exception as e:
            print(f"Error searching ChromaDB: {e}")
            return [], None

    def check_query_cache(self, query):
        """Return the cached response for a near-identical query, or ``None``.

        A cache entry is used only if its distance to *query* is below
        ``CACHE_DISTANCE_THRESHOLD`` and it is younger than ``cache_expiry_days``.
        An expired match is deleted; otherwise it could keep winning the top-1
        lookup and permanently shadow newer entries for the same question.
        """
        try:
            results = self._query_raw(query, 1, self.cache_collection)
            cached_results, distances = self._format_query_results(results)
        except Exception as e:
            print(f"Error searching ChromaDB: {e}")
            return None

        # Check for similar queries
        if not (cached_results and distances and distances[0] < self.CACHE_DISTANCE_THRESHOLD):
            return None

        try:
            cached_data = json.loads(cached_results[0]["content"])
        except (TypeError, ValueError) as e:
            print(f"Ignoring unreadable cache entry: {e}")
            return None
        if not isinstance(cached_data, dict):
            return None

        # Check if cache is still valid
        timestamp = cached_data.get("timestamp", 0)
        if (time.time() - timestamp) < (self.cache_expiry_days * 86400):
            print(f"Using cached response (similarity: {distances[0]:.4f})")
            return cached_data

        # Stale entry: best-effort removal, never fatal for the query itself.
        ids = results.get("ids") or [[]]
        if ids[0]:
            try:
                self.cache_collection.delete(ids=[ids[0][0]])
            except Exception as e:
                print(f"Error removing expired cache entry: {e}")
        return None

    def store_in_cache(self, query, response_data):
        """Save a response in the cache collection, keyed by the query embedding.

        The stored JSON is a copy of *response_data* with a fresh ``timestamp``;
        the caller's dict is left unmodified. Failures are reported and swallowed
        because caching is best effort.
        """
        try:
            # Stamp a copy so the caller's dict is not mutated.
            payload = dict(response_data)
            payload["timestamp"] = time.time()
            json_data = json.dumps(payload)

            # Nanosecond resolution keeps IDs unique even for back-to-back writes.
            cache_id = f"cache_{time.time_ns()}"

            # Create embedding
            query_embedding = self.embedding_model.encode(query)

            # Save to cache
            self.cache_collection.add(
                ids=[cache_id],
                embeddings=[query_embedding.tolist()],
                documents=[json_data],
                metadatas=[{
                    "source": "query_cache",
                    "title": f"Cached response for: {query[:50]}...",
                    "url": "#",
                    "query": query,
                    "cache_date": datetime.now().isoformat()
                }]
            )
            print(f"Stored response in cache with ID: {cache_id}")
        except Exception as e:
            print(f"Error storing in cache: {e}")

    @staticmethod
    def _build_context(context_docs):
        """Render context documents as the numbered text block used in the prompt."""
        entries = []
        for number, doc in enumerate(context_docs, start=1):
            if isinstance(doc, dict) and "content" in doc:
                # Format ChromaDB results
                title = doc.get("title", "Document")
                source = doc.get("source", "Unknown")
                url = doc.get("url", "#")
                content = doc.get("content", "")
                entries.append(f"[{number}] {title}\nSource: {source} ({url})\n{content}\n\n")
            elif isinstance(doc, dict) and "snippet" in doc:
                # Format Google results
                title = doc.get("title", "")
                source = doc.get("domain", "")
                url = doc.get("link", "")
                snippet = doc.get("snippet", "")
                entries.append(f"[{number}] {title}\nSource: {source} ({url})\n{snippet}\n\n")
            else:
                # Plain text
                entries.append(f"[{number}] {doc}\n\n")
        return "".join(entries)

    def generate_answer(self, question, context_docs, source_type="Unknown"):
        """Ask the LLM to answer *question* using only *context_docs*.

        Args:
            question: The user's question.
            context_docs: ChromaDB result dicts, Google result dicts, or plain
                values; each is rendered as one numbered context entry.
            source_type: Label describing where the context came from.

        Returns:
            The model's answer text, or ``LLM_ERROR_ANSWER`` if the call fails.
        """
        context = self._build_context(context_docs)
        prompt = f"""You are medicos, a medical AI assistant that provides accurate, well-referenced medical information.
Answer the following medical question using ONLY the provided context. Follow these rules:
1. Only use information from the provided sources
2. If the context doesn't contain enough information to answer, say "I don't have enough information to answer this question accurately."
3. Do not make up information or cite sources not provided
4. Include numbered references to the specific sources used
5. Include a medical disclaimer at the end
6. Suggest 2-3 follow-up questions related to the topic
Question: {question}
Context:
{context}
Source type: {source_type}
Your answer should be structured as follows:
1. A concise answer to the question
2. References section with numbered sources
3. 2-3 follow-up questions
4. Medical disclaimer
"""
        try:
            response = self.groq_client.chat.completions.create(
                model=self.llm_model,
                messages=[
                    {"role": "system", "content": "You are medicos, a medical AI assistant that provides accurate, well-referenced information."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.1  # Low temperature for factual responses
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error generating answer with Groq API: {e}")
            return self.LLM_ERROR_ANSWER

    def format_sources(self, sources):
        """Reduce context documents to the source list included in a response.

        The shape of the first element decides how the whole list is treated:
        ChromaDB results (have ``content``), Google results (have ``snippet``),
        or anything else, which is rendered as plain text.
        """
        if not sources:
            return []
        first = sources[0]
        if isinstance(first, dict) and "content" in first:
            # Format ChromaDB results
            return [
                {
                    "title": source.get("title", "Unknown document"),
                    "source": source.get("source", "Unknown source"),
                    "url": source.get("url", "#")
                }
                for source in sources
            ]
        if isinstance(first, dict) and "snippet" in first:
            # Format Google results
            return [
                {
                    "title": result.get("title", ""),
                    "source": result.get("domain", ""),
                    "url": result.get("link", ""),
                    "snippet": result.get("snippet", "")
                }
                for result in sources
            ]
        # Format plain text
        return [
            {"title": f"Result {i+1}", "source": str(source), "url": "#"}
            for i, source in enumerate(sources)
        ]

    @staticmethod
    def _is_affirmative(text):
        """Return True if the first standalone YES/NO word in *text* is YES.

        Matching whole words avoids false positives such as the "YES" inside
        "EYES" in an otherwise negative verdict.
        """
        letters_only = "".join(ch if ch.isalpha() else " " for ch in text.upper())
        for word in letters_only.split():
            if word == "YES":
                return True
            if word == "NO":
                return False
        return False

    def validate_database_response(self, question, chroma_results, similarity_threshold=0.3, distances=None):
        """Check whether database results are actually relevant to *question*.

        Args:
            question: The user's question.
            chroma_results: Documents returned by ``search_chroma_db``.
            similarity_threshold: Largest acceptable distance of the best hit.
            distances: Distances that came with *chroma_results*. When omitted
                the collection is queried again to obtain them.

        Returns:
            False if there are no results, the best hit is too distant, or the
            LLM judges the top document irrelevant. True otherwise, including
            when the LLM validation call itself fails.
        """
        if not chroma_results:
            return False

        # Check similarity scores; reuse the caller's distances when provided
        # instead of embedding and querying a second time.
        if distances is None:
            _, distances = self.search_chroma_db(question)
        if distances and distances[0] > similarity_threshold:
            print(f"Database results found but relevance too low: {distances[0]}")
            return False

        # Use LLM to verify relevance
        validation_prompt = f"""You are evaluating if a database response is relevant to a user query.
Query: {question}
Database Response: {chroma_results[0].get('content', '')}
Task: Determine if this database response actually answers the user's query.
Answer only 'YES' if it is relevant and informative for the query, or 'NO' if it doesn't properly address the query.
"""
        try:
            response = self.groq_client.chat.completions.create(
                model=self.llm_model,
                messages=[
                    {"role": "system", "content": "You are a validation assistant determining if search results are relevant to a query."},
                    {"role": "user", "content": validation_prompt}
                ],
                temperature=0.1
            )
            validation = response.choices[0].message.content
            is_valid = self._is_affirmative(validation)
            print(f"LLM validation result: {validation} (Valid: {is_valid})")
            return is_valid
        except Exception as e:
            print(f"Error in validation: {e}")
            # Default to accepting results if validation fails
            return True

    def _store_google_results(self, question, google_results):
        """Persist sufficiently long Google snippets in the main collection (best effort)."""
        try:
            kept = [
                result for result in google_results
                if len(result.get("snippet") or "") >= self.MIN_SNIPPET_LENGTH
            ]
            if kept:
                snippets = [result["snippet"] for result in kept]
                batch_id = time.time_ns()
                date_added = datetime.now().isoformat()
                self.collection.add(
                    ids=[f"google_{batch_id}_{i}" for i in range(len(kept))],
                    embeddings=self.embedding_model.encode(snippets).tolist(),
                    documents=snippets,
                    # `or` rather than a .get default: the keys exist but may hold
                    # None or "", and None is not a valid metadata value.
                    metadatas=[
                        {
                            "source": result.get("domain") or "Google Search",
                            "title": result.get("title") or "Search Result",
                            "url": result.get("link") or "#",
                            "query": question,
                            "date_added": date_added
                        }
                        for result in kept
                    ]
                )
            print(f"Added {len(kept)} Google results to the database")
        except Exception as e:
            print(f"Error adding Google results to database: {e}")

    def process_medical_query(self, question, use_google_fallback=True, top_k=5):
        """Answer a medical question using cache, database, or web search.

        Args:
            question: The user's question.
            use_google_fallback: Whether to search Google when the database has
                no relevant documents.
            top_k: Number of documents or search results to retrieve.

        Returns:
            A dict with keys ``question``, ``answer``, ``context_source`` and
            ``sources``. Responses produced by the LLM (fresh or cached) also
            carry a ``timestamp``; the "no information" responses do not.
        """
        # Check cache first
        cached_response = self.check_query_cache(question)
        if cached_response:
            print("Using cached response")
            return cached_response

        # Try ChromaDB first, discarding hits that fail the relevance check
        chroma_results, distances = self.search_chroma_db(question, top_k)
        if chroma_results and not self.validate_database_response(
                question, chroma_results, distances=distances):
            chroma_results = []

        if chroma_results:
            context = chroma_results
            source_type = "ChromaDB (Pre-stored Medical Docs)"
        elif use_google_fallback:
            google_results = self.google_search(question, max_results=top_k)
            if not google_results:
                return {
                    "question": question,
                    "answer": "I couldn't find relevant information to answer this question accurately.",
                    "context_source": "None",
                    "sources": []
                }
            context = google_results
            source_type = "Google API (Live Search)"
            # Store Google results for future use
            self._store_google_results(question, google_results)
        else:
            return {
                "question": question,
                "answer": "I don't have enough information in my knowledge base to answer this question accurately.",
                "context_source": "None",
                "sources": []
            }

        # Generate answer
        answer = self.generate_answer(question, context, source_type)

        # Prepare response
        response = {
            "question": question,
            "answer": answer,
            "context_source": source_type,
            "sources": self.format_sources(context),
            "timestamp": time.time()
        }

        # Cache for future use, but never cache the LLM failure placeholder:
        # it would be served as the "answer" until the entry expires.
        if answer != self.LLM_ERROR_ANSWER:
            self.store_in_cache(question, response)
        return response
def main():
    """Run an interactive console loop that answers medical queries.

    Reads queries from standard input until the user types ``exit`` or closes
    the input (Ctrl-D / Ctrl-C), printing each response as indented JSON.
    Blank input is ignored and never sent through the retrieval pipeline.
    """
    rag = MedicalRAG()
    print("Welcome to medicos!")
    print("This system now validates database responses and caches results.")
    while True:
        try:
            user_query = input("Enter a medical query (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt at the prompt ends the session
            # cleanly instead of surfacing a traceback.
            print("\nExiting Medical RAG System. Goodbye!")
            break
        if user_query.lower() == "exit":
            print("Exiting Medical RAG System. Goodbye!")
            break
        if not user_query:
            # Nothing to answer; ask again.
            continue
        print("Processing query...")
        response = rag.process_medical_query(user_query)
        print("\n" + "="*50 + "\nRESULTS\n" + "="*50)
        print(json.dumps(response, indent=2))
        print("="*50)


if __name__ == "__main__":
    main()