-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsearch_demo.py
More file actions
48 lines (36 loc) · 1.33 KB
/
search_demo.py
File metadata and controls
48 lines (36 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import faiss
import json
import numpy as np
from sentence_transformers import SentenceTransformer
# Configuration
MODEL_NAME = "intfloat/e5-large-v2"
DATASET = "imdb"  # Change this to test other datasets
INDEX_PATH = f"portfolio_index_results/{DATASET}/vectors.index"
CHUNKS_PATH = f"portfolio_index_results/{DATASET}/chunks.jsonl"
# None lets SentenceTransformer choose the device itself (a GPU when one is
# usable, otherwise CPU) instead of crashing on machines without CUDA.
# Set to "cuda" / "cpu" explicitly to force a device.
DEVICE = None

print(f"\nLoading model ({MODEL_NAME})...")
model = SentenceTransformer(MODEL_NAME, device=DEVICE)

print(f"Loading FAISS index from {INDEX_PATH} ...")
index = faiss.read_index(INDEX_PATH)

# Load chunks: one JSON object per line. The list position of each chunk is
# later used as the lookup key for the ids returned by the index, so order
# must be preserved.
# - encoding is pinned to UTF-8 so decoding does not depend on the platform
#   locale;
# - whitespace-only lines (e.g. a trailing newline at end of file) are skipped
#   rather than handed to json.loads, which would raise on them.
with open(CHUNKS_PATH, "r", encoding="utf-8") as f:
    chunks = [json.loads(line) for line in f if line.strip()]

print(f"Loaded {len(chunks)} chunks.")
def embed_query(text):
    """Encode one query string into a float32 embedding array.

    The text is prefixed with ``"query: "`` before encoding (the E5 model
    card documents this prefix for search queries -- confirm if MODEL_NAME
    is changed to a different model family).

    Args:
        text: The raw query string.

    Returns:
        The array produced by ``model.encode`` for a one-element batch,
        with normalized embeddings, cast to ``float32``.
    """
    prompt = "query: " + text
    encoded = model.encode(
        [prompt],
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    query_vectors = encoded.astype("float32")
    return query_vectors
def search(query, k=5):
    """Print the top results the index returns for ``query``.

    The query is embedded with ``embed_query`` and looked up in the
    module-level ``index``. For every hit, the score reported by the index
    and the first 500 characters of the matching chunk's ``"text"`` field
    are printed.

    Args:
        query: The search string.
        k: Maximum number of results to request from the index.

    Returns:
        None. All output goes to stdout.
    """
    print(f"\nQuery: {query}")
    qv = embed_query(query)
    scores, indices = index.search(qv, k)

    print("\nTop results:")
    rank = 0
    for score, idx in zip(scores[0], indices[0]):
        # FAISS fills the label array with -1 when it finds fewer than k
        # neighbours. Indexing chunks[-1] would silently show the LAST chunk
        # as if it were a real hit, so padding entries are skipped.
        if idx < 0:
            continue
        rank += 1
        chunk = chunks[idx]["text"]
        print(f"\n#{rank} — Score: {score:.3f}")
        print(chunk[:500], "...")
if __name__ == "__main__":
    # Demo run: issue a fixed set of sample queries, in order, and print
    # the results of each one.
    for sample_query in (
        "movies about heartbreak and betrayal",
        "reviews describing poorly written scripts",
        "films with strong female leads",
    ):
        search(sample_query)