Skip to content

Commit 872a322

Browse files
Adding GCP Bigquery vector search with ChatGPT cookbook (#1344)
Co-authored-by: Aaron Wilkowitz <[email protected]>
1 parent 3f85e94 commit 872a322

File tree

4 files changed

+1037
-0
lines changed

4 files changed

+1037
-0
lines changed

examples/chatgpt/rag-quickstart/gcp/Getting_started_with_bigquery_vector_search_and_openai.ipynb

Lines changed: 945 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from google.cloud import bigquery
2+
import functions_framework
3+
import os
4+
from openai import OpenAI
5+
import json
6+
7+
# OpenAI client shared by every invocation; the API key is read from the
# OPENAI_API_KEY environment variable.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Deployment settings read from the environment (each is None when unset).
# The three BigQuery identifiers are combined as project.dataset.table in the
# search query.
embeddings_model = os.environ.get('EMBEDDINGS_MODEL')
project_id = os.environ.get('PROJECT_ID')
dataset_id = os.environ.get('DATASET_ID')
table_id = os.environ.get('TABLE_ID')
12+
13+
def generate_embeddings(text, model):
    """Return the embedding vector for ``text`` from the given OpenAI model.

    Logs the input, issues one embeddings request through the module-level
    ``openai_client``, and returns the ``embedding`` attribute of the first
    item in the response data.
    """
    print(f'Generating embedding for: {text}')
    response = openai_client.embeddings.create(model=model, input=text)
    # Only the first entry of the response data is used.
    return response.data[0].embedding
20+
21+
@functions_framework.http
def openai_docs_search(request):
    """HTTP entry point: vector search over the configured BigQuery table.

    The request body must be a JSON object with:
      * ``query`` (required): text that is embedded and searched for.
      * ``top_k`` (optional, default 3): number of nearest neighbours asked
        of ``VECTOR_SEARCH``; must be coercible to a positive integer.
      * ``category`` (optional): when non-empty, only rows whose ``category``
        equals this value are returned.

    Returns:
        A ``(body, status, headers)`` tuple. On success the body is a JSON
        object ``{"items": [...]}`` whose entries carry ``text``, ``title``,
        ``distance`` and ``category``, ordered by ascending distance. On a
        malformed request the body is ``{"error": ...}`` with status 400.
    """
    json_headers = {'Content-Type': 'application/json'}

    print('received a request')
    client = bigquery.Client()

    request_json = request.get_json(silent=True)
    print(request_json)

    # A JSON array or scalar parses fine but has no .get(); reject it up front.
    if not request_json or not isinstance(request_json, dict):
        return json.dumps({"error": "Invalid JSON in request"}), 400, json_headers

    query = request_json.get('query')
    category = request_json.get('category', '')

    if not query:
        return json.dumps({"error": "Query parameter is required"}), 400, json_headers

    # top_k is interpolated into the SQL text, so it must be a real integer
    # before it gets anywhere near the query string.
    try:
        top_k = int(request_json.get('top_k', 3))
    except (TypeError, ValueError, OverflowError):
        return json.dumps({"error": "top_k must be a positive integer"}), 400, json_headers
    if top_k < 1:
        return json.dumps({"error": "top_k must be a positive integer"}), 400, json_headers

    embedding_query = generate_embeddings(query, embeddings_model)
    embedding_query_list = ', '.join(map(str, embedding_query))

    table_ref = f"`{project_id}.{dataset_id}.{table_id}`"
    sql_query = f"""
    WITH search_results AS (
      SELECT query.id AS query_id, base.id AS base_id, distance
      FROM VECTOR_SEARCH(
        TABLE {table_ref}, 'content_vector',
        (SELECT ARRAY[{embedding_query_list}] AS content_vector, 'query_vector' AS id),
        top_k => {top_k}, distance_type => 'COSINE', options => '{{"use_brute_force": true}}')
    )
    SELECT sr.query_id, sr.base_id, sr.distance, ed.text, ed.title, ed.category
    FROM search_results sr
    JOIN {table_ref} ed ON sr.base_id = ed.id
    """

    query_parameters = []
    if category:
        # Bind the caller-supplied category as a query parameter instead of
        # splicing it into the SQL string (prevents SQL injection).
        # NOTE: the filter runs after the top_k search, so fewer than top_k
        # rows may come back when a category is given.
        sql_query += " WHERE ed.category = @category"
        query_parameters.append(
            bigquery.ScalarQueryParameter("category", "STRING", str(category))
        )

    sql_query += " ORDER BY sr.distance;"

    job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
    query_job = client.query(sql_query, job_config=job_config)  # Make an API request.

    rows = []
    for row in query_job:
        print(row.title)
        rows.append({
            "text": row.text,
            "title": row.title,
            "distance": row.distance,
            "category": row.category,
        })

    response = {"items": rows}
    print('sending response')
    print(len(rows))
    # Same Content-Type header as the error paths, so clients always get JSON.
    return json.dumps(response), 200, json_headers
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
google-cloud-bigquery
2+
functions-framework
3+
openai

registry.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,6 +1457,18 @@
14571457
- gpt-actions-library
14581458
- chatgpt
14591459

1460+
- title: GCP BigQuery Vector Search with GCP Functions and GPT Actions in ChatGPT
1461+
path: examples/chatgpt/rag-quickstart/gcp/Getting_started_with_bigquery_vector_search_and_openai.ipynb
1462+
date: 2024-08-02
1463+
authors:
1464+
- pap-openai
1465+
- maxreid-openai
1466+
tags:
1467+
- embeddings
1468+
- chatgpt
1469+
- tiktoken
1470+
- completions
1471+
14601472
- title: GPT Actions library - Zapier
14611473
path: examples/chatgpt/gpt_actions_library/gpt_action_zapier.ipynb
14621474
date: 2024-08-05

0 commit comments

Comments
 (0)