1
+ from google .cloud import bigquery
2
+ import functions_framework
3
+ import os
4
+ from openai import OpenAI
5
+ import json
6
+
7
# Runtime configuration. Every value is read from the process environment once,
# at import time; a variable that is not set yields None.

# OpenAI API client, authenticated with the key from OPENAI_API_KEY.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Name of the embeddings model passed to the OpenAI embeddings endpoint.
embeddings_model = os.environ.get('EMBEDDINGS_MODEL')

# The three components of the fully qualified BigQuery table that is searched.
project_id = os.environ.get('PROJECT_ID')
dataset_id = os.environ.get('DATASET_ID')
table_id = os.environ.get('TABLE_ID')
12
+
13
def generate_embeddings(text, model):
    """Return the embedding produced for ``text`` by the embeddings model ``model``.

    The request is sent through the module-level ``openai_client``. The
    function returns the ``embedding`` attribute of the first entry in the
    response's ``data`` list.

    Args:
        text: Input forwarded unchanged as the ``input`` of the embeddings call.
        model: Model identifier forwarded unchanged as the ``model`` argument.

    Returns:
        The ``embedding`` value of the first item in the response data.
    """
    print(f'Generating embedding for: {text}')
    # Ask the embeddings endpoint to embed the text with the requested model.
    response = openai_client.embeddings.create(input=text, model=model)
    # Only the first result is used.
    first_item = response.data[0]
    return first_item.embedding
20
+
21
def _json_response(payload, status):
    """Serialize ``payload`` to JSON and pair it with ``status`` and a JSON content type."""
    return json.dumps(payload), status, {'Content-Type': 'application/json'}


def _parse_top_k(value):
    """Return ``value`` as a positive int, or None when it is not one.

    Integer-like strings (e.g. ``"5"``) and integral floats (e.g. ``5.0``) are
    accepted; booleans, non-integral floats, non-numeric values and values
    below 1 are rejected.
    """
    if isinstance(value, bool):
        # bool is a subclass of int; True/False are not meaningful counts.
        return None
    if isinstance(value, float) and not value.is_integer():
        return None
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return None
    return parsed if parsed > 0 else None


@functions_framework.http
def openai_docs_search(request):
    """HTTP entry point: vector-search the configured BigQuery table for a query.

    The JSON request body is expected to be an object with:
        query: Required. Text to embed and search for.
        top_k: Optional positive integer, default 3. Number of nearest
            neighbours requested from VECTOR_SEARCH.
        category: Optional. When non-empty, only rows whose ``category``
            equals this value are returned.

    Args:
        request: Incoming HTTP request object; only ``get_json`` is used.

    Returns:
        A ``(body, status, headers)`` tuple. On success the body is a JSON
        object ``{"items": [...]}`` whose entries carry ``text``, ``title``,
        ``distance`` and ``category``, ordered by ascending distance. A 400
        status with an ``error`` message is returned when the body is not a
        JSON object, ``query`` is missing or empty, or ``top_k`` is not a
        positive integer.
    """
    print('received a request')

    request_json = request.get_json(silent=True)
    print(request_json)

    # A JSON array or scalar would parse but has no .get(); treat it as invalid.
    if not request_json or not isinstance(request_json, dict):
        return _json_response({"error": "Invalid JSON in request"}, 400)

    query = request_json.get('query')
    if not query:
        return _json_response({"error": "Query parameter is required"}, 400)

    # top_k is interpolated into the SQL text below, so it must be a validated
    # integer rather than an arbitrary caller-supplied value.
    top_k = _parse_top_k(request_json.get('top_k', 3))
    if top_k is None:
        return _json_response({"error": "top_k must be a positive integer"}, 400)

    category = request_json.get('category', '')

    embedding_query = generate_embeddings(query, embeddings_model)
    embedding_query_list = ', '.join(map(str, embedding_query))

    sql_query = f"""
        WITH search_results AS (
            SELECT query.id AS query_id, base.id AS base_id, distance
            FROM VECTOR_SEARCH(
                TABLE `{project_id}.{dataset_id}.{table_id}`, 'content_vector',
                (SELECT ARRAY[{embedding_query_list}] AS content_vector, 'query_vector' AS id),
                top_k => {top_k}, distance_type => 'COSINE', options => '{{"use_brute_force": true}}')
        )
        SELECT sr.query_id, sr.base_id, sr.distance, ed.text, ed.title, ed.category
        FROM search_results sr
        JOIN `{project_id}.{dataset_id}.{table_id}` ed ON sr.base_id = ed.id
    """

    query_parameters = []
    if category:
        # Bind the caller-supplied category as a named parameter instead of
        # splicing it into the SQL string, which allowed SQL injection.
        # NOTE(review): this filter is applied after VECTOR_SEARCH has already
        # selected top_k rows, so fewer than top_k items may be returned.
        sql_query += " WHERE ed.category = @category"
        query_parameters.append(
            bigquery.ScalarQueryParameter("category", "STRING", str(category))
        )

    sql_query += " ORDER BY sr.distance;"

    client = bigquery.Client()
    job_config = bigquery.QueryJobConfig(query_parameters=query_parameters)
    query_job = client.query(sql_query, job_config=job_config)  # Make an API request.

    rows = []
    for row in query_job:
        print(row.title)
        rows.append({
            "text": row.text,
            "title": row.title,
            "distance": row.distance,
            "category": row.category,
        })

    response = {
        "items": rows
    }
    print('sending response')
    print(len(rows))
    return _json_response(response, 200)
0 commit comments