-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAI project
More file actions
97 lines (76 loc) · 2.43 KB
/
AI project
File metadata and controls
97 lines (76 loc) · 2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-
"""Untitled0.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1VXsDpUtbDoa8VfiJ7zf1LicPno-AB1Ku
"""
!pip install google-cloud-aiplatform
!pip install langchain
!pip install chromadb
!pip install pytube
!pip install youtube-transcript-api
!pip install gradio
from google.cloud import aiplatform
from google.colab import auth as google_auth
google_auth.authenticate_user()
import vertexai
PROJECT_ID = "fleet-reserve-406414" #enter your project id here
vertexai.init(project=PROJECT_ID)
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import VertexAI
llm = VertexAI(
model_name="text-bison@001",
max_output_tokens=256,
temperature=0.1,
top_p=0.8,
top_k=40,
verbose=True,
)
from langchain.embeddings import VertexAIEmbeddings
# Embedding
EMBEDDING_QPM = 100
EMBEDDING_NUM_BATCH =5
embeddings = VertexAIEmbeddings(
requests_per_minute=EMBEDDING_QPM,
num_instances_per_batch=EMBEDDING_NUM_BATCH,
)
loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=A8jyW_6hCGU&t=161s", add_video_info=True)
result = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=0)
docs = text_splitter.split_documents(result)
print(f"# of documents = {len(docs)}")
db = Chroma.from_documents(docs, embeddings)
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
qa = RetrievalQA.from_chain_type( llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
def sm_ask(question, print_results=True):
video_subset = qa({"query": question})
context = video_subset
prompt = f"""
Answer the following question in a detailed manner, using information from the text below. If the answer is not in the text,say I dont know and do not generate your own response.
Question:
{question}
Text:
{context}
Question:
{question}
Answer:
"""
parameters = {
"temperature": 0.1,
"max_output_tokens": 256,
"top_p": 0.8,
"top_k": 40
}
response = llm.predict(prompt, **parameters)
return {
"answer": response
}
import gradio as gr
def get_response(input_text):
response = sm_ask(input_text)
return response
grapp = gr.Interface(fn=get_response, inputs="text", outputs="text")
grapp.launch()