Skip to content

Commit 738ec97

Browse files
committed
Update
1 parent 6e8d901 commit 738ec97

18 files changed

+782
-17
lines changed
1.58 KB
Binary file not shown.

Democehk/langchain.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from dotenv import load_dotenv
2+
import streamlit as st
3+
from PyPDF2 import PdfReader
4+
from langchain.text_splitter import CharacterTextSplitter
5+
from langchain.embeddings.openai import OpenAIEmbeddings
6+
from langchain.vectorstores import FAISS
7+
from langchain.chains.question_answering import load_qa_chain
8+
from langchain.llms import OpenAI
9+
from langchain.callbacks import get_openai_callback
10+
11+
12+
def main():
    """Run the "Ask your PDF" Streamlit page.

    Flow: load environment variables from a ``.env`` file, let the user
    upload a PDF, extract its text, split the text into overlapping chunks,
    index the chunks in a FAISS vector store, and answer a free-text
    question with a "stuff" question-answering chain over the most similar
    chunks.

    ``OpenAIEmbeddings()`` and ``OpenAI()`` are constructed without explicit
    credentials; presumably they pick the API key up from the environment
    populated by ``load_dotenv()`` -- confirm against your deployment.

    Returns:
        None. All output goes to the Streamlit page; the OpenAI usage
        callback is printed to stdout.
    """
    load_dotenv()
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask your PDF 💬")

    # upload file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        # Nothing uploaded yet on this rerun; render only the uploader.
        return

    # extract the text
    pdf_reader = PdfReader(pdf)
    # `or ""` tolerates pages for which no text comes back (e.g. image-only
    # pages); join avoids repeated string concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Scanned or empty PDFs leave nothing to index or answer from, so
        # stop before spending an embedding call on an empty list.
        st.warning("No extractable text was found in this PDF.")
        return

    # create embeddings
    embeddings = OpenAIEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if not user_question:
        return

    docs = knowledge_base.similarity_search(user_question)

    llm = OpenAI()
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_question)
        # Token/cost accounting for this request, written to the server log.
        print(cb)

    st.write(response)
52+
53+
# Launch the page only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Democehk/requirements.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Install with:  pip install --upgrade -r requirements.txt
# Each line must be a requirement specifier, not a `pip install` command.
google-cloud-aiplatform
langchain
# Needed by langchain's OpenAI / OpenAIEmbeddings wrappers.
openai
bs4
docarray
tiktoken
streamlit
faiss-cpu
pypdf
# langchain.py in this directory imports `PyPDF2` and `dotenv`.
PyPDF2
python-dotenv

Langchain/pdf test.ipynb

Whitespace-only changes.

Langchain/pdf_analysis.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# from langchain.document_loaders import PyPDFLoader
2+
# from langchain.vectorstores import FAISS
3+
# from langchain.embeddings.openai import OpenAIEmbeddings
4+
5+
# loader = PyPDFLoader("samplepdf.pdf")
6+
# pages = loader.load_and_split()
7+
8+
# # Set your OpenAI API key here
9+
# openai_api_key = os.environ["OPENAI_API_KEY"]  # read from the environment; never commit real keys
10+
11+
# faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings(openai_api_key=openai_api_key))
12+
# docs = faiss_index.similarity_search("How will the community be engaged?", k=2)
13+
# for doc in docs:
14+
# print(str(doc.metadata["page"]) + ":", doc.page_content[:300])
15+
16+
from langchain.text_splitter import CharacterTextSplitter
17+
from langchain.embeddings.openai import OpenAIEmbeddings
18+
from langchain.vectorstores import FAISS
19+
from langchain.chains.question_answering import load_qa_chain
20+
from langchain.llms import OpenAI
21+
from langchain.callbacks import get_openai_callback
22+
23+
24+
25+
def main():
    """Run the "Ask your PDF" Streamlit page.

    Flow: load environment variables from a ``.env`` file, let the user
    upload a PDF, extract its text, split the text into overlapping chunks,
    index the chunks in a FAISS vector store, and answer a free-text
    question with a "stuff" question-answering chain over the most similar
    chunks.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable and passed to both the embeddings and the LLM.

    Returns:
        None. All output goes to the Streamlit page; the OpenAI usage
        callback is printed to stdout.
    """
    # Imported here because this file's import section does not bring these
    # names into scope, although the function depends on them.
    import os

    import streamlit as st
    from dotenv import load_dotenv
    from PyPDF2 import PdfReader

    load_dotenv()
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask your PDF 💬")

    # NOTE(review): a literal key used to be hard-coded here and was
    # committed to version control; it should be revoked.
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        st.error(
            "OPENAI_API_KEY is not set. Add it to your environment or .env file."
        )
        return

    # upload file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        # Nothing uploaded yet on this rerun; render only the uploader.
        return

    # extract the text
    pdf_reader = PdfReader(pdf)
    # `or ""` tolerates pages for which no text comes back (e.g. image-only
    # pages); join avoids repeated string concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # Scanned or empty PDFs leave nothing to index or answer from, so
        # stop before spending an embedding call on an empty list.
        st.warning("No extractable text was found in this PDF.")
        return

    # create embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if not user_question:
        return

    docs = knowledge_base.similarity_search(user_question)

    # Use the same key as the embeddings so both calls share one credential.
    llm = OpenAI(openai_api_key=openai_api_key)
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=user_question)
        # Token/cost accounting for this request, written to the server log.
        print(cb)

    st.write(response)
66+
67+
68+
# Launch the page only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Langchain/samplepdf.pdf

445 KB
Binary file not shown.
0 Bytes
Binary file not shown.
Binary file not shown.
1.1 KB
Binary file not shown.

TestDemo-2/ai_chat.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262
import os
6363
from pprint import pprint
6464

65-
6665
# this is a key file for a service account, which only has the role "Vertex AI User"
6766
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'my_credentials.json'
6867

0 commit comments

Comments
 (0)