Commit

test
cboettig committed Dec 19, 2024
1 parent a255d8d commit b4b03d4
Showing 9 changed files with 357 additions and 2 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/deploy.yml
@@ -0,0 +1,20 @@
name: Sync to Hugging Face hub
on:
  push:
    branches: [main]

  # to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true
      - name: Push to hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: git push https://cboettig:[email protected]/spaces/${GITHUB_ACTOR}/streamlit main
11 changes: 11 additions & 0 deletions Dockerfile
@@ -0,0 +1,11 @@
FROM gitlab-registry.nrp-nautilus.io/cboettig/images
WORKDIR /app

COPY . .

# huggingface uses port 7860 by default
CMD streamlit run app.py \
    --server.address 0.0.0.0 \
    --server.port 7860 \
    --server.headless true \
    --server.fileWatcherType none
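
# to test the image locally (sketch; the tag name is illustrative):
#   docker build -t streamlit-demo .
#   docker run -p 7860:7860 streamlit-demo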
23 changes: 23 additions & 0 deletions Makefile
@@ -0,0 +1,23 @@
BASE="nature.datahub.berkeley.edu"
MAKEFLAGS += s

.PHONY: serve
serve:
	@echo "\n 🌎 preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
	streamlit run app.py --server.port 8501 1> /dev/null 2>&1


.PHONY: chat
chat:
	@echo "\n 🌎 preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
	streamlit run chat.py --server.port 8501 1> /dev/null 2>&1

.PHONY: rag
rag:
	@echo "\n 🌎 preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
	streamlit run rag.py --server.port 8501 1> /dev/null 2>&1

.PHONY: sql
sql:
	@echo "\n 🌎 preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
	streamlit run sql.py --server.port 8501 1> /dev/null 2>&1
21 changes: 19 additions & 2 deletions README.md
@@ -1,2 +1,19 @@
# app-python-template
Creating Web applications using Streamlit & LLM Agents
---
title: Streamlit Demo
emoji: 🌍
colorFrom: yellow
colorTo: indigo
sdk: docker
pinned: false
license: bsd-2-clause
---

For ESPM-157 students using <https://nature.datahub.berkeley.edu> servers:
To preview locally, use

```
make
```

and click the link provided
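
The Makefile in this commit also defines targets for the individual demo pages, e.g.

```
make chat
make rag
make sql
```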

42 changes: 42 additions & 0 deletions app.py
@@ -0,0 +1,42 @@
import streamlit as st

st.set_page_config(
    page_title="Streamlit demos",
)

st.sidebar.success("Select a demo above.")

st.title("Exploring LLM Agent Use")

'''
Select any of the demos on the sidebar. Each illustrates a different way we can incorporate an LLM tool to perform reliable data retrieval (sometimes called retrieval-augmented generation, RAG) from specified data resources.

In this module, you will adapt one or more of these agents into an interactive application exploring the redlining data we encountered in Module 3 (as seen below).
'''

import leafmap.maplibregl as leafmap
import ibis
from ibis import _

con = ibis.duckdb.connect()


# fixme could create drop-down selection of the 300 cities
city_name = st.text_input("Select a city", "Oakland")
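# One possible drop-down alternative (sketch only; assumes the distinct city names can be
# read from the same GeoPackage before the widget is drawn):
#
# cities = sorted(
#     con.read_geo("/vsicurl/https://dsl.richmond.edu/panorama/redlining/static/mappinginequality.gpkg")
#        .select(_.city).distinct().execute()["city"].dropna().tolist()
# )
# city_name = st.selectbox("Select a city", cities, index=cities.index("Oakland"))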

# Extract the specified city
city = (con
    .read_geo("/vsicurl/https://dsl.richmond.edu/panorama/redlining/static/mappinginequality.gpkg")
    .filter(_.city == city_name, _.residential)
    .execute()
)

# Render the map
m = leafmap.Map(style="positron")
if city_name == "Oakland":
    m.add_cog_layer("https://espm-157-f24.github.io/spatial-carl-amanda-tyler/ndvi.tif", name="ndvi", palette = "greens")
m.add_gdf(city, "fill", paint = {"fill-color": ["get", "fill"], "fill-opacity": 0.8})
m.add_layer_control()
m.to_streamlit()

54 changes: 54 additions & 0 deletions pages/chat.py
@@ -0,0 +1,54 @@
import streamlit as st
from openai import OpenAI

st.title("Chat Demo")

'''
This application presents a traditional chat interface to a range of open-source or open-weights models running on the National Research Platform (<https://nrp.ai>). Unlike the other two demos, this pattern does not draw on specified data resources.
'''


with st.sidebar:
    model = st.radio("Select an LLM:", ['olmo', 'gemma2', 'phi3', 'llama3', 'embed-mistral', 'mixtral', 'gorilla', 'groq-tools', 'llava'])
    st.session_state["model"] = model

## the dockerized streamlit app reads the API key from os.getenv(); otherwise fall back to st.secrets
import os
api_key = os.getenv("LITELLM_KEY")
if api_key is None:
    api_key = st.secrets["LITELLM_KEY"]


if "messages" not in st.session_state:
st.session_state.messages = []

for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])

client = OpenAI(
    api_key = api_key,
    base_url = "https://llm.nrp-nautilus.io"
)
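# note: the OpenAI client only needs a base_url to talk to an OpenAI-compatible endpoint;
# here it points at the NRP gateway rather than api.openai.com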

# Button to clear session state
if st.button('Clear History'):
    st.session_state.clear()

if prompt := st.chat_input("What is up?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        stream = client.chat.completions.create(
            model=st.session_state["model"],
            messages=[
                {"role": m["role"], "content": m["content"]}
                for m in st.session_state.messages
            ],
            stream=True,
        )
        response = st.write_stream(stream)
    st.session_state.messages.append({"role": "assistant", "content": response})
105 changes: 105 additions & 0 deletions pages/rag.py
@@ -0,0 +1,105 @@
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader

## the dockerized streamlit app reads the API key from os.getenv(); otherwise fall back to st.secrets
import os
api_key = os.getenv("LITELLM_KEY")
if api_key is None:
    api_key = st.secrets["LITELLM_KEY"]


st.title("RAG Demo")


'''
This demonstration combines an LLM trained specifically for text embedding (`embed-mistral` in our case) with a traditional "instruct"-tuned LLM (`llama3`) to create a retrieval-augmented generation (RAG) interface to a provided PDF document. We can ask the model to return relatively precise citations to the matched text in the PDF document so we can verify its answers.

Provide a URL to a PDF document you want to ask questions about.
Once the document has been uploaded and parsed, ask your questions in the chat dialog that will appear below. The default example comes from a recent report on California's initiative for biodiversity conservation.
'''

# Create a file uploader?
# st.sidebar.file_uploader("Choose a PDF file", type=["pdf"])
url = st.text_input("PDF URL", "https://www.resources.ca.gov/-/media/CNRA-Website/Files/2024_30x30_Pathways_Progress_Report.pdf")

# +
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


# cache the parsed PDF so Streamlit does not re-download and re-parse it on every rerun
@st.cache_data
def pdf_loader(url):
    loader = PyPDFLoader(url)
    return loader.load()

docs = pdf_loader(url)


# Set up the language model
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model = "llama3", api_key = api_key, base_url = "https://llm.nrp-nautilus.io", temperature=0)

# Set up the embedding model
from langchain_openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings(
    model = "embed-mistral",
    api_key = api_key,
    base_url = "https://llm.nrp-nautilus.io"
)

# Build a retrieval agent
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = InMemoryVectorStore.from_documents(documents=splits, embedding=embedding)
retriever = vectorstore.as_retriever()
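# the retriever embeds each question with the same embed-mistral model and returns the
# most similar chunks, which are "stuffed" into the llama3 prompt assembled below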

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
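# rag_chain.invoke() returns a dict: "answer" holds the generated reply and "context"
# holds the retrieved document chunks (both are displayed below)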


# Place agent inside a streamlit application:

if prompt := st.chat_input("What is the goal of CA 30x30?"):
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        results = rag_chain.invoke({"input": prompt})
        st.write(results['answer'])

    with st.expander("See context matched"):
        st.write(results['context'][0].page_content)
        st.write(results['context'][0].metadata)


# adapt for memory / multi-question interaction with:
# https://python.langchain.com/docs/tutorials/qa_chat_history/

# Also see structured outputs.
82 changes: 82 additions & 0 deletions pages/sql.py
@@ -0,0 +1,82 @@
import streamlit as st

st.title("SQL demo")


'''
This demonstration illustrates building an LLM-based agent that performs tasks by generating and executing code in response to plain-text questions. In this example, a custom system prompt instructs the LLM to generate SQL, which is then executed against the parquet data we generated in Module 3. Note that the SQL query itself is shown, as well as the table produced by the query.
'''

## the dockerized streamlit app reads the API key from os.getenv(); otherwise fall back to st.secrets
import os
api_key = os.getenv("LITELLM_KEY")
if api_key is None:
    api_key = st.secrets["LITELLM_KEY"]


parquet = st.text_input("parquet file:", "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet")

# create sharable low-level connection, see: https://github.com/Mause/duckdb_engine
import sqlalchemy
eng = sqlalchemy.create_engine("duckdb:///:memory:")

# ibis can talk to this connection and create the VIEW
import ibis
from ibis import _
con = ibis.duckdb.from_connection(eng.raw_connection())
tbl = con.read_parquet(parquet, "mydata")

# langchain can also talk to this connection and see the table:
from langchain_community.utilities import SQLDatabase
db = SQLDatabase(eng, view_support=True)
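# view_support=True lets SQLDatabase include the ibis-created view ("mydata") when it
# introspects the usable tables for the prompt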

#db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
#print(db.get_usable_table_names())

# Build the template for system prompt
template = '''
You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
Always return all columns from a query (select *) unless otherwise instructed.
Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below.
Be careful to not query for columns that do not exist.
Also, pay attention to which column is in which table.
Pay attention to use today() function to get the current date, if the question involves "today".
Respond with only the SQL query to run. Do not repeat the question or explanation. Just the raw SQL query.
Only use the following tables:
{table_info}
Question: {input}
'''

with st.sidebar:
    model = st.selectbox("LLM:", ["gorilla", "llama3", "olmo"])


from langchain_core.prompts import PromptTemplate
prompt = PromptTemplate.from_template(template, partial_variables = {"dialect": "duckdb", "top_k": 10})

# Now we are ready to create our model and start querying!
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model=model,  # use the model selected in the sidebar (try llama3, gorilla, or olmo)
                 temperature=0,
                 api_key=api_key,
                 base_url = "https://llm.nrp-nautilus.io")


from langchain.chains import create_sql_query_chain
chain = create_sql_query_chain(llm, db, prompt)
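# the chain maps {"question": ...} to a raw SQL string; executing that SQL (via ibis/duckdb)
# is handled separately below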

question = st.chat_input("What is the mean ndvi by grade?")

if question:
    response = chain.invoke({"question": question})
    with st.chat_message("ai"):
        st.write(response)
    df = tbl.sql(response).head(10).execute()
    df  # bare variable: Streamlit "magic" renders the dataframe




1 change: 1 addition & 0 deletions requirements.txt
@@ -0,0 +1 @@
streamlit
