
Commit 8a90f79

material
1 parent 7395ef7 commit 8a90f79

6 files changed: +6110 −0 lines changed


01-intro.py

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "llm==0.24.2",
#     "llm-anthropic==0.15.1",
#     "marimo",
#     "mohtml==0.1.7",
#     "pydantic==2.11.3",
#     "python-dotenv==1.1.0",
# ]
# ///

import marimo

__generated_with = "0.13.6"
app = marimo.App(width="medium")


@app.cell
def _():
    import marimo as mo
    return (mo,)


@app.cell
def _():
    import llm
    from dotenv import load_dotenv

    # Load API keys from a local .env file
    load_dotenv(".env")
    return (llm,)


@app.cell
def _(llm):
    model = llm.get_model("gpt-4o-mini")

    resp = model.prompt("Write me a haiku about Python")
    return model, resp


@app.cell
def _(resp):
    # Inspect the raw response payload, not just the generated text
    resp.json()
    return


@app.cell
def _(model):
    from pydantic import BaseModel

    class Haiku(BaseModel):
        poem: str

    class Haikus(BaseModel):
        topic: str
        haikus: list[Haiku]

    # Ask for structured output that follows the pydantic schema
    out = model.prompt("Haiku about Python", schema=Haikus)
    return BaseModel, out


@app.cell
def _(out):
    import json

    json.loads(out.json()["content"])
    return (json,)


@app.cell
def _(model):
    # A conversation keeps earlier turns as context for follow-up prompts
    convo = model.conversation()

    _ = convo.prompt("Give me a haiku about Python")
    print(_.text())
    print("\n")
    _ = convo.prompt("Give me another one about Snakes")
    print(_.text())
    return


@app.cell
def _(mo, model):
    # Wire a conversation into marimo's chat widget
    conversation = model.conversation()
    chat_widget = mo.ui.chat(lambda messages: conversation.prompt(messages[-1].content))
    chat_widget
    return


@app.cell
def _(BaseModel, json, mo, model):
    class Summary(BaseModel):
        title: str
        summary: str
        pros: list[str]
        cons: list[str]

    def summary(text_in):
        resp = model.prompt(
            f"Make a summary of the following text: {text_in}",
            schema=Summary)
        return json.loads(resp.json()["content"])

    text_widget = mo.ui.text_area(
        label="Input to summary function"
    ).form()

    text_widget
    return summary, text_widget


@app.cell
def _(summary, text_widget):
    from pprint import pprint

    pprint(summary(text_widget.value))
    return


@app.cell
def _():
    return


if __name__ == "__main__":
    app.run()
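
The structured-output cells above read the schema-constrained JSON back with `json.loads(out.json()["content"])` and keep it as a plain dict. A minimal alternative sketch, assuming the `"content"` field really is the JSON document for the schema (as the notebook treats it), is to validate it straight back into the pydantic models; the `parse_haikus` helper below is hypothetical and not part of the commit. Because of the inline script metadata at the top of the file, a recent marimo can presumably also launch the notebook in an isolated environment, e.g. `marimo edit --sandbox 01-intro.py`.

from pydantic import BaseModel


class Haiku(BaseModel):
    poem: str


class Haikus(BaseModel):
    topic: str
    haikus: list[Haiku]


def parse_haikus(raw_json: str) -> Haikus:
    # Hypothetical helper: validate the model output against the schema.
    # Raises a pydantic ValidationError if the output drifted from it.
    return Haikus.model_validate_json(raw_json)


# Usage sketch, assuming "content" holds the JSON string as above:
# haikus = parse_haikus(out.json()["content"])
# print(haikus.haikus[0].poem)

The upside over a raw dict is typed access (`haikus.topic`, `haikus.haikus[0].poem`) and an early, explicit failure when the model returns something that does not fit the schema.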

02-score.py

Lines changed: 161 additions & 0 deletions
@@ -0,0 +1,161 @@
import marimo

__generated_with = "0.13.6"
app = marimo.App(width="medium")


@app.cell
def _():
    import marimo as mo
    import polars as pl
    import llm
    from dotenv import load_dotenv

    load_dotenv(".env")
    return llm, mo, pl


@app.cell
def _(pl):
    df = pl.read_csv("spam.csv")
    df.head(200).group_by("label").len()
    return (df,)


@app.cell
async def _():
    import asyncio
    from mosync import async_map_with_retry

    # Small demo of mosync's concurrent map with retries
    async def delayed_double(x):
        await asyncio.sleep(1)
        return x * 2

    results = await async_map_with_retry(
        range(100),
        delayed_double,
        max_concurrency=10,
        description="Showing a simple demo"
    )
    return (async_map_with_retry,)


@app.cell
def _(llm):
    for model in llm.get_async_models():
        print(model.model_id)
    return


@app.cell
def _(llm):
    from diskcache import Cache

    cache = Cache("accuracy-experiment")

    models = {
        "gpt-4": llm.get_async_model("gpt-4"),
        "gpt-4o": llm.get_async_model("gpt-4o"),
    }

    prompt = "is this spam or ham? only reply with spam or ham"
    mod = "gpt-4o"

    # Cache responses on disk, keyed by (text, prompt, model),
    # so reruns of the experiment don't pay for the same call twice
    async def classify(text, prompt=prompt, model=mod):
        tup = (text, prompt, model)
        if tup in cache:
            return cache[tup]
        resp = await models[model].prompt(prompt + "\n" + text).json()
        cache[tup] = resp
        return resp
    return classify, prompt


@app.cell
async def _(classify):
    await classify("hello there")
    return


@app.cell
async def _(async_map_with_retry, classify, df):
    n_eval = 200

    llm_results = await async_map_with_retry(
        [_["text"] for _ in df.head(n_eval).to_dicts()],
        classify,
        max_concurrency=3,
        description="Running LLM experiments"
    )
    return llm_results, n_eval


@app.cell
def _(df, llm_results, mo, n_eval, pl, prompt):
    # Join predictions back onto the labelled rows and count exact matches
    n_correct = pl.DataFrame({**d, "pred": p} for d, p in zip(
        df.head(200).to_dicts(),
        [i.result["content"] for i in llm_results]
    )).filter(pl.col("label") == pl.col("pred")).shape[0]

    mo.md(f"""
    ### Prompt:
    ```
    {prompt}
    ```
    The accuracy is {n_correct}/{n_eval} = {n_correct/n_eval*100:.1f}%
    """)
    return


@app.cell
def _(mo):
    mo.md(
        r"""
        Let's jot down some summaries.

        - "is this spam or ham? only reply with spam or ham" / `gpt-4` `67.0%`
        - "is this spam or ham? only reply with spam or ham" / `gpt-4o` `67.5%`
        - "sometimes we need to deal with spammy text messages, that often promise free/cheap good. is this spam or ham? only reply with spam or ham" / `gpt-4` `66.5%`
        - "sometimes we need to deal with spammy text messages, that often promise free/cheap good. is this spam or ham? only reply with spam or ham" / `gpt-4o` `72.5%`
        """
    )
    return


@app.cell
def _(df):
    df
    return


@app.cell
def _(mo):
    mo.md("""Running this experiment cost me about $2. In fairness: I had to rerun a few things a few times. But at the same time: that's pretty darn expensive for 6 variants on just 200 examples! Especially when you consider you could also build a spaCy/scikit-learn pipeline for this task.""")
    return


@app.cell
def _(df):
    import numpy as np
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.feature_extraction.text import CountVectorizer

    # Bag-of-words + logistic regression baseline: train on 200 held-out rows,
    # evaluate on the same 200 rows the LLMs saw
    df_valid, df_train = df.head(200), df.tail(200)
    text_valid = df_valid["text"].to_list()
    text_train = df_train["text"].to_list()
    y_valid = df_valid["label"].to_list()
    y_train = df_train["label"].to_list()

    pipe = make_pipeline(CountVectorizer(), LogisticRegression())

    # It's pretty dang accurate
    preds = pipe.fit(text_train, y_train).predict(text_valid)
    np.mean(preds == np.array(y_valid))
    return


if __name__ == "__main__":
    app.run()
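
The summary cell above jots down one accuracy per prompt/model combination. A rough sketch of how that grid could be produced with the same diskcache-backed `classify` coroutine follows; `run_grid` is a hypothetical helper, plain `asyncio.gather` stands in for `async_map_with_retry`'s retries and concurrency limit, and the predicted label is assumed to sit under `"content"`, mirroring the `i.result["content"]` access in the accuracy cell.

import asyncio


async def run_grid(texts, labels, prompts, model_names, classify):
    # Hypothetical sketch: score every prompt/model pair on the same texts
    # and return one accuracy per combination, reusing the cached classify().
    scores = {}
    for prompt_text in prompts:
        for model_name in model_names:
            preds = await asyncio.gather(
                *(classify(t, prompt=prompt_text, model=model_name) for t in texts)
            )
            # Predicted label assumed to live under "content", as above
            hits = sum(
                p["content"].strip().lower() == y for p, y in zip(preds, labels)
            )
            scores[(prompt_text, model_name)] = hits / len(labels)
    return scores

Because the cache key is (text, prompt, model), rerunning the grid after a crash or after adding one more prompt only pays for the calls that have not been scored yet.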

0 commit comments
