forked from OIM3640/Text-Analysis-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapi.py
More file actions
104 lines (86 loc) · 3.17 KB
/
api.py
File metadata and controls
104 lines (86 loc) · 3.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
# Load variables from a local .env file, then fail fast if the OpenAI key is
# absent so later API calls don't die with a less obvious auth error.
load_dotenv()
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError("Please set OPENAI_API_KEY in a .env file or as an environment variable.")
# OpenAI() picks up OPENAI_API_KEY from the environment on its own; the check
# above only exists to give a clearer error message.
client = OpenAI()
# Stopwords tailored to Iliad / Odyssey style text.
# NOTE: kept as a set for O(1) membership tests; duplicates from the original
# list ("one", "us", "went") have been removed — set contents are unchanged.
STOPWORDS = {
    # core English
    "the", "and", "to", "of", "a", "in", "that", "it", "is", "was", "for",
    "on", "with", "as", "by", "this", "but", "from", "or", "not", "are",
    "at", "be", "an", "which", "so", "we", "were", "have", "has", "had",
    # pronouns
    "i", "you", "he", "she", "they", "them", "their", "theirs", "his",
    "her", "hers", "him", "me", "my", "mine", "our", "ours", "us", "your",
    "yours", "who", "whom", "whose",
    # connectors / often-seen words
    "what", "when", "where", "why", "how", "then", "now", "there", "here",
    "thus", "all", "one",
    # helper verbs
    "shall", "will", "would", "may", "might", "can", "could", "must", "should",
    # archaic
    "thy", "thou", "thee", "ye", "o", "unto", "doth", "hath",
    # dialogue
    "said", "say", "says",
    # iterated words from testing
    "no", "nor", "more", "these", "some", "yet", "o'er", "come", "own",
    "into", "if", "went", "been", "up", "out", "do", "about", "man", "go",
    "upon", "men", "did", "tell", "see", "any", "made",
    "other", "good", "much", "back",
    # stray unicode
    "“", "”", "—",
}
def extract_entities_with_openai(text):
    """
    Call OpenAI to get characters / places / gods from a passage.

    Args:
        text: passage of text (Iliad/Odyssey style) to scan for entities.

    Returns:
        dict with keys "characters", "places", "gods", each mapping to a
        list of name strings. Always returns all three keys as lists, even
        when the model reply is malformed or missing a key.
    """
    prompt = (
        "You will be given a passage from Homer (Iliad or Odyssey). "
        "Extract the names of characters, places, and gods that are explicitly mentioned. "
        "Return ONLY valid JSON like this:\n"
        "{\n"
        ' \"characters\": [\"Achilles\"],\n'
        ' \"places\": [\"Troy\"],\n'
        ' \"gods\": [\"Athena\"]\n'
        "}\n"
        "If none are found, return empty lists.\n\n"
        "Passage:\n" + text
    )
    resp = client.chat.completions.create(
        model="gpt-5-nano",
        messages=[
            {"role": "system", "content": "You extract entities from classical literature and return strict JSON."},
            {"role": "user", "content": prompt}
        ],
        temperature=1
    )
    empty = {"characters": [], "places": [], "gods": []}
    content = resp.choices[0].message.content or ""
    # Models often wrap JSON in markdown fences despite "ONLY valid JSON";
    # strip a leading ```/```json fence so such replies still parse.
    stripped = content.strip()
    if stripped.startswith("```"):
        stripped = stripped.strip("`").strip()
        if stripped.lower().startswith("json"):
            stripped = stripped[4:]
    try:
        data = json.loads(stripped)
    except json.JSONDecodeError:
        return empty
    # Validate the shape: the parse can succeed on a scalar or a dict with
    # missing/non-list values, which would break downstream tallying.
    if not isinstance(data, dict):
        return empty
    for key in empty:
        if not isinstance(data.get(key), list):
            data[key] = []
    return data
def tally_entities(all_passage_entities):
    """
    Combine per-passage entity dicts into frequency dicts.

    Args:
        all_passage_entities: iterable of dicts as returned by
            extract_entities_with_openai (keys "characters", "places",
            "gods"; missing keys are treated as empty).

    Returns:
        Tuple of three dicts (char_counts, place_counts, god_counts)
        mapping each stripped, non-empty name to its occurrence count.
    """
    # The counting logic was triplicated verbatim for the three categories;
    # one helper keeps the whitespace-strip / skip-empty rules in one place.
    def _count(key):
        counts = {}
        for ent in all_passage_entities:
            for name in ent.get(key, []):
                name = name.strip()
                if name:
                    counts[name] = counts.get(name, 0) + 1
        return counts

    return _count("characters"), _count("places"), _count("gods")