forked from OIM3640/Text-Analysis-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpart2_sensitivity_analysis.py
More file actions
50 lines (39 loc) · 1.85 KB
/
part2_sensitivity_analysis.py
File metadata and controls
50 lines (39 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import urllib.request
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
def _ensure_vader_lexicon():
    """Make sure the VADER lexicon is available to NLTK.

    Looks the resource up first and only calls ``nltk.download`` when the
    lookup raises ``LookupError``, so an already-present lexicon is not
    fetched again on every run.
    """
    try:
        nltk.data.find('sentiment/vader_lexicon.zip')
    except LookupError:
        nltk.download('vader_lexicon')


# Runs at import time, before any analyzer is constructed.
_ensure_vader_lexicon()
def fetch_text(url, timeout=30.0):
    """Fetch a text document from ``url`` and return it as a string.

    The response body is decoded as UTF-8. A leading byte-order mark, if
    present, is dropped so it does not end up as a stray U+FEFF character
    at the start of the returned text.

    Args:
        url: Address to read; passed straight to ``urllib.request.urlopen``.
        timeout: Seconds to wait on blocking network operations before
            giving up, so a stalled server cannot hang the script.

    Returns:
        The decoded text, or an empty string if fetching or decoding
        failed (the error is printed rather than raised).
    """
    # Function-scope import: only needed to name the HTTP protocol errors
    # below, and keeps the file's import block untouched.
    import http.client

    try:
        with urllib.request.urlopen(url, timeout=timeout) as f:
            # "utf-8-sig" is ordinary UTF-8 that also strips a leading BOM.
            return f.read().decode('utf-8-sig')
    except (OSError, ValueError, http.client.HTTPException) as e:
        # OSError covers URLError/HTTPError and timeouts; ValueError covers
        # malformed URLs and undecodable bytes; HTTPException covers
        # truncated or malformed HTTP responses.
        print("An error occurred while fetching the text:", e)
        return ""
def analyze_sentiment(text):
    """Score ``text`` as a whole with NLTK's VADER analyzer.

    A new ``SentimentIntensityAnalyzer`` is created on each call and its
    ``polarity_scores`` result is returned unchanged. ``main`` in this file
    reads the 'pos', 'neu', 'neg' and 'compound' entries of that result.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(text)
    return scores
def main():
    """Fetch the hard-coded Project Gutenberg text and print its scores.

    Nothing is printed by this function when ``fetch_text`` returns an
    empty string; ``fetch_text`` has already reported the failure.
    """
    source_url = "https://www.gutenberg.org/cache/epub/11/pg11.txt"
    book_text = fetch_text(source_url)
    if book_text:
        scores = analyze_sentiment(book_text)
        # Display sentiment analysis results, each to three decimal places
        print("Sentiment Analysis:")
        summary = f"Positive: {scores['pos']:.3f}, Neutral: {scores['neu']:.3f}, Negative: {scores['neg']:.3f}, Compound: {scores['compound']:.3f}"
        print(summary)
# Run the analysis only when this file is executed as a script, so that
# importing it does not fetch and score the text.
if __name__ == "__main__":
    main()
# Suggestions for improvement by ChatGPT:
# Prompt: "I just finished my Python assignment, can you help me go through every file and
# make any suggestions for improvement in terms of overall functionality, organization, and style?"
# Small things to fix:
# 🎯 NLP best practice: ideally nltk.download('vader_lexicon') should be inside a try/except, or placed in a setup script rather than in the main script. This is just cleaner: it avoids re-downloading every time.
# 📜 Extra tip: sentiment analysis works better when the text is broken into paragraphs or sentences. Right now, you're analyzing the whole book at once.
# Notes:
# nltk.data.find() avoids re-downloading vader_lexicon every time you run the script.
# Added a little formatting to make your output look cleaner (3 decimal places).