|
| 1 | +import csv |
| 2 | +import re |
| 3 | +import os |
| 4 | +import string |
| 5 | +import preprocessor.api as p |
| 6 | +import tweepy |
| 7 | +from nltk.corpus import stopwords |
| 8 | +from nltk.tokenize import word_tokenize |
| 9 | +import nltk |
| 10 | +# nltk.download('stopwords') |
| 11 | +# nltk.download('punkt') |
| 12 | + |
# Twitter API credentials.
# SECURITY: secrets must never be committed to source control. They are read
# from the environment instead; a missing variable raises KeyError naming it.
# Any key/token that was previously hard-coded in this file should be treated
# as leaked and regenerated in the Twitter developer console.
consumer_key = os.environ['TWITTER_CONSUMER_KEY']
consumer_secret = os.environ['TWITTER_CONSUMER_SECRET']

access_token = os.environ['TWITTER_ACCESS_TOKEN']
access_token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']

# Perform OAuth authentication with the application and user credentials.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# API client used by the rest of the script to query tweets.
api = tweepy.API(auth)
| 25 | + |
# Text emoticons that express a positive mood.
emoticons_happy = {
    ':-)', ':)', ';)', ':o)', ':]', ':3', ':c)', ':>', '=]', '8)', '=)', ':}',
    ':^)', ':-D', ':D', '8-D', '8D', 'x-D', 'xD', 'X-D', 'XD', '=-D', '=D',
    '=-3', '=3', ':-))', ":'-)", ":')", ':*', ':^*', '>:P', ':-P', ':P', 'X-P',
    'x-p', 'xp', 'XP', ':-p', ':p', '=p', ':-b', ':b', '>:)', '>;)', '>:-)',
    '<3',
}

# Text emoticons that express a negative mood.
emoticons_sad = {
    ':L', ':-/', '>:/', ':S', '>:[', ':@', ':-(', ':[', ':-||', '=L', ':<',
    ':-[', ':-<', '=\\', '=/', '>:(', ':(', '>.<', ":'-(", ":'(", ':\\', ':-c',
    ':c', ':{', '>:\\', ';(',
}

# Code-point ranges that make up the emoji character class below.
# NOTE(review): the last range (U+24C2..U+1F251) is very wide and also spans
# many non-emoji code points (e.g. CJK ideographs) -- confirm this is intended.
_EMOJI_RANGES = (
    u"\U0001F600-\U0001F64F",  # emoticons
    u"\U0001F300-\U0001F5FF",  # symbols & pictographs
    u"\U0001F680-\U0001F6FF",  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF",  # flags (iOS)
    u"\U00002702-\U000027B0",
    u"\U000024C2-\U0001F251",
)

# Matches one or more consecutive characters from any of the ranges above.
emoji_pattern = re.compile("[" + "".join(_EMOJI_RANGES) + "]+", flags=re.UNICODE)

# Every known emoticon, happy or sad.
emoticons = emoticons_happy | emoticons_sad
| 54 | + |
# Method clean_tweets()
def clean_tweets(tweet):
    """Return *tweet* with noise removed, as a single space-joined string.

    Steps, in order:
      1. strip colons and the ellipsis character,
      2. replace each run of non-ASCII characters with one space,
      3. apply ``emoji_pattern``,
      4. tokenize with NLTK and drop English stop words, known emoticons
         and tokens consisting only of punctuation.

    Args:
        tweet: Tweet text (``str``).

    Returns:
        The remaining tokens joined by single spaces (may be empty).
    """
    stop_words = set(stopwords.words('english'))
    punctuation_chars = set(string.punctuation)

    # After tweet preprocessing a colon is left behind where a mention or the
    # RT marker at the start of the tweet was removed.
    tweet = re.sub(r':', '', tweet)
    tweet = re.sub(r'…', '', tweet)
    # Replace consecutive non-ASCII characters with a space.
    tweet = re.sub(r'[^\x00-\x7F]+', ' ', tweet)
    # Remove emojis. Every range in emoji_pattern is non-ASCII, so after the
    # previous step this normally matches nothing; kept as a safeguard.
    tweet = emoji_pattern.sub(r'', tweet)

    word_tokens = word_tokenize(tweet)

    filtered_tweet = [
        w for w in word_tokens
        # NLTK's stop-word list is lower-case, so compare case-insensitively;
        # otherwise capitalised stop words ("The", "And") slip through.
        if w.lower() not in stop_words
        and w not in emoticons
        # `w in string.punctuation` would be a substring test and miss
        # multi-character punctuation tokens such as "..." or "``".
        and not all(ch in punctuation_chars for ch in w)
    ]
    return ' '.join(filtered_tweet)
| 78 | + |
# Harvest recent English tweets for each character name and store them, cleaned,
# in two CSV files: the cumulative 'total.csv' and a per-keyword '<name>.csv'.
loop = True  # leftover flag from an interactive menu; nothing in this block reads it
characters = ["Caitlyn", "Kai'Sa", 'Lucian', 'Thresh', 'Yasuo', 'Lee Sin']
field_names = ['Index', 'Keyword', 'Tweets']

for character in characters:

    print(
        '''
        1. Search tweets by keywords
        2. Exit
        ''')

    # The menu choice is hard-coded, so only the search branch ever runs.
    user_input = 1

    if int(user_input) == 1:
        search_term = character
        no_of_search_items = 50
        date_since = "2019-12-21"
        # Adding a location here would also make it possible to receive
        # latitude/longitude values.
        public_tweets = tweepy.Cursor(api.search,
                                      q=search_term,
                                      lang="en",
                                      since=date_since).items(no_of_search_items)

        total_exists = os.path.isfile('./total.csv')

        # Continue numbering after the rows already stored for this keyword.
        index = 0
        if total_exists:
            # Case: total.csv already exists. Read it in a separate pass so
            # the later writes can never land in the middle of the file.
            with open('total.csv', 'r', newline='') as existing_file:
                index = sum(
                    1
                    for each_row in csv.DictReader(existing_file)
                    # .get() tolerates a file without a 'Keyword' column.
                    if each_row.get('Keyword') == search_term
                )

        name = search_term + '.csv'
        # 'a' creates total.csv when missing and always appends at the end;
        # the per-keyword file is rewritten from scratch on every run.
        # newline='' is what the csv module expects for files it writes.
        with open('total.csv', 'a', newline='') as my_csv_file, \
                open(name, 'w', newline='') as file_name:
            writer = csv.DictWriter(my_csv_file, fieldnames=field_names)
            writer2 = csv.DictWriter(file_name, fieldnames=field_names)
            if not total_exists:
                # Case: total.csv did not exist, so it needs a header row.
                writer.writeheader()
            writer2.writeheader()

            for each_tweet in public_tweets:
                data = p.clean(each_tweet.text)
                data = clean_tweets(data)
                try:
                    # Interpret literal backslash escapes left in the text.
                    data = data.encode('utf-8').decode('unicode_escape')
                except UnicodeDecodeError:
                    # A stray or truncated backslash sequence must not abort
                    # the whole harvest; keep the cleaned text unchanged.
                    pass
                row = {'Index': index, 'Keyword': search_term, 'Tweets': data}
                writer.writerow(row)
                writer2.writerow(row)
                index += 1

    elif int(user_input) == 2:
        loop = False
    else:
        print('Please enter 1 or 2')
0 commit comments