From 9294c766674765e5dd8b277129fc4bf61ceeeb01 Mon Sep 17 00:00:00 2001
From: "Saurabh S. Chaturvedi"
Date: Thu, 7 Sep 2017 23:48:33 +0530
Subject: [PATCH] Effort to add news links

---
 keywords.txt      |  1 -
 runtime.txt       |  2 +-
 useful_twitter.py | 50 +++++++++++++++++++++++------------------------
 3 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/keywords.txt b/keywords.txt
index 825349a..c6a1b17 100644
--- a/keywords.txt
+++ b/keywords.txt
@@ -16,7 +16,6 @@ Tkinter
 scala
 einstein
 feynman
-deep learning
 multithreading
 parallel computing
 apache framework

diff --git a/runtime.txt b/runtime.txt
index fc81e75..cfa5aa5 100644
--- a/runtime.txt
+++ b/runtime.txt
@@ -1 +1 @@
-python-3.6.2
\ No newline at end of file
+python-3.6.2

diff --git a/useful_twitter.py b/useful_twitter.py
index 6d44e00..9cff43a 100644
--- a/useful_twitter.py
+++ b/useful_twitter.py
@@ -2,6 +2,8 @@
 Script defining my Twitter bot, using sixohsix's Python wrapper for the
 Twitter API.
 """
+# Employ machine learning - follow only those people who follow back, and unfollow only
+# those people who don't follow back!

 # Instead of searching tweets and then doing actions on them, why not try
 # streaming interesting tweets in realtime and then performing actions on them?

@@ -27,6 +29,8 @@
                                flags=re.IGNORECASE)
 #Copyright (c) 2013-2016 Molly White
 #Above offensive compilation is not my stuff
+news_block_expr = re.compile(r'(?s).*?(.*?).*?')
+latest_expr = re.compile(r'(?s)    (.*)')

 try:
     oauth = OAuth(
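The docstring note above floats streaming tweets in realtime instead of polling search. A minimal sketch of what that could look like with sixohsix's wrapper, assuming placeholder credentials and an illustrative track filter (this is not the bot's current code):

    # Sketch of the realtime-streaming idea, under assumed credentials.
    from twitter import OAuth, TwitterStream

    CONSUMER_KEY = "placeholder"     # assumption: real credentials live in config
    CONSUMER_SECRET = "placeholder"
    TOKEN = "placeholder"
    TOKEN_SECRET = "placeholder"

    stream = TwitterStream(auth=OAuth(TOKEN, TOKEN_SECRET,
                                      CONSUMER_KEY, CONSUMER_SECRET))
    # statuses.filter yields matching tweets as they are posted.
    for tweet in stream.statuses.filter(track="python,programming"):
        print(tweet.get("text"))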
@@ -131,17 +135,22 @@ def print_tweet(tweet):
         hashtags.append(h["text"])
     print(hashtags)

-def find_news():
+def find_news(): # I'm adventuring with regular expressions for parsing!
     nyTech = requests.get('https://nytimes.com/section/technology')
-    latest_patt = r'(?s)    (.*)'
-    latest = re.search(latest_patt, nyTech.text)
-    news = re.findall(r'(?s)(.*?)', latest.group(1))
-    news = [item.strip() for item in list(set(news))]
-    for i in range(len(news)):
-        item = news[i]
-        if item.startswith('Daily Report: '):
-            news[i] = item[14:]
-    tv = requests.get('https://theverge.com', headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36'})
+
+    latest = latest_expr.search(nyTech.text)
+    #news = re.findall(r'(?s)(.*?)', latest.group(1))
+    news_blocks = news_block_expr.findall(latest.group(1))
+    #news_links = re.findall(r'(?s)', latest.group(1))
+    #news = [item.strip() for item in list(set(news))]
+    news = []
+    for i in range(len(news_blocks)):
+        item = news_blocks[i][1].strip() + ' ' + news_blocks[i][0]
+        if item.startswith('Daily Report: '):
+            item = item[14:]
+        news.append(item)
+
+    '''tv = requests.get('https://theverge.com', headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Cafari/537.36'})
     feed_patt = r'(?s)(.*?)'
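A minimal sketch of the two-group extraction find_news now performs, assuming NYT-style headline markup; the tag and class names below are placeholders, not the patterns actually compiled in the patch (group 1 captures the link, group 2 the headline, matching the news_blocks[i][0] / news_blocks[i][1] usage above):

    import re
    import requests

    # Assumed markup shape; "headline" is an illustrative class name.
    news_block_expr = re.compile(
        r'(?s)<h2 class="headline">\s*<a href="(.*?)">(.*?)</a>')

    def find_news_sketch():
        nyTech = requests.get('https://nytimes.com/section/technology')
        news = []
        for link, headline in news_block_expr.findall(nyTech.text):
            item = headline.strip() + ' ' + link   # headline first, then its link
            if item.startswith('Daily Report: '):  # strip the recurring column prefix
                item = item[len('Daily Report: '):]
            news.append(item)
        return news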
@@ ... @@
         if len(fr) > 4990:
         #To unfollow old follows because Twitter doesn't allow a large following / followers ratio for people with less followers.
         #Using 4990 instead of 5000 for 'safety', so that I'm able to follow some interesting people
         #manually even after a bot crash.
-            for i in range(2500): #probably this is the upper limit of mass unfollow in one go
+            for i in range(2500): # Probably this is the upper limit of mass unfollow in one go, so only 2500 are unfollowed.
                 unfollow(fr.pop())

         for tweet in tweets:

@@ -199,21 +209,11 @@ def run(self):
             print()
             print("Heart =", fav_tweet(tweet))
             print("Retweet =", retweet(tweet))
-            #prev_follow = tweet["user"]["following"]
             self.t.friendships.create(_id=tweet["user"]["id"])
-            #now_follow = t.users.lookup(user_id=tweet["user"]["id"])[0]["following"]
-            #if prev_follow==0 and now_follow==1:
-            #    time.sleep(11)
-            #    unfollow(fr.pop())
             if "retweeted_status" in tweet:
                 op = tweet["retweeted_status"]["user"]
-                #prev_follow_o = op["following"]
-                #time.sleep(11)
                 self.t.friendships.create(_id=op["id"])
-                #now_follow_o = t.users.lookup(user_id=op["id"])[0]["following"]
-                #if prev_follow_o==0 and now_follow_o==1:
-                #    time.sleep(11)
-                #    unfollow(fr.pop())
+
             print()
             if not news:
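For reference, the follow-cap housekeeping the range(2500) hunk tweaks could be condensed as below. This is a hedged sketch: trim_follows and the two constants are illustrative names, and t is assumed to be an authenticated twitter.Twitter client from sixohsix's wrapper.

    # Sketch only: stay under Twitter's ~5000-follow threshold by unfollowing
    # the oldest follows. friends.ids returns IDs newest-first, so pop() from
    # the end of the list removes the oldest follow.
    FOLLOW_CAP = 4990    # a little under 5000, leaving room for manual follows
    BATCH_LIMIT = 2500   # presumed per-run cap on mass unfollows

    def trim_follows(t):
        fr = t.friends.ids()["ids"]
        excess = max(0, len(fr) - FOLLOW_CAP)
        for _ in range(min(excess, BATCH_LIMIT)):
            t.friendships.destroy(_id=fr.pop())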