diff --git a/README.md b/README.md index 91844b7..ff23c67 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,24 @@ pip install pillow Executing ========= -Execute `Gonewild.py` in the `./py/` directory. Include no arguments to start infinite loop which checks for and downloads new content. Other options available, see: +Execute `Gonewild.py` in the `./py/` directory. +Initial setup: +```bash +./py/Gonewild.py --reddit username password +./py/Gonewild.py --add-sub subreddit1,sub2 +``` +Include no arguments to start infinite loop which checks for and downloads new content. + +Other options available, see: ```bash python Gonewild.py --help ``` + +Upgrade from previous version +============================= + +```bash +sqlite3 ./database.db 'create table subs(id integer primary key autoincrement , sub text unique, sinceid text);' #upgrade db +./py/Gonewild.py --add-sub oldsub,oldsub2 #re-add old subs manually +``` diff --git a/py/DB.py b/py/DB.py index 0d82704..7abd00b 100755 --- a/py/DB.py +++ b/py/DB.py @@ -19,7 +19,7 @@ 'id integer primary key autoincrement, \n\t' + 'username text unique, \n\t' + 'sinceid text, \n\t' + - 'created integer, \n\t' + + 'created integer, \n\t' + 'updated integer, \n\t' + 'deleted integer, \n\t' + 'blacklist integer, \n\t' + @@ -27,6 +27,12 @@ 'rating integer, \n\t' + 'ratings integer \n\t', + 'subs' : + '\n\t' + + 'id integer primary key autoincrement, \n\t' + + 'sub text unique, \n\t' + + 'sinceid text \n\t', + 'posts' : '\n\t' + 'id text primary key, \n\t' + @@ -57,7 +63,7 @@ 'downs integer, \n\t' + 'foreign key(userid) references users(id)\n\t', - 'albums' : + 'albums' : '\n\t' 'id integer primary key, \n\t' + 'path text unique, \n\t' + @@ -76,7 +82,7 @@ 'source text, \n\t' + 'width integer, \n\t' + 'height integer, \n\t' + - 'size integer, \n\t' + + 'size integer, \n\t' + 'thumb text, \n\t' + 'type text, \n\t' + # image/video 'albumid integer, \n\t' + @@ -85,7 +91,7 @@ 'views integer, \n\t' + 'foreign key(userid) references
users(id), \n\t' + 'foreign key(albumid) references albums(id)\n\t', - + 'zips' : '\n\t' + 'zippath text unique, \n\t' + @@ -129,21 +135,21 @@ def __init__(self): # Create table for every schema given. for key in SCHEMA: self.create_table(key, SCHEMA[key]) - + def debug(self, text): tstamp = time.strftime('[%Y-%m-%dT%H:%M:%SZ]', time.gmtime()) text = '%s DB: %s' % (tstamp, text) self.logger.write('%s\n' % text) if self.logger != stderr: stderr.write('%s\n' % text) - + def create_table(self, table_name, schema): cur = self.conn.cursor() query = '''create table if not exists %s (%s)''' % (table_name, schema) cur.execute(query) self.commit() cur.close() - + def commit(self): try_again = True while try_again: @@ -152,7 +158,7 @@ def commit(self): try_again = False except: time.sleep(1) - + def insert(self, table, values): cur = self.conn.cursor() try: @@ -168,7 +174,7 @@ def insert(self, table, values): except sqlite3.IntegrityError: cur.close() return -1 - + def delete(self, table, where, values=[]): cur = self.conn.cursor() q = ''' @@ -176,13 +182,13 @@ def delete(self, table, where, values=[]): where %s ''' % (table, where) cur.execute(q, values) - + def get_cursor(self): return self.conn.cursor() - + def count(self, table, where='', values=[]): return self.select_one('count(*)', table, where, values=values) - + def select(self, what, table, where='', values=[]): cur = self.conn.cursor() query = ''' @@ -211,7 +217,7 @@ def select_one(self, what, table, where='', values=[]): one = execur.fetchone() cur.close() return one[0] - + def update(self, table, what, where='', values=[]): cur = self.conn.cursor() if where != '': @@ -257,7 +263,7 @@ def add_user(self, user, new=False): self.debug('add_user: user "%s" already exists in %susers: %s' % (user, 'new' if new else '', str(e))) raise e self.commit() - + def remove_user(self, user): userid = self.get_user_id(user) user = self.select_one('username', 'users', where='id = ?', values=[userid]) @@ -325,7 +331,41 @@ def 
set_last_since_id(self, user, since_id): ''' % (since_id, user) cur.execute(query) self.commit() - + + def add_sub(self, sub): + cur = self.conn.cursor() + cur.execute('insert or ignore into subs(sub) values (?)', [sub]) + self.commit() + + def remove_sub(self, sub): + self.delete('subs', 'sub like ?', [sub]) + self.commit() + + def get_subs_list(self): + result = [] + for sub in self.select('sub', 'subs'): + result.append(sub[0]) + return result + + def get_sub_last_since_id(self, sub): + cur = self.conn.cursor() + results = cur.execute(''' + select sinceid + from subs + where sub like "%s" + ''' % sub) + return results.fetchall()[0][0] + + def set_sub_last_since_id(self, sub, since_id): + cur = self.conn.cursor() + query = ''' + update subs + set sinceid = "%s" + where sub like "%s" + ''' % (since_id, sub) + cur.execute(query) + self.commit() + def add_post(self, post, legacy=0): userid = self.get_user_id(post.author) values = [ ( @@ -515,8 +555,8 @@ def add_existing_image(self, user, oldimage, oldpath, subdir='', album_id=-1): self.debug('add_existing_image: create_thumbnail failed: %s' % str(e)) thumbnail = path.join(ImageUtils.get_root(), 'images', 'nothumb.png') try: - self.add_image(newimage, user, url, - dims[0], dims[1], size, thumbnail, 'image', + self.add_image(newimage, user, url, + dims[0], dims[1], size, thumbnail, 'image', album_id, post, comment) except Exception, e: self.debug('add_existing_image: failed: %s' % str(e)) @@ -598,7 +638,7 @@ def add_existing_album(self, user, oldalbum, oldpath): except Exception, e: #self.debug('add_existing_image: %s' % str(e)) pass - + def get_credentials(self, site): if self.count('credentials', 'site = ?', [site]) == 0: raise Exception('Credentials for %s not found in database, run "Gonewild.py --help" for more info' % site) @@ -628,7 +668,7 @@ def set_credentials(self, site, username, password): from traceback import format_exc self.debug('\n%s' % format_exc()) raise e - + def update_user(self, user): cur = 
self.conn.cursor() query = ''' @@ -671,7 +711,7 @@ def mark_as_deleted(self, user): def already_friend(self, user): return self.count('friends', 'username = ?', [user]) > 0 - + def add_friend(self, user): cur = self.conn.cursor() cur.execute('insert into friends values (?)', [user]) @@ -723,7 +763,7 @@ def set_config(self, key, value): cur.close() except Exception, e: self.debug('failed to set config key "%s" to value "%s": %s' % (key, value, str(e))) - + if __name__ == '__main__': db = DB() diff --git a/py/Gonewild.py b/py/Gonewild.py index 6d013f6..7c07d61 100755 --- a/py/Gonewild.py +++ b/py/Gonewild.py @@ -22,7 +22,7 @@ def __init__(self): self.db = DB() # Database instance self.reddit = Reddit() self.excluded_subs = self.db.get_excluded_subreddits() - + def debug(self, text): tstamp = strftime('[%Y-%m-%dT%H:%M:%SZ]', gmtime()) text = '%s Gonewild: %s' % (tstamp, text) @@ -127,7 +127,7 @@ def poll_user(self, user): continue self.get_and_process_urls_from_child(child) - + self.debug('%s: poll_user: done' % user) # Set last 'since' to the most-recent post/comment ID @@ -297,7 +297,7 @@ def process_url(self, url, url_index, child): commid ) self.db.update_user(child.author) - + def infinite_loop(self): users = self.db.get_users(new=False) @@ -354,28 +354,47 @@ def infinite_loop(self): except Exception, e: self.debug('infinite_loop: poll_user: %s' % str(e)) from traceback import format_exc - print format_exc() + print format_exc() def add_top_users(self): - users = [] - subs = ['gonewild'] - self.debug('add_top_users: loading top posts for the week from %s' % ','.join(subs)) - try: - posts = self.reddit.get('http://www.reddit.com/r/%s/top.json?t=week' % '+'.join(subs)) - except Exception, e: - self.debug('add_top_users: Exception: %s' % str(e)) - return users - for post in posts: - if post.author == '[deleted]': continue - if not self.db.user_already_added(post.author): - self.debug('add_top_users: Found new user, adding /u/%s' % post.author) - 
self.db.add_user(post.author, new=True) + subusers = set() + for sub in self.db.get_subs_list(): + subusers.update(self.poll_sub(sub)) + users = [] + for user in subusers: + if user == '[deleted]': continue + if not self.db.user_already_added(user): + self.debug('add_top_users: Found new user, adding /u/%s' % user) + self.db.add_user(user, new=True) friend_zone = self.db.get_config('friend_zone') if friend_zone == None or friend_zone == 'none': - self.add_friend(post.author) - users.append(post.author) + self.add_friend(user) + users.append(user) return users + def poll_sub(self, sub): + since_id = self.db.get_sub_last_since_id(sub) + # Get posts/comments for user + self.debug('%s: poll_sub: since "%s"' % (sub, since_id)) + try: + posts = self.reddit.get_sub(sub, since=since_id) + except Exception, e: + if '404: Not Found' in str(e): + # User is deleted, mark it as such + self.debug('%s: poll_sub: user is 404' % sub) + return + self.debug('%s: poll_sub: error %s' % (sub, str(e))) + return + + if len(posts) == 0: + return set() + + users = set([post.author for post in posts]) + self.debug('%s: poll_sub: %d new posts found, %d posters' % (sub, len(posts), len(users))) + self.debug('%s: poll_sub: setting most-recent since_id to "%s"' % (sub, posts[0].id)) + self.db.set_sub_last_since_id(sub, posts[0].id) + return users + def add_friend(self, user): try: @@ -441,7 +460,7 @@ def compare_friends(self, add_friends=False): self.debug('Added /u/%s as a friend on reddit' % friend) else: self.debug('Found %d users that are not friended. 
to friend them, execute:\npython Gonewild.py --friend %s' % (len(need2add), ','.join(need2add))) - + def toggle_addtop(self): if self.db.get_config('add_top_users') != 'false': self.db.set_config('add_top_users', 'false') @@ -503,7 +522,7 @@ def setup_config(self): 'save_thumbnails' : 'true', 'add_top_users' : 'true', 'excluded_subreddits' : '', - 'friend_zone' : 'some', + 'friend_zone' : 'none', 'last_user' : '' } for (key,value) in keys.iteritems(): @@ -522,6 +541,9 @@ def handle_arguments(gw): parser.add_argument('--add', '-a', help='Add user(s) to scan for new content', metavar='USER') + parser.add_argument('--add-sub', + help='Add sub(s) to scan for new content', + metavar='SUBREDDIT') parser.add_argument('--add-top', '-tz', help='Toggle adding top users from /r/gonewild', action='store_true') @@ -616,6 +638,12 @@ def handle_arguments(gw): else: gw.debug('Warning: User already added: /u/%s' % user) + elif args.add_sub: + subs = args.add_sub.replace('r/', '').replace('/', '').split(',') + for sub in subs: + gw.db.add_sub(sub) + gw.debug('Added sub: /r/%s' % sub) + elif args.friend: users = args.friend.replace('u/', '').replace('/', '').split(',') gw.login() @@ -716,4 +744,3 @@ def handle_arguments(gw): gw.login() gw.infinite_loop() - diff --git a/py/Reddit.py b/py/Reddit.py index 2b0b5d9..0f19eb2 100755 --- a/py/Reddit.py +++ b/py/Reddit.py @@ -45,7 +45,7 @@ def from_json(self, json): self.url = Reddit.asciify(json['url']) self.selftext = Reddit.asciify(json['selftext']) if json['is_self'] else None self.title = Reddit.asciify(json['title']) - + def permalink(self): if self.subreddit != '': return 'http://reddit.com/r/%s/comments/%s' % (self.subreddit, self.id) @@ -95,7 +95,7 @@ def debug(text): Reddit.logger.write('%s\n' % text) if Reddit.logger != stderr: stderr.write('%s\n' % text) - + ''' Parses reddit response. 
Returns either: @@ -172,11 +172,11 @@ def get(url): Reddit.debug('exception: %s' % str(e)) raise e return Reddit.parse_json(json) - - + + @staticmethod def get_user(user, since=None, max_pages=None): - """ + """ Get all comments and posts for a user since 'since'. 'since' is either a post id or comment id """ @@ -217,6 +217,49 @@ def get_user(user, since=None, max_pages=None): page += 1 return results + @staticmethod + def get_sub(sub, since=None, max_pages=None): + """ + Get all posts for a sub since 'since'. + 'since' is a post id + """ + results = [] + url = 'http://www.reddit.com/r/%s/new.json' % sub + Reddit.debug('loading %s' % url) + Reddit.wait() + try: + r = Reddit.httpy.get(url) + except Exception, e: + Reddit.debug('exception: %s' % str(e)) + raise e + if r.strip() == '': + # sub is deleted + raise Exception('sub is deleted') + page = 1 + while True: + try: + json = loads(r) + except Exception, e: + Reddit.debug('failed to load JSON: %s\n%s' % (str(e), r)) + return results + if 'error' in json and json['error'] == 404: + raise Exception('sub %s is deleted (404)' % sub) + for item in Reddit.parse_json(json): + if item.id == since: + return results + results.append(item) + if not 'after' in json['data'] or json['data']['after'] == None: + Reddit.debug('get: hit end of posts/comments') + break + after = json['data']['after'] + if max_pages != None and max_pages >= page: break + next_url = '%s?after=%s' % (url, after) + Reddit.debug('loading %s' % next_url) + Reddit.wait() + r = Reddit.httpy.get(next_url) + page += 1 + return results + @staticmethod def get_links_from_text(text): ''' Returns list of URLs from given text (comment or selftext) ''' @@ -390,4 +433,3 @@ def get_friends_list(): print 'COMMENT: /u/%s: "%s"' % (item.author, item.body.replace('\n', ' ')), print '(+%d/-%d)' % (item.ups, item.downs) ''' -