embr · detrout · Jun 9, 2015 · Jun 9, 2015 · Jun 9, 2015 · Jun 9, 2015
diff --git a/gcat/__init__.py b/gcat/__init__.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python
+from __future__ import print_function
 
-from oauth2client.client import  OAuth2WebServerFlow, OAuth2Credentials
+from oauth2client.client import OAuth2WebServerFlow
 from oauth2client.file import Storage
 from apiclient.discovery import build
 from apiclient.http import MediaFileUpload
@@ -9,39 +10,44 @@
 
 import argparse
 import sys, os.path
-import re
 import time
 import logging
 
-
-from operator import itemgetter
 import collections
-from collections import defaultdict, OrderedDict
+from collections import OrderedDict
 import webbrowser
-import yaml, csv, json, pprint
-import StringIO
+import yaml, pprint
+from io import BytesIO
 import shelve
 import pandas as pd
-import datetime
+
+# python 2 / 3 compatibility
+try:
+    input = raw_input
+except NameError:
+    pass
 
 LOGLEVELS = {'DEBUG': logging.DEBUG,
              'INFO': logging.INFO,
              'WARNING': logging.WARNING,
              'ERROR': logging.ERROR,
-             'CRITICAL': logging.CRITICAL}  
+             'CRITICAL': logging.CRITICAL}
 
 logger = logging.getLogger(__name__)
 
 def default_options():
     defaults = {}
     defaults['store'] = os.path.expanduser('~/.gcat/store')
     defaults['config'] = os.path.expanduser('~/.gcat/config')
-    defaults['cache'] = os.path.expanduser('~/.gcat/cache')
+    cache_suffix = ''  # Python 2 keeps the original ~/.gcat/cache path
+    if sys.version_info[0] >= 3:
+        cache_suffix = '3'  # Python 3 gets its own file, ~/.gcat/cache3 (presumably the cache is not portable across major versions -- TODO confirm)
+    defaults['cache'] = os.path.expanduser('~/.gcat/cache' + cache_suffix)
     defaults['usecache'] = False
     defaults['redirect_uri'] = 'urn:ietf:wg:oauth:2.0:oob'
     defaults['header'] = 0
     return defaults
-     
+
 def load_config(opts):
     if 'config' in opts:
         try:
@@ -79,7 +85,7 @@ def get_file(title=None, fmt='dict', **kwargs):
         store     (str)   : location in which to store file-specific credentials
         header    (int)   : which row to use as the header. use None for no header in which case
                             column names will be X1, X2, ...
-        
+
 
     """
     opts = default_options()
@@ -92,17 +98,17 @@ def get_file(title=None, fmt='dict', **kwargs):
         raise ValueError('`title` not found in options.  exiting')
 
     content = get_content(opts)
-    wb = pd.ExcelFile(StringIO.StringIO(content))
+    wb = pd.ExcelFile(BytesIO(content))
 
     if fmt == 'pandas_excel':
         return wb
-   
+
     try:
         parsed_wb = OrderedDict([(sheet_name, wb.parse(sheet_name, header=opts['header'])) for sheet_name in wb.sheet_names])
     except:
-        print 'error parsing worksheet using pandas.ExcelFile.parse(sheet_name). '\
+        print('error parsing worksheet using pandas.ExcelFile.parse(sheet_name). '\
               'Consider using the pandas_excel fmt in get_file and parsing the fileA '\
-              'yourself to have more control'
+              'yourself to have more control')
         logger.exception('error parsing worksheet using pandas.ExcelFile.parse(sheet_name). '
                          'Consider using the pandas_excel fmt in get_file and parsing the file '
                          'yourself to have more control')
@@ -117,13 +123,13 @@ def get_file(title=None, fmt='dict', **kwargs):
     else:
         raise ValueError('unkown format: %s' % fmt)
     if len(fmt_wb) == 1:
-        return fmt_wb.values()[0]
+        return list(fmt_wb.values())[0]
     if 'sheet' in opts:
         try:
             return fmt_wb[opts['sheet']]
         except:
-            print 'sheet name: `%s` not found in workbook.  sheet_names: %s' % (opts['sheet'], fmt_wb.keys())
-            logger.exception('sheet name: %s not found in workbook.  sheet_names: %s', opts['sheet'], fmt_wb.keys())
+            print('sheet name: `%s` not found in workbook.  sheet_names: %s' % (opts['sheet'], list(fmt_wb.keys())))
+            logger.exception('sheet name: %s not found in workbook.  sheet_names: %s', opts['sheet'], list(fmt_wb.keys()))
             raise
     else:
         return fmt_wb
@@ -227,36 +233,48 @@ def put_file(title=None, data=None, sheet_names=None, fname=None, update=False,
                 body=body,
                 media_body=media_body,
                 newRevision=True,
-                convert=True).execute() 
+                convert=True).execute()
         else:
             if not opts['update']:
-                logger.warning('creating file with duplicate name: %s', opts['title']) 
+                logger.warning('creating file with duplicate name: %s', opts['title'])
             file = service.files().insert(
                 body=body,
                 media_body=media_body,
                 convert=True).execute()
- 
-    except errors.HttpError, error:
+
+    except errors.HttpError as error:
         logger.exception('An error occured while attempting to insert file: %s', title)
 
 
 def find_file(service, opts):
+    """Page through the Drive file list and return the first file titled opts['title'].
+    Logs the titles seen and returns None if nothing matches; re-raises errors.HttpError."""
     files = service.files()
     try:
-        res = files.list().execute()
-    except errors.HttpError, error:
+        files_list_request = files.list()
+        seen_titles = []  # titles from every page fetched, reported when nothing matches
+        while files_list_request is not None:
+            res = files_list_request.execute()
+            files_page = res['items']
+            seen_titles.extend(f['title'] for f in files_page)
+            fs = [f for f in files_page if f['title'] == opts['title'] ]
+            if len(fs) > 0:
+                break  # stop at the first page that contains a match
+            # pagination: list_next() yields None once there are no more pages
+            files_list_request = files.list_next(files_list_request, res)
+    except errors.HttpError as error:
-        logger.error('An error occurred: %s', exc_info=error)
+        logger.error('An error occurred: %s', error, exc_info=True)
         raise error
 
-    files = res['items']
-    fs = [f for f in files if f['title'] == opts['title'] ]
     if not fs:
-        title_list = sorted([f['title'] for f in files])
+        title_list = sorted(seen_titles)
         logger.error('file title: %s not in list:\n%s', opts['title'], pprint.pformat(title_list))
         return None
     if len(fs) > 1:
         dups = '\n'.join([f['alternateLink'] for f in fs])
-        logger.warning('title `%s` matches several files in Google Drive.  Using first item in the following link:\n%s', opts['title'], dups)  
+        logger.warning('title `%s` matches several files in Google Drive.  Using first item in the following link:\n%s',
+                       opts['title'], dups)
+
     file = fs[0]
     return file
 
@@ -299,20 +317,20 @@ def get_credentials(flow, opts):
         # get the credentials the hard way
         auth_url = flow.step1_get_authorize_url()
         webbrowser.open(auth_url)
-        code = raw_input('go to:\n\n\t%s\n\nand enter in the code displayed:' % auth_url)
+        code = input('go to:\n\n\t%s\n\nand enter in the code displayed:' % auth_url)
         credentials = flow.step2_exchange(code)
         storage.put(credentials)
 
     #pprint.pprint(json.loads(credentials.to_json()), indent=2)
     if credentials.access_token_expired:
         logger.info('refreshing token')
         refresh_http = httplib2.Http()
-        credentials.refresh(refresh_http) 
+        credentials.refresh(refresh_http)
     return credentials
 
 
 def download(service, file):
-    logger.debug('file.viewkeys(): %s', pprint.pformat(file.viewkeys()))
+    logger.debug('file.viewkeys(): %s', pprint.pformat(file.keys()))
     #download_url = file.get('downloadUrl') # not present for some reason
     #download_url_pdf = file.get('exportLinks')['application/pdf']
     download_url = file.get('exportLinks')['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']
@@ -330,7 +348,7 @@ def download(service, file):
     else:
         # The file doesn't have any content stored on Drive.
         logger.error('file does not have any content stored on Drive')
-        return None    
+        return None
 
 
 def merge_config(opts, yaml_name):
@@ -391,7 +409,7 @@ def parse_args(**kwopts):
     parser.add_argument('--usecache',
                         action='store_true',
                         help='instructs gcat to use the cache located in a file specified by the --cache option')
-    parser.add_argument('--header', 
+    parser.add_argument('--header',
                         type=int,
                         default=0,
                         help='row to use as header')
@@ -403,11 +421,11 @@ def parse_args(**kwopts):
     if args.no_header:
         setattr(args,'header', None)
     return vars(args)
-    
+
 
 def write_to_stdout(content):
     for line in content:
-        print '\t'.join(map(str, line))
+        print('\t'.join(map(str, line)))  # str() each cell so non-string values can be tab-joined
 
 
 def main():
@@ -423,7 +441,7 @@ def main():
 
     content = get_file(fmt='list', **parse_args())
     if isinstance(content, dict):
-        content = content.values()[0]
+        content = list(content.values())[0]
     write_to_stdout(content)
 
 

diff --git a/setup.py b/setup.py
@@ -24,9 +24,9 @@
             ]
         },
     install_requires=[
-       "oauth2client >= 1.0",
-       "google-api-python-client >= 1.0",
-       "httplib2 >= 0.7.6",
+       "oauth2client >= 3.0",
+       "google-api-python-client >= 1.5",
+       "httplib2 >= 0.9.0",
        "pandas >= 0.9.0",
        "PyYAML >= 3.10",
        "openpyxl >=1.5.8"