diff --git a/gcat/__init__.py b/gcat/__init__.py index d0da384..ae78641 100755 --- a/gcat/__init__.py +++ b/gcat/__init__.py @@ -1,6 +1,7 @@ #!/usr/bin/python +from __future__ import print_function -from oauth2client.client import OAuth2WebServerFlow, OAuth2Credentials +from oauth2client.client import OAuth2WebServerFlow from oauth2client.file import Storage from apiclient.discovery import build from apiclient.http import MediaFileUpload @@ -9,26 +10,28 @@ import argparse import sys, os.path -import re import time import logging - -from operator import itemgetter import collections -from collections import defaultdict, OrderedDict +from collections import OrderedDict import webbrowser -import yaml, csv, json, pprint -import StringIO +import yaml, pprint +from io import BytesIO import shelve import pandas as pd -import datetime + +# python 2 / 3 compatibility +try: + input = raw_input +except NameError: + pass LOGLEVELS = {'DEBUG': logging.DEBUG, 'INFO': logging.INFO, 'WARNING': logging.WARNING, 'ERROR': logging.ERROR, - 'CRITICAL': logging.CRITICAL} + 'CRITICAL': logging.CRITICAL} logger = logging.getLogger(__name__) @@ -36,12 +39,15 @@ def default_options(): defaults = {} defaults['store'] = os.path.expanduser('~/.gcat/store') defaults['config'] = os.path.expanduser('~/.gcat/config') - defaults['cache'] = os.path.expanduser('~/.gcat/cache') + cache_suffix = '' + if sys.version_info[0] >= 3: + cache_suffix = '3' + defaults['cache'] = os.path.expanduser('~/.gcat/cache' + cache_suffix) defaults['usecache'] = False defaults['redirect_uri'] = 'urn:ietf:wg:oauth:2.0:oob' defaults['header'] = 0 return defaults - + def load_config(opts): if 'config' in opts: try: @@ -79,7 +85,7 @@ def get_file(title=None, fmt='dict', **kwargs): store (str) : location in which to store file-specific credentials header (int) : which row to use as the header. use None for no header in which case column names will be X1, X2, ... - + """ opts = default_options() @@ -92,17 +98,17 @@ def get_file(title=None, fmt='dict', **kwargs): raise ValueError('`title` not found in options. exiting') content = get_content(opts) - wb = pd.ExcelFile(StringIO.StringIO(content)) + wb = pd.ExcelFile(BytesIO(content)) if fmt == 'pandas_excel': return wb - + try: parsed_wb = OrderedDict([(sheet_name, wb.parse(sheet_name, header=opts['header'])) for sheet_name in wb.sheet_names]) except: - print 'error parsing worksheet using pandas.ExcelFile.parse(sheet_name). '\ + print('error parsing worksheet using pandas.ExcelFile.parse(sheet_name). '\ 'Consider using the pandas_excel fmt in get_file and parsing the fileA '\ - 'yourself to have more control' + 'yourself to have more control') logger.exception('error parsing worksheet using pandas.ExcelFile.parse(sheet_name). ' 'Consider using the pandas_excel fmt in get_file and parsing the file ' 'yourself to have more control') @@ -117,13 +123,13 @@ def get_file(title=None, fmt='dict', **kwargs): else: raise ValueError('unkown format: %s' % fmt) if len(fmt_wb) == 1: - return fmt_wb.values()[0] + return list(fmt_wb.values())[0] if 'sheet' in opts: try: return fmt_wb[opts['sheet']] except: - print 'sheet name: `%s` not found in workbook. sheet_names: %s' % (opts['sheet'], fmt_wb.keys()) - logger.exception('sheet name: %s not found in workbook. sheet_names: %s', opts['sheet'], fmt_wb.keys()) + print('sheet name: `%s` not found in workbook. sheet_names: %s' % (opts['sheet'], list(fmt_wb.keys()))) + logger.exception('sheet name: %s not found in workbook. sheet_names: %s', opts['sheet'], list(fmt_wb.keys())) raise else: return fmt_wb @@ -227,36 +233,48 @@ def put_file(title=None, data=None, sheet_names=None, fname=None, update=False, body=body, media_body=media_body, newRevision=True, - convert=True).execute() + convert=True).execute() else: if not opts['update']: - logger.warning('creating file with duplicate name: %s', opts['title']) + logger.warning('creating file with duplicate name: %s', opts['title']) file = service.files().insert( body=body, media_body=media_body, convert=True).execute() - - except errors.HttpError, error: + + except errors.HttpError as error: logger.exception('An error occured while attempting to insert file: %s', title) def find_file(service, opts): files = service.files() try: - res = files.list().execute() - except errors.HttpError, error: + files_list_request = files.list() + files_list_start = files_list_request + + while files_list_request is not None: + res = files_list_request.execute() + files_page = res['items'] + fs = [f for f in files_page if f['title'] == opts['title'] ] + + if len(fs) > 0: + break + + # pagination + files_list_request = files.list_next(files_list_start, res) + except errors.HttpError as error: logger.error('An error occurred: %s', exc_info=error) raise error - files = res['items'] - fs = [f for f in files if f['title'] == opts['title'] ] if not fs: title_list = sorted([f['title'] for f in files]) logger.error('file title: %s not in list:\n%s', opts['title'], pprint.pformat(title_list)) return None if len(fs) > 1: dups = '\n'.join([f['alternateLink'] for f in fs]) - logger.warning('title `%s` matches several files in Google Drive. Using first item in the following link:\n%s', opts['title'], dups) + logger.warning('title `%s` matches several files in Google Drive. Using first item in the following link:\n%s', + opts['title'], dups) + file = fs[0] return file @@ -299,7 +317,7 @@ def get_credentials(flow, opts): # get the credentials the hard way auth_url = flow.step1_get_authorize_url() webbrowser.open(auth_url) - code = raw_input('go to:\n\n\t%s\n\nand enter in the code displayed:' % auth_url) + code = input('go to:\n\n\t%s\n\nand enter in the code displayed:' % auth_url) credentials = flow.step2_exchange(code) storage.put(credentials) @@ -307,12 +325,12 @@ def get_credentials(flow, opts): if credentials.access_token_expired: logger.info('refreshing token') refresh_http = httplib2.Http() - credentials.refresh(refresh_http) + credentials.refresh(refresh_http) return credentials def download(service, file): - logger.debug('file.viewkeys(): %s', pprint.pformat(file.viewkeys())) + logger.debug('file.viewkeys(): %s', pprint.pformat(file.keys())) #download_url = file.get('downloadUrl') # not present for some reason #download_url_pdf = file.get('exportLinks')['application/pdf'] download_url = file.get('exportLinks')['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'] @@ -330,7 +348,7 @@ def download(service, file): else: # The file doesn't have any content stored on Drive. logger.error('file does not have any content stored on Drive') - return None + return None def merge_config(opts, yaml_name): @@ -391,7 +409,7 @@ def parse_args(**kwopts): parser.add_argument('--usecache', action='store_true', help='instructs gcat to use the cache located in a file specified by the --cache option') - parser.add_argument('--header', + parser.add_argument('--header', type=int, default=0, help='row to use as header') @@ -403,11 +421,11 @@ def parse_args(**kwopts): if args.no_header: setattr(args,'header', None) return vars(args) - + def write_to_stdout(content): for line in content: - print '\t'.join(map(str, line)) + print('\t'.join(map(str, line))) def main(): @@ -423,7 +441,7 @@ def main(): content = get_file(fmt='list', **parse_args()) if isinstance(content, dict): - content = content.values()[0] + content = list(content.values())[0] write_to_stdout(content) diff --git a/setup.py b/setup.py index b28580e..fbcdc1b 100644 --- a/setup.py +++ b/setup.py @@ -24,9 +24,9 @@ ] }, install_requires=[ - "oauth2client >= 1.0", - "google-api-python-client >= 1.0", - "httplib2 >= 0.7.6", + "oauth2client >= 3.0", + "google-api-python-client >= 1.5", + "httplib2 >= 0.9.0", "pandas >= 0.9.0", "PyYAML >= 3.10", "openpyxl >=1.5.8"