Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 54 additions & 36 deletions gcat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/python
from __future__ import print_function

from oauth2client.client import OAuth2WebServerFlow, OAuth2Credentials
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.file import Storage
from apiclient.discovery import build
from apiclient.http import MediaFileUpload
Expand All @@ -9,39 +10,44 @@

import argparse
import sys, os.path
import re
import time
import logging


from operator import itemgetter
import collections
from collections import defaultdict, OrderedDict
from collections import OrderedDict
import webbrowser
import yaml, csv, json, pprint
import StringIO
import yaml, pprint
from io import BytesIO
import shelve
import pandas as pd
import datetime

# Python 2 / 3 compatibility: on Python 2, rebind `input` to `raw_input` so
# that prompts return the typed text unevaluated, as on Python 3. On Python 3
# `raw_input` does not exist; the NameError is ignored and the builtin `input`
# is kept as-is.
try:
    input = raw_input
except NameError:
    pass

# Maps log-level names to the corresponding `logging` module constants.
LOGLEVELS = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL,
}

# Module-level logger shared by the functions in this file.
logger = logging.getLogger(__name__)

def default_options():
    """Return a fresh dict holding the built-in option defaults.

    Keys set here:
        store        : path of the credential store (``~/.gcat/store``)
        config       : path of the config file (``~/.gcat/config``)
        cache        : path of the cache file; gets a ``3`` suffix when
                       running on Python 3 or later
        usecache     : ``False``
        redirect_uri : the out-of-band OAuth redirect URI
        header       : ``0``, the row index used as header

    All ``~`` paths are expanded with ``os.path.expanduser``.
    """
    # NOTE(review): the per-major-version suffix presumably keeps a cache
    # written by one Python major version from being read by the other --
    # confirm against the code that opens the cache.
    cache_suffix = '3' if sys.version_info[0] >= 3 else ''

    defaults = {}
    defaults['store'] = os.path.expanduser('~/.gcat/store')
    defaults['config'] = os.path.expanduser('~/.gcat/config')
    defaults['cache'] = os.path.expanduser('~/.gcat/cache' + cache_suffix)
    defaults['usecache'] = False
    defaults['redirect_uri'] = 'urn:ietf:wg:oauth:2.0:oob'
    defaults['header'] = 0
    return defaults

def load_config(opts):
if 'config' in opts:
try:
Expand Down Expand Up @@ -79,7 +85,7 @@ def get_file(title=None, fmt='dict', **kwargs):
store (str) : location in which to store file-specific credentials
header (int) : which row to use as the header. use None for no header in which case
column names will be X1, X2, ...


"""
opts = default_options()
Expand All @@ -92,17 +98,17 @@ def get_file(title=None, fmt='dict', **kwargs):
raise ValueError('`title` not found in options. exiting')

content = get_content(opts)
wb = pd.ExcelFile(StringIO.StringIO(content))
wb = pd.ExcelFile(BytesIO(content))

if fmt == 'pandas_excel':
return wb

try:
parsed_wb = OrderedDict([(sheet_name, wb.parse(sheet_name, header=opts['header'])) for sheet_name in wb.sheet_names])
except:
print 'error parsing worksheet using pandas.ExcelFile.parse(sheet_name). '\
print('error parsing worksheet using pandas.ExcelFile.parse(sheet_name). '\
'Consider using the pandas_excel fmt in get_file and parsing the fileA '\
'yourself to have more control'
'yourself to have more control')
logger.exception('error parsing worksheet using pandas.ExcelFile.parse(sheet_name). '
'Consider using the pandas_excel fmt in get_file and parsing the file '
'yourself to have more control')
Expand All @@ -117,13 +123,13 @@ def get_file(title=None, fmt='dict', **kwargs):
else:
raise ValueError('unkown format: %s' % fmt)
if len(fmt_wb) == 1:
return fmt_wb.values()[0]
return list(fmt_wb.values())[0]
if 'sheet' in opts:
try:
return fmt_wb[opts['sheet']]
except:
print 'sheet name: `%s` not found in workbook. sheet_names: %s' % (opts['sheet'], fmt_wb.keys())
logger.exception('sheet name: %s not found in workbook. sheet_names: %s', opts['sheet'], fmt_wb.keys())
print('sheet name: `%s` not found in workbook. sheet_names: %s' % (opts['sheet'], list(fmt_wb.keys())))
logger.exception('sheet name: %s not found in workbook. sheet_names: %s', opts['sheet'], list(fmt_wb.keys()))
raise
else:
return fmt_wb
Expand Down Expand Up @@ -227,36 +233,48 @@ def put_file(title=None, data=None, sheet_names=None, fname=None, update=False,
body=body,
media_body=media_body,
newRevision=True,
convert=True).execute()
convert=True).execute()
else:
if not opts['update']:
logger.warning('creating file with duplicate name: %s', opts['title'])
logger.warning('creating file with duplicate name: %s', opts['title'])
file = service.files().insert(
body=body,
media_body=media_body,
convert=True).execute()
except errors.HttpError, error:

except errors.HttpError as error:
logger.exception('An error occured while attempting to insert file: %s', title)


def find_file(service, opts):
    """Look up a Drive file whose title equals ``opts['title']``.

    The file listing is walked page by page and the search stops at the
    first page that contains at least one matching title.

    Args:
        service: object whose ``files()`` returns a resource exposing
            ``list()`` and ``list_next(previous_request, previous_response)``.
        opts (dict): options; only ``opts['title']`` is read here.

    Returns:
        The first matching file entry, or ``None`` when no listed file has
        that title (the titles that were seen are logged as an error).

    Raises:
        errors.HttpError: re-raised after logging when a list request fails.
    """
    files = service.files()
    seen_titles = []
    matches = []
    try:
        request = files.list()
        while request is not None:
            response = request.execute()
            page = response.get('items', [])
            # Remember every title so the not-found message can list them.
            seen_titles.extend(f['title'] for f in page)
            matches = [f for f in page if f['title'] == opts['title']]
            if matches:
                break
            # list_next returns None once there are no further pages.
            request = files.list_next(request, response)
    except errors.HttpError as error:
        logger.error('An error occurred: %s', error, exc_info=True)
        # Bare raise keeps the original traceback intact.
        raise

    if not matches:
        logger.error('file title: %s not in list:\n%s',
                     opts['title'], pprint.pformat(sorted(seen_titles)))
        return None

    if len(matches) > 1:
        dups = '\n'.join([f['alternateLink'] for f in matches])
        logger.warning('title `%s` matches several files in Google Drive. Using first item in the following link:\n%s',
                       opts['title'], dups)

    return matches[0]

Expand Down Expand Up @@ -299,20 +317,20 @@ def get_credentials(flow, opts):
# get the credentials the hard way
auth_url = flow.step1_get_authorize_url()
webbrowser.open(auth_url)
code = raw_input('go to:\n\n\t%s\n\nand enter in the code displayed:' % auth_url)
code = input('go to:\n\n\t%s\n\nand enter in the code displayed:' % auth_url)
credentials = flow.step2_exchange(code)
storage.put(credentials)

#pprint.pprint(json.loads(credentials.to_json()), indent=2)
if credentials.access_token_expired:
logger.info('refreshing token')
refresh_http = httplib2.Http()
credentials.refresh(refresh_http)
credentials.refresh(refresh_http)
return credentials


def download(service, file):
logger.debug('file.viewkeys(): %s', pprint.pformat(file.viewkeys()))
logger.debug('file.viewkeys(): %s', pprint.pformat(file.keys()))
#download_url = file.get('downloadUrl') # not present for some reason
#download_url_pdf = file.get('exportLinks')['application/pdf']
download_url = file.get('exportLinks')['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']
Expand All @@ -330,7 +348,7 @@ def download(service, file):
else:
# The file doesn't have any content stored on Drive.
logger.error('file does not have any content stored on Drive')
return None
return None


def merge_config(opts, yaml_name):
Expand Down Expand Up @@ -391,7 +409,7 @@ def parse_args(**kwopts):
parser.add_argument('--usecache',
action='store_true',
help='instructs gcat to use the cache located in a file specified by the --cache option')
parser.add_argument('--header',
parser.add_argument('--header',
type=int,
default=0,
help='row to use as header')
Expand All @@ -403,11 +421,11 @@ def parse_args(**kwopts):
if args.no_header:
setattr(args,'header', None)
return vars(args)


def write_to_stdout(content):
    """Print each row of ``content`` to stdout as one tab-separated line.

    Args:
        content: iterable of rows; each row is an iterable whose items are
            converted with ``str`` before being joined with tabs.
    """
    for line in content:
        print('\t'.join(map(str, line)))


def main():
Expand All @@ -423,7 +441,7 @@ def main():

content = get_file(fmt='list', **parse_args())
if isinstance(content, dict):
content = content.values()[0]
content = list(content.values())[0]
write_to_stdout(content)


Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
]
},
install_requires=[
"oauth2client >= 1.0",
"google-api-python-client >= 1.0",
"httplib2 >= 0.7.6",
"oauth2client >= 3.0",
"google-api-python-client >= 1.5",
"httplib2 >= 0.9.0",
"pandas >= 0.9.0",
"PyYAML >= 3.10",
"openpyxl >=1.5.8"
Expand Down