From da0cfffce1100648fb4c1f4ddea7995bdfc691ba Mon Sep 17 00:00:00 2001
From: Jeremiah Jordan <jeremiah@datastax.com>
Date: Sun, 26 May 2013 23:14:41 -0500
Subject: [PATCH] Refactor code into multiple classes.  Add org name to folder
 name.  Fix files with spaces, and other strange chars messing things up.

---
 .gitignore       |   2 +
 README.md        |   2 +-
 download         | 237 +++--------------------------------------------
 downloadfiles.py | 129 ++++++++++++++++++++++++++
 dszendesk.py     |  92 ++++++++++++++++++
 5 files changed, 235 insertions(+), 227 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 downloadfiles.py
 create mode 100644 dszendesk.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c31f4be
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.idea
+*.py[oc]
\ No newline at end of file
diff --git a/README.md b/README.md
index 6d4adc6..5a89ff0 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@ where:
 Purpose
 -------
 
-* Allows filesystems to remain clutterless with support tickets
+* Allows filesystems to remain less cluttered with support tickets
 * Have all organization files ready to easily be grepped
 * Ensure that we don't have many `cassandra.yaml (30)` files (maybe just OSX)
 * Keep files organized by date of attachment, not download
diff --git a/download b/download
index 45c0059..f85026e 100755
--- a/download
+++ b/download
@@ -1,246 +1,31 @@
 #!/usr/bin/env python
 
-import calendar
-import getpass
-import json
 import os
-import requests
 import sys
-import time
-import urllib
+from downloadfiles import DownloadFiles
+from dszendesk import ZenDesk
 
-import ConfigParser
-
-domain = False
-admin_email = False
-admin_password = False
-download_directory = '/tmp'
-run_open = False
-open_program = 'open'
-
-def print_json(json_object):
-    print json.dumps(json_object, sort_keys=True, indent=4)
-
-def get_authentication():
-    global domain
-    global admin_email
-    global admin_password
-    global download_directory
-    global run_open
-    global open_program
-    configfile = os.path.join(os.path.expanduser('~'), '.zendesk.cfg')
-
-    while True:
-        if not os.path.exists(configfile):
-            admin_email = raw_input('Zendesk Email Address: ')
-            admin_password = getpass.getpass()
-        else:
-            config = ConfigParser.RawConfigParser()
-            config.read(configfile)
-
-            domain = config.get('ZenDesk', 'domain') if config.has_option('ZenDesk', 'domain') else raw_input('Zendesk Domain: ')
-            admin_email = config.get('ZenDesk', 'email') if config.has_option('ZenDesk', 'email') else raw_input('Zendesk Email Address: ')
-            admin_password = config.get('ZenDesk', 'pass') if config.has_option('ZenDesk', 'pass') and config.get('ZenDesk', 'pass') else getpass.getpass()
-            download_directory = config.get('Downloader', 'download_directory') if config.has_option('Downloader', 'download_directory') else download_directory
-            run_open = config.getboolean('Downloader', 'run_open') if config.has_option('Downloader', 'run_open') and config.get('Downloader', 'run_open') else run_open
-            open_program = config.get('Downloader', 'open_program') if config.has_option('Downloader', 'open_program') else open_program
-
-        if 'error' in get_users_by_page():
-            print 'Authentication with "%s" failed. Please try again...' % (admin_email)
-        else:
-            break
-    # print 'Succesfully authenticated to ZenDesk!'
-
-def get_users_by_page(page_num=1):
-    '''Test function for authentication purposes'''
-
-    r = requests.get('https://%s/api/v2/users.json' % domain,
-                      params={'page': page_num},
-                      auth=(admin_email, admin_password))
-    return r.json
-
-def get_ticket_metadata(ticket_id):
-    '''Get the ticket meta per id'''
-
-    r = requests.get('https://%s/api/v2/tickets/%s.json' % (domain, ticket_id),
-                      auth=(admin_email, admin_password))
-    return r.json
-
-def get_ticket(ticket_id):
-    '''Get the ticket postings per id'''
-
-    r = requests.get('https://%s/api/v2/tickets/%s/audits.json' % (domain, ticket_id),
-                      auth=(admin_email, admin_password))
-    to_return = r.json['audits']
-
-    while r.json['next_page']:
-        r = requests.get(r.json['next_page'], auth=(admin_email, admin_password))
-        to_return = to_return + r.json['audits']
-
-    return to_return
-
-def extract_file_information(ticket):
-    '''Extract curl'd XML data and format into a dictionary with an array of attachments'''
-
-    ticket_id = ticket[0]['ticket_id']
-    organization_id = get_ticket_metadata(ticket_id)['ticket']['organization_id']
-    attachment_list = []
-
-    for audit in ticket:
-        time_created = audit['created_at']
-
-        for event in audit['events']:
-            if 'attachments' in event:
-                for attachment in event['attachments']:
-                    attachment_list.append([attachment['id'], time_created, attachment['file_name'], attachment['content_url']])
-
-    return {
-        'ticket_id': str(ticket_id),
-        'organization_id': str(organization_id),
-        'attachments': attachment_list
-    }
-
-def filename_split(filename):
-    '''Return the filename root and extension taking .tar.gz's into account'''
-
-    root, extension = os.path.splitext(filename)
-    if filename.endswith('.tar.gz') or filename.endswith('.tar.bz2'):
-        root, first_ext = os.path.splitext(root)
-        extension = first_ext + extension
-    return root, extension
-
-def fix_filenames(file_id, filename):
-    '''Make filenames ZenDesk organizational friendly'''
-
-    root, extension = filename_split(filename)
-    if not extension:
-        extension = '.txt'
-    return '%s_%s%s' % (root, file_id, extension)
-
-def get_formatted_time(created_at):
-    '''Correctly format the time for touch -t from the provided ZenDesk timestamp'''
-
-    time_format = '%Y-%m-%dT%H:%M:%S'
-    created_time = created_at[:-1]
-    created_stamp = calendar.timegm(time.strptime(created_time, time_format))
-    created_date = time.localtime(created_stamp)
-    formatted_time = time.strftime('%Y%m%d%H%M', created_date)
-    return formatted_time
-
-def check_and_extract_files(download_directory, filename, local_filename, formatted_time):
-    '''Extract files to their proper directories if file type is known'''
-
-    # Compare known extractable file extensions to see if there is a match
-    compressed_extensions = ['.tar.gz', '.tgz', '.zip', '.gz', '.bz2', '.tar.bz2', '.tbz']
-    file_root, file_extension = filename_split(filename)
-    if not file_extension in compressed_extensions:
-        return
-
-    # Calculate and ensure archive directory exists
-    archive_folder = os.path.join(download_directory, file_root)
-    if not os.path.isdir(archive_folder) and not file_extension in ['.gz', '.bz2']:
-        os.makedirs(archive_folder)
-
-    # Perform the actual extraction process
-    print "Extracting %s into %s..." % (filename, archive_folder)
-    if file_extension in ['.tar.gz', '.tgz']:
-        os.system('cd %s; tar xf %s' % (archive_folder, local_filename))
-    elif file_extension in ['.tar.bz2', '.tbz']:
-        os.system('cd %s; tar jxf %s' % (archive_folder, local_filename))
-    elif file_extension == '.zip':
-        os.system('cd %s; unzip %s' % (archive_folder, local_filename))
-    elif file_extension == '.gz':
-        os.system('gunzip -d %s' % (local_filename))
-    elif file_extension == '.bz2':
-        os.system('bzip2 -d %s' % (local_filename))
-
-    # Modify the created and modified timestamps for the archive folder
-    os.system('touch -t  %s %s' % (formatted_time, archive_folder))
-    os.system('touch -mt %s %s' % (formatted_time, archive_folder))
-
-def check_and_convert_files(download_directory, filename, local_filename, formatted_time):
-    '''Converts files to txt if file type is known'''
-
-    # Currently only supports OSX due to `textutil`
-    if sys.platform != 'darwin':
-        return
-
-    # Compare known extractable file extensions to see if there is a match
-    compressed_extensions = ['.rtf', '.doc', '.docx']
-    file_root, file_extension = filename_split(filename)
-    if not file_extension in compressed_extensions:
-        return
-
-    # Perform the actual conversion process
-    print "Converting %s into txt..." % (filename)
-    os.system('cd %s; textutil -convert txt %s' % (download_directory, local_filename))
-
-    # Modify the created and modified timestamps for the converted file
-    os.system('touch -t  %s %s.txt' % (formatted_time, file_root))
-    os.system('touch -mt %s %s.txt' % (formatted_time, file_root))
-
-    # Move the original file (to be touched later)
-    raw_directory = os.path.join(download_directory, 'raw_files')
-    if not os.path.exists(raw_directory):
-        os.makedirs(raw_directory)
-    os.system('mv %s %s' % (local_filename, raw_directory))
-
-def download_files(attachments):
-    '''Download all the files provided in the attachments dictionary'''
-
-    global download_directory
-
-    # Ensure the download directory exists
-    download_directory = os.path.join(download_directory, attachments['organization_id'], attachments['ticket_id'])
-    if not os.path.exists(download_directory):
-        os.makedirs(download_directory)
-
-    # Download each attachment
-    for attachment in attachments['attachments']:
-
-        # Extract and properly format all data
-        file_id, created_at, filename, url = attachment
-        filename = fix_filenames(file_id, filename)
-        local_filename = os.path.join(download_directory, filename)
-        formatted_time = get_formatted_time(created_at)
-
-        # Perform the actual download of each file
-        if not os.path.exists(local_filename):
-            print "Downloading %s to %s..." % (filename, local_filename)
-            urllib.urlretrieve(url, local_filename)
-
-            # Check if file extraction is possible
-            check_and_extract_files(download_directory, filename, local_filename, formatted_time)
-            check_and_convert_files(download_directory, filename, local_filename, formatted_time)
-
-            # Modify the created and modified timestamps for each file
-            os.system('touch -t  %s %s' % (formatted_time, local_filename))
-            os.system('touch -mt %s %s' % (formatted_time, local_filename))
-
-    return download_directory
 
 def main():
     if len(sys.argv) == 2:
         ticket_id = sys.argv[1]
     else:
-        sys.stderr.write('Usage: %s <ticket_id>\n' % __file__)
+        sys.stderr.write('Usage: {0} <ticket_id>\n'.format(__file__))
         sys.exit(1)
 
+    zd = ZenDesk()
     # Ensure our authentication is correct
-    get_authentication()
+    zd.authenticate()
 
     # Perform all attachment downloads
-    ticket = get_ticket(ticket_id)
-    attachments = extract_file_information(ticket)
-    download_directory = download_files(attachments)
+    attachments = zd.extract_file_information(ticket_id)
+    dl = DownloadFiles()
+    download_directory = dl.download_files(zd.download_directory, attachments)
 
     # Open the finder to display downloaded files
-    print
-    print "Files download to:"
-    print download_directory
-    print
-    if run_open:
-        os.system('%s %s' % (open_program, download_directory))
+    print "\nFiles download to:\n{0}\n".format(download_directory)
+    if zd.run_open:
+        os.system('{0} "{1}"'.format(zd.open_program, download_directory))
 
 
 if __name__ == "__main__":
diff --git a/downloadfiles.py b/downloadfiles.py
new file mode 100644
index 0000000..b0c9ee3
--- /dev/null
+++ b/downloadfiles.py
@@ -0,0 +1,189 @@
+import calendar
+import os
+import re
+import shlex
+import subprocess
+import sys
+import time
+import urllib
+
+
+class DownloadFiles(object):
+    """Download ticket attachments, then unpack or convert the known file types."""
+
+    # Lower-case extension -> external command that unpacks/converts the file.
+    # The file name is appended to the command as its last argument.
+    extToCommand = {".tar.gz": "tar xzf",
+                    ".tgz": "tar xzf",
+                    ".tar.bz2": "tar xjf",
+                    ".tbz": "tar xjf",
+                    ".zip": "unzip",
+                    ".gz": "gunzip",
+                    # Without -d bzip2 tries to compress and refuses *.bz2 input.
+                    ".bz2": "bzip2 -d",
+                    ".rtf": "textutil -convert txt",
+                    ".doc": "textutil -convert txt",
+                    ".docx": "textutil -convert txt"}
+    # Extensions whose command works next to the downloaded file, so no
+    # per-archive directory is created for them.
+    extNoCd = {".gz", ".bz2", ".rtf", ".doc", ".docx"}
+    # Extensions converted with textutil, which only exists on OS X.
+    extTextutil = {".rtf", ".doc", ".docx"}
+
+    def _run(self, argv, cwd=None):
+        """
+        Run an external command without going through a shell.
+
+        :param argv: the command and its arguments as a list
+        :param cwd: directory to run the command from, or None for the current one
+        :return: the command's exit status, or None if it could not be started
+        """
+        try:
+            return subprocess.call(argv, cwd=cwd)
+        except OSError as error:
+            # Keep going like os.system() did when a tool is not installed
+            print("Unable to run '{0}': {1}".format(argv[0], error))
+            return None
+
+    def filename_split(self, filename):
+        """
+        Return the filename root and extension taking multi-extension files, like .tar.gz, into account
+
+        Extensions listed in extToCommand are matched first, longest first, so dots
+        in the root (version numbers, "system.log.gz") stay in the root.  Any other
+        name keeps the earlier behavior: every trailing suffix joins the extension.
+
+        :param filename: the filename to split
+        :return: tuple of (root, lower-cased extension); the extension may be ''
+        """
+        lowered = filename.lower()
+        for known in sorted(DownloadFiles.extToCommand, key=len, reverse=True):
+            if lowered.endswith(known) and len(filename) > len(known):
+                return filename[:-len(known)], known
+
+        root, extension = os.path.splitext(filename)
+        full_extension = extension
+        while extension:
+            root, extension = os.path.splitext(root)
+            full_extension = extension + full_extension
+        return root, full_extension.lower()
+
+    def fix_filename(self, file_id, filename):
+        """
+        Make filename ZenDesk organizational friendly
+
+        :param file_id: attachment id, inserted between the root and the extension
+        :param filename: the attachment's original file name
+        :return: sanitized name of the form <root>_<file_id><extension>
+        """
+        root, extension = self.filename_split(filename)
+        # Clean up name; the extension additionally keeps its dots
+        root = re.sub(r"[^a-zA-Z_0-9\-]", "", root)
+        extension = re.sub(r"[^a-zA-Z_0-9\-.]", "", extension)
+        if not extension:
+            extension = '.txt'
+        return '%s_%s%s' % (root, file_id, extension)
+
+    def get_formatted_time(self, created_at):
+        """
+        Correctly format the time for touch -t from the provided ZenDesk timestamp
+
+        :param created_at: timestamp string, parsed as UTC after its last character is dropped
+        :return: the same instant in local time, formatted as YYYYMMDDhhmm
+        """
+        time_format = '%Y-%m-%dT%H:%M:%S'
+        # Drop the last character (presumably the trailing 'Z') before parsing
+        created_time = created_at[:-1]
+        created_stamp = calendar.timegm(time.strptime(created_time, time_format))
+        created_date = time.localtime(created_stamp)
+        return time.strftime('%Y%m%d%H%M', created_date)
+
+    def touch_file(self, filetime, filename):
+        """
+        Modify the access and modified timestamps of the given file
+
+        :param filetime: the time to use, in touch -t format
+        :param filename: the file to touch
+        """
+        # touch -t without -a/-m updates both timestamps in one call
+        self._run(['touch', '-t', filetime, filename])
+
+    def maybe_create_dir_and_run_command(self, command, filename, dir_name=''):
+        """
+        Run the command on the given file.  If a directory is specified, run the command
+        inside the given directory.  If needed create the directory first.
+
+        :param command: command to run
+        :param filename: file to run it on
+        :param dir_name: directory to run it from
+        :return: the command's exit status, or None if it could not be started
+        """
+        if dir_name and not os.path.isdir(dir_name):
+            os.makedirs(dir_name)
+        # Absolute path: a relative one would no longer resolve from dir_name
+        argv = shlex.split(command) + [os.path.abspath(filename)]
+        return self._run(argv, cwd=dir_name or None)
+
+    def check_and_extract_files(self, download_directory, filename, local_filename, formatted_time):
+        """
+        Extract files to their proper directories if file type is known
+
+        :param download_directory: where to find the file
+        :param filename: filename only
+        :param local_filename: full path to file
+        :param formatted_time: timestamp of the file
+        """
+        # Compare known extractable file extensions to see if there is a match
+        file_root, file_extension = self.filename_split(filename)
+        if file_extension not in DownloadFiles.extToCommand:
+            return
+        if file_extension in DownloadFiles.extTextutil and sys.platform != 'darwin':
+            return
+
+        # Calculate the archive directory
+        command = DownloadFiles.extToCommand[file_extension]
+        archive_folder = '' if file_extension in DownloadFiles.extNoCd else os.path.join(download_directory, file_root)
+
+        # Perform the actual extraction process
+        output = "Using '{0}' to extract '{1}'".format(command, filename)
+        if archive_folder:
+            output += " into '{0}'".format(archive_folder)
+        print(output)
+
+        self.maybe_create_dir_and_run_command(command, local_filename, archive_folder)
+
+        # Date the extracted folder like the attachment it came from
+        if archive_folder and os.path.isdir(archive_folder):
+            self.touch_file(formatted_time, archive_folder)
+
+    def download_files(self, base_download_directory, attachment_info):
+        """
+        Download all the files provided in the attachments dictionary
+
+        :param base_download_directory: directory under which <organization>/<ticket> is created
+        :param attachment_info: dict with 'organization_id', 'ticket_id' and 'attachments'
+        :return: the directory the attachments were downloaded to
+        """
+        # Ensure the download directory exists
+        download_directory = os.path.join(base_download_directory, attachment_info['organization_id'], attachment_info['ticket_id'])
+        if not os.path.exists(download_directory):
+            os.makedirs(download_directory)
+
+        # Download each attachment
+        for attachment in attachment_info['attachments']:
+            # Extract and properly format all data
+            file_id, created_at, filename, url = attachment
+            filename = self.fix_filename(file_id, filename)
+            local_filename = os.path.join(download_directory, filename)
+            formatted_time = self.get_formatted_time(created_at)
+
+            # Perform the actual download of each file
+            if not os.path.exists(local_filename):
+                print("Downloading {0} to {1} ...".format(filename, local_filename))
+                urllib.urlretrieve(url, local_filename)
+
+                # Check if file extraction is possible
+                self.check_and_extract_files(download_directory, filename, local_filename, formatted_time)
+                self.touch_file(formatted_time, local_filename)
+
+        return download_directory
diff --git a/dszendesk.py b/dszendesk.py
new file mode 100644
index 0000000..bf6fb0d
--- /dev/null
+++ b/dszendesk.py
@@ -0,0 +1,136 @@
+import os
+import getpass
+import ConfigParser
+import requests
+import re
+
+
+class ZenDesk(object):
+    """Class for interacting with ZenDesk."""
+
+    def __init__(self):
+        """Constructor for ZenDesk object; authenticate() fills in the settings"""
+        self.domain = None
+        self.admin_email = None
+        self.admin_password = None
+        self.download_directory = None
+        self.run_open = None
+        self.open_program = None
+
+    def _json(self, response):
+        """
+        Return the decoded JSON body of a requests response, or None if it has none.
+
+        Response.json is a property in requests releases before 1.0 and a method
+        from 1.0 on; both are supported.
+
+        :param response: the requests response object
+        """
+        body = response.json
+        if not callable(body):
+            return body
+        try:
+            return body()
+        except ValueError:
+            # Same result the old property gave for a non-JSON body
+            return None
+
+    def authenticate(self):
+        """
+        Load settings from ~/.zendesk.cfg, prompt for any missing credentials and
+        keep prompting until ZenDesk accepts them.
+        """
+        configfile = os.path.join(os.path.expanduser('~'), '.zendesk.cfg')
+        config = ConfigParser.SafeConfigParser({'download_directory': './',
+                                                'run_open': 'False',
+                                                'open_program': 'open'})
+        if os.path.exists(configfile):
+            config.read(configfile)
+        # Parser defaults only apply to sections that exist, so make sure this one does
+        if not config.has_section('Downloader'):
+            config.add_section('Downloader')
+
+        def stored(option):
+            # raw=True so a '%' in a password is not treated as interpolation
+            if config.has_option('ZenDesk', option):
+                return config.get('ZenDesk', option, raw=True)
+            return None
+
+        self.domain = stored('domain') or raw_input('Zendesk Domain: ')
+        self.admin_email = stored('email') or raw_input('Zendesk Email Address: ')
+        self.admin_password = stored('pass') or getpass.getpass()
+        self.download_directory = config.get('Downloader', 'download_directory')
+        self.run_open = config.getboolean('Downloader', 'run_open')
+        self.open_program = config.get('Downloader', 'open_program')
+
+        while True:
+            try:
+                result = self.get_users_by_page()
+            except requests.exceptions.RequestException:
+                # e.g. a mistyped domain; treat it like a rejected login
+                result = None
+            if isinstance(result, dict) and 'error' not in result:
+                print('Successfully authenticated to ZenDesk!')
+                break
+            print('Authentication with "{0}" failed. Please try again...'.format(self.admin_email))
+            self.domain = raw_input('Zendesk Domain: ')
+            self.admin_email = raw_input('Zendesk Email Address: ')
+            self.admin_password = getpass.getpass()
+
+    def get_users_by_page(self, page_num=1):
+        """Test function for authentication purposes"""
+        r = requests.get('https://%s/api/v2/users.json' % self.domain,
+                         params={'page': page_num},
+                         auth=(self.admin_email, self.admin_password))
+        return self._json(r)
+
+    def get_all_ticket_metadata(self, ticket_id):
+        """Get the ticket meta per id"""
+        r = requests.get('https://%s/api/v2/tickets/%s.json?include=organizations' % (self.domain, ticket_id),
+                         auth=(self.admin_email, self.admin_password))
+        return self._json(r)
+
+    def get_ticket(self, ticket_id):
+        """Get the ticket postings per id"""
+        r = requests.get('https://%s/api/v2/tickets/%s/audits.json' % (self.domain, ticket_id),
+                         auth=(self.admin_email, self.admin_password))
+        page = self._json(r)
+        to_return = list(page['audits'])
+
+        # Follow the pagination links until the last page
+        while page.get('next_page'):
+            r = requests.get(page['next_page'], auth=(self.admin_email, self.admin_password))
+            page = self._json(r)
+            to_return.extend(page['audits'])
+
+        return to_return
+
+    def extract_file_information(self, ticket_id):
+        """
+        Extract curl'd json data and format into a dictionary with an array of attachments
+
+        :param ticket_id: id of the ticket whose attachments are wanted
+        :return: dict with 'ticket_id', 'organization_id' (name_id folder label) and
+                 'attachments', a list of [id, created_at, file_name, content_url]
+        """
+        ticket = self.get_ticket(ticket_id)
+        all_ticket_data = self.get_all_ticket_metadata(ticket_id)
+        ticket_data = all_ticket_data["ticket"]
+        # Tolerate a missing or empty sideloaded 'organizations' list
+        organizations = all_ticket_data.get('organizations') or [{}]
+        organization_id = ticket_data["organization_id"]
+        organization_name = organizations[0].get("name") or "Null"
+        # Clean up org names so they don't have funny characters
+        organization_name = re.sub(r"[^a-zA-Z_0-9]", "", organization_name)
+        attachment_list = []
+        for audit in ticket:
+            time_created = audit['created_at']
+            for event in audit['events']:
+                for attachment in event.get('attachments', []):
+                    attachment_list.append([attachment['id'], time_created, attachment['file_name'], attachment['content_url']])
+
+        return {
+            'ticket_id': str(ticket_id),
+            'organization_id': str("{0}_{1}".format(organization_name, organization_id)),
+            'attachments': attachment_list
+        }