From e7852985b2807b0c634f3943418cdeb582f51bc3 Mon Sep 17 00:00:00 2001 From: James Gartrell Date: Thu, 21 Mar 2024 07:58:44 -0700 Subject: [PATCH 1/6] Apply 2to3 recommended changes --- _tests/test_pput.py | 16 ++++++++-------- _tests/test_snap.py | 14 +++++++------- _tests/test_ssh_sync.py | 8 ++++---- z3/config.py | 4 ++-- z3/pput.py | 12 ++++++------ z3/s3_mp_cleanup.py | 6 +++--- z3/snap.py | 10 ++++------ z3/ssh_sync.py | 2 -- 8 files changed, 34 insertions(+), 38 deletions(-) diff --git a/_tests/test_pput.py b/_tests/test_pput.py index 27af122..dfa9df4 100644 --- a/_tests/test_pput.py +++ b/_tests/test_pput.py @@ -1,6 +1,6 @@ -from cStringIO import StringIO +from io import StringIO from datetime import datetime -from Queue import Queue +from queue import Queue from uuid import uuid4 import hashlib @@ -40,18 +40,18 @@ def sample_data(): global _cached_sample_data if _cached_sample_data is None: data = StringIO() - chars = "".join(chr(i) for i in xrange(256)) - for count in xrange(6): + chars = "".join(chr(i) for i in range(256)) + for count in range(6): cc = chr(count) - for _ in xrange(2 * 1024): + for _ in range(2 * 1024): # each iteration adds 1MB # each 1MB chunk is made up of an alternation of the block's index (zero based) # and an incrementing counter (overflows to 0 several times) # the first block will be: 00 00 00 01 00 02 ... 00 ff 00 00 ... 00 ff data.write( - "".join(cc+chars[i] for i in xrange(256)) + "".join(cc+chars[i] for i in range(256)) ) - print "wrote {} MB" .format(data.tell() / 1024.0 / 1024.0) + print("wrote {} MB" .format(data.tell() / 1024.0 / 1024.0)) # give the test a read-only file to avoid accidentally modifying the data between tests _cached_sample_data = ReadOnlyFile(data) _cached_sample_data.seek(0) @@ -174,7 +174,7 @@ def call(self): def test_retry_decorator(): boom = Boom() with pytest.raises(BoomException) as excp_info: - for _ in xrange(3): + for _ in range(3): boom.call() assert boom.count == 3 diff --git a/_tests/test_snap.py b/_tests/test_snap.py index 106163c..dc10e29 100644 --- a/_tests/test_snap.py +++ b/_tests/test_snap.py @@ -1,6 +1,6 @@ # pylint: disable=redefined-outer-name,protected-access from collections import OrderedDict -from cStringIO import StringIO +from io import StringIO import contextlib import string import sys @@ -30,7 +30,7 @@ def __init__(self, name, metadata=None): class FakeBucket(object): - rand_prefix = 'test-' + ''.join([random.choice(string.ascii_letters) for _ in xrange(8)]) + '/' + rand_prefix = 'test-' + ''.join([random.choice(string.ascii_letters) for _ in range(8)]) + '/' fake_data = { "pool/fs@snap_0": {'parent': 'pool/fs@snap_expired'}, "pool/fs@snap_1_f": {'isfull': 'true', 'compressor': 'pigz1'}, @@ -44,7 +44,7 @@ class FakeBucket(object): def list(self, *a, **kwa): # boto bucket.list gives you keys without metadata, let's emulate that - return (FakeKey(os.path.join(self.rand_prefix, name)) for name in self.fake_data.iterkeys()) + return (FakeKey(os.path.join(self.rand_prefix, name)) for name in self.fake_data.keys()) def get_key(self, key): name = key[len(self.rand_prefix):] @@ -60,9 +60,9 @@ def write_s3_data(): cfg = get_config() bucket = boto.connect_s3( cfg['S3_KEY_ID'], cfg['S3_SECRET']).get_bucket(cfg['BUCKET']) - for name, metadata in FakeBucket.fake_data.iteritems(): + for name, metadata in FakeBucket.fake_data.items(): key = bucket.new_key(os.path.join(FakeBucket.rand_prefix, name)) - headers = {("x-amz-meta-" + k): v for k, v in metadata.iteritems()} + headers = {("x-amz-meta-" + k): v for k, v in metadata.items()} key.set_contents_from_string("spam", headers=headers) return bucket @@ -204,8 +204,8 @@ def test_list_local_snapshots(): } snapshots = zfs._parse_snapshots() # comparing .items() because we care about the sorting in the OrderedDict's - assert snapshots['pool'].items() == expected['pool'].items() - assert snapshots['pool/fs'].items() == expected['pool/fs'].items() + assert list(snapshots['pool'].items()) == list(expected['pool'].items()) + assert list(snapshots['pool/fs'].items()) == list(expected['pool/fs'].items()) @pytest.mark.parametrize("fs_name, expected", [ diff --git a/_tests/test_ssh_sync.py b/_tests/test_ssh_sync.py index 58d73d7..21be758 100644 --- a/_tests/test_ssh_sync.py +++ b/_tests/test_ssh_sync.py @@ -22,7 +22,7 @@ ) -@pytest.mark.parametrize("pair, expected", HAPPY_PATH.values(), ids=HAPPY_PATH.keys()) +@pytest.mark.parametrize("pair, expected", list(HAPPY_PATH.values()), ids=list(HAPPY_PATH.keys())) def test_snapshots_to_send(pair, expected): local, remote = pair assert snapshots_to_send(local, remote) == expected @@ -36,7 +36,7 @@ def test_snapshots_to_send(pair, expected): ) -@pytest.mark.parametrize('pair, err_msg', ERRORS.values(), ids=ERRORS.keys()) +@pytest.mark.parametrize('pair, err_msg', list(ERRORS.values()), ids=list(ERRORS.keys())) def test_snapshots_to_send_error(pair, err_msg): local, remote = pair with pytest.raises(AssertionError) as err: @@ -59,7 +59,7 @@ def test_snapshots_to_send_error(pair, err_msg): ) -@pytest.mark.parametrize('pair, expected', PULL_HAPPY_PATH.values(), ids=PULL_HAPPY_PATH.keys()) +@pytest.mark.parametrize('pair, expected', list(PULL_HAPPY_PATH.values()), ids=list(PULL_HAPPY_PATH.keys())) def test_pull_command(pair, expected): commands = sync_snapshots( pair, @@ -87,7 +87,7 @@ def test_pull_command(pair, expected): ) -@pytest.mark.parametrize('pair, expected', PUSH_HAPPY_PATH.values(), ids=PUSH_HAPPY_PATH.keys()) +@pytest.mark.parametrize('pair, expected', list(PUSH_HAPPY_PATH.values()), ids=list(PUSH_HAPPY_PATH.keys())) def test_push_command(pair, expected): commands = sync_snapshots( pair, diff --git a/z3/config.py b/z3/config.py index 3cbebdd..1085f46 100644 --- a/z3/config.py +++ b/z3/config.py @@ -1,4 +1,4 @@ -import ConfigParser +import configparser import os import os.path @@ -50,7 +50,7 @@ def get(self, key, default=None, section=None): def get_config(): global _settings if _settings is None: - _config = ConfigParser.ConfigParser() + _config = configparser.ConfigParser() default = os.path.join(z3.__path__[0], "z3.conf") _config.read(default) _config.read("/etc/z3_backup/z3.conf") diff --git a/z3/pput.py b/z3/pput.py index 002251b..e1dd74b 100644 --- a/z3/pput.py +++ b/z3/pput.py @@ -4,8 +4,8 @@ pput bucket_name/filename """ -from Queue import Queue -from cStringIO import StringIO +from queue import Queue +from io import StringIO from collections import namedtuple from threading import Thread import argparse @@ -47,7 +47,7 @@ def multipart_etag(digests): def parse_size(size): - if isinstance(size, (int, long)): + if isinstance(size, int): return size size = size.strip().upper() last = size[-1] @@ -92,7 +92,7 @@ def retry(times=int(CFG['MAX_RETRIES'])): def decorator(func): @functools.wraps(func) def wrapped(*a, **kwa): - for attempt in xrange(1, times+1): + for attempt in range(1, times+1): try: return func(*a, **kwa) except: # pylint: disable=bare-except @@ -179,7 +179,7 @@ def _start_workers(self, concurrency, worker_class): inbox=work_queue, outbox=result_queue, ).start() - for _ in xrange(concurrency)] + for _ in range(concurrency)] return workers def _begin_upload(self): @@ -359,7 +359,7 @@ def main(): sys.stderr.write("{}\n".format(excp)) return 1 if verbosity >= VERB_NORMAL: - print json.dumps({'status': 'success', 'etag': etag}) + print(json.dumps({'status': 'success', 'etag': etag})) if __name__ == '__main__': diff --git a/z3/s3_mp_cleanup.py b/z3/s3_mp_cleanup.py index 54dbd9a..97b06f1 100644 --- a/z3/s3_mp_cleanup.py +++ b/z3/s3_mp_cleanup.py @@ -10,15 +10,15 @@ def cleanup_multipart(bucket, max_days=1, dry_run=False): max_age_seconds = max_days * 24 * 3600 now = datetime.utcnow() fmt = "{} | {:30} | {:20}" - print fmt.format("A", "key", "initiated") + print(fmt.format("A", "key", "initiated")) for multi in bucket.list_multipart_uploads(): delta = now-boto.utils.parse_ts(multi.initiated) if delta.total_seconds() >= max_age_seconds: - print fmt.format("X", multi.key_name, multi.initiated) + print(fmt.format("X", multi.key_name, multi.initiated)) if not dry_run: multi.cancel_upload() else: - print fmt.format(" ", multi.key_name, multi.initiated) + print(fmt.format(" ", multi.key_name, multi.initiated)) def main(): diff --git a/z3/snap.py b/z3/snap.py index 48bd1d2..859dd27 100644 --- a/z3/snap.py +++ b/z3/snap.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import argparse import functools import logging @@ -145,7 +143,7 @@ def _snapshots(self): return snapshots def list(self): - return sorted(self._snapshots.values(), key=operator.attrgetter('name')) + return sorted(list(self._snapshots.values()), key=operator.attrgetter('name')) def get(self, name): return self._snapshots.get(name) @@ -206,7 +204,7 @@ def _build_snapshots(self, fs_name): # for fs_name, fs_snaps in self._parse_snapshots().iteritems(): fs_snaps = self._parse_snapshots().get(fs_name, {}) parent = None - for snap_name, data in fs_snaps.iteritems(): + for snap_name, data in fs_snaps.items(): if not snap_name.startswith(self._snapshot_prefix): continue full_name = '{}@{}'.format(fs_name, snap_name) @@ -226,7 +224,7 @@ def _snapshots(self): return self._build_snapshots(self._fs_name) def list(self): - return self._snapshots.values() + return list(self._snapshots.values()) def get_latest(self): if len(self._snapshots) == 0: @@ -235,7 +233,7 @@ def get_latest(self): 'Nothing to backup for filesystem "{}". Are you sure ' 'SNAPSHOT_PREFIX="{}" is correct?'.format( cfg.get('FILESYSTEM'), cfg.get('SNAPSHOT_PREFIX'))) - return self._snapshots.values()[-1] + return list(self._snapshots.values())[-1] def get(self, name): return self._snapshots.get(name) diff --git a/z3/ssh_sync.py b/z3/ssh_sync.py index 828997c..45f1cd9 100644 --- a/z3/ssh_sync.py +++ b/z3/ssh_sync.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import argparse import subprocess import sys From 9d6c40813a4e6bfa981399944e1d4e9582930dd7 Mon Sep 17 00:00:00 2001 From: James Gartrell Date: Thu, 21 Mar 2024 08:02:50 -0700 Subject: [PATCH 2/6] Manage zfs data stream as raw bytes in Python3 --- z3/get.py | 2 +- z3/pput.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/z3/get.py b/z3/get.py index 7b8a5b4..c46303d 100644 --- a/z3/get.py +++ b/z3/get.py @@ -26,7 +26,7 @@ def main(): else: s3 = boto3.client('s3', **extra_config) try: - s3.download_fileobj(cfg['BUCKET'], args.name, sys.stdout, Config=config) + s3.download_fileobj(cfg['BUCKET'], args.name, sys.stdout.buffer, Config=config) except botocore.exceptions.ClientError as e: if e.response['Error']['Code'] == "404": print("The object does not exist.") diff --git a/z3/pput.py b/z3/pput.py index e1dd74b..73e51a8 100644 --- a/z3/pput.py +++ b/z3/pput.py @@ -5,7 +5,7 @@ """ from queue import Queue -from io import StringIO +from io import BytesIO from collections import namedtuple from threading import Thread import argparse @@ -66,7 +66,7 @@ class StreamHandler(object): def __init__(self, input_stream, chunk_size=5*1024*1024): self.input_stream = input_stream self.chunk_size = chunk_size - self._partial_chunk = "" + self._partial_chunk = b"" self._eof_reached = False @property @@ -82,7 +82,7 @@ def get_chunk(self): self._partial_chunk += read if len(self._partial_chunk) == self.chunk_size or self._eof_reached: chunk = self._partial_chunk - self._partial_chunk = "" + self._partial_chunk = b"" return chunk # else: # print "partial", len(self._partial_chunk) @@ -123,7 +123,7 @@ def upload_part(self, index, chunk): part.id = self.multipart.id part.key_name = self.multipart.key_name return part.upload_part_from_file( - StringIO(chunk), index, replace=True).md5 + BytesIO(chunk), index, replace=True).md5 def start(self): self._thread = Thread(target=self.main_loop) @@ -321,7 +321,7 @@ def parse_args(): def main(): args = parse_args() - input_fd = os.fdopen(args.file_descriptor, 'r') if args.file_descriptor else sys.stdin + input_fd = os.fdopen(args.file_descriptor, 'rb') if args.file_descriptor else sys.stdin.buffer if args.estimated is not None: chunk_size = optimize_chunksize(parse_size(args.estimated)) else: From 048a2c93103a554ebbcbce55480f40a948fe8c05 Mon Sep 17 00:00:00 2001 From: James Gartrell Date: Thu, 21 Mar 2024 08:04:10 -0700 Subject: [PATCH 3/6] Manage zfs list command parsing explicitly as encoded text in Python3 --- z3/snap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/z3/snap.py b/z3/snap.py index 859dd27..1b7166d 100644 --- a/z3/snap.py +++ b/z3/snap.py @@ -169,7 +169,7 @@ def _list_snapshots(self): # see FakeZFSManager return subprocess.check_output( ['zfs', 'list', '-Ht', 'snap', '-o', - 'name,used,refer,mountpoint,written']) + 'name,used,refer,mountpoint,written'], universal_newlines=True) def _parse_snapshots(self): """Returns all snapshots grouped by filesystem, a dict of OrderedDict's From 4d92d255915f7b955bcb9162f3f856ab637d9732 Mon Sep 17 00:00:00 2001 From: James Gartrell Date: Thu, 21 Mar 2024 09:48:14 -0700 Subject: [PATCH 4/6] Manage zfs data stream as raw bytes in Python3 for tests --- _tests/test_pput.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/_tests/test_pput.py b/_tests/test_pput.py index dfa9df4..65b1ce2 100644 --- a/_tests/test_pput.py +++ b/_tests/test_pput.py @@ -1,4 +1,4 @@ -from io import StringIO +from io import BytesIO from datetime import datetime from queue import Queue from uuid import uuid4 @@ -39,7 +39,7 @@ def sample_data(): """ global _cached_sample_data if _cached_sample_data is None: - data = StringIO() + data = BytesIO() chars = "".join(chr(i) for i in range(256)) for count in range(6): cc = chr(count) @@ -49,7 +49,7 @@ def sample_data(): # and an incrementing counter (overflows to 0 several times) # the first block will be: 00 00 00 01 00 02 ... 00 ff 00 00 ... 00 ff data.write( - "".join(cc+chars[i] for i in range(256)) + "".join(cc+chars[i] for i in range(256)).encode("latin1") ) print("wrote {} MB" .format(data.tell() / 1024.0 / 1024.0)) # give the test a read-only file to avoid accidentally modifying the data between tests @@ -71,12 +71,12 @@ def test_multipart_etag(sample_data): def test_stream_handler(): - stream_handler = StreamHandler(StringIO("aabbccdde"), chunk_size=2) + stream_handler = StreamHandler(BytesIO(b"aabbccdde"), chunk_size=2) chunks = [] while not stream_handler.finished: chunk = stream_handler.get_chunk() chunks.append(chunk) - assert chunks == ['aa', 'bb', 'cc', 'dd', 'e'] + assert chunks == [b'aa', b'bb', b'cc', b'dd', b'e'] def test_handle_results(): @@ -134,7 +134,7 @@ def test_supervisor_loop(sample_data): def test_zero_data(sample_data): - stream_handler = StreamHandler(StringIO()) + stream_handler = StreamHandler(BytesIO()) bucket = FakeBucket() sup = UploadSupervisor(stream_handler, 'test', bucket=bucket) with pytest.raises(UploadException): From f0bbd28c8558e10b56793b74dab1a875d9b63fc8 Mon Sep 17 00:00:00 2001 From: James Gartrell Date: Thu, 21 Mar 2024 10:15:42 -0700 Subject: [PATCH 5/6] Remove message attribute from Exception class, see PEP 352 --- _tests/test_snap.py | 8 ++++---- _tests/test_ssh_sync.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/_tests/test_snap.py b/_tests/test_snap.py index dc10e29..632579a 100644 --- a/_tests/test_snap.py +++ b/_tests/test_snap.py @@ -324,7 +324,7 @@ def test_backup_incremental_missing_parent(s3_manager): pair_manager = PairManager(s3_manager, zfs_manager, command_executor=fake_cmd) with pytest.raises(IntegrityError) as excp_info: pair_manager.backup_incremental() - assert excp_info.value.message == \ + assert str(excp_info.value) == \ "Broken snapshot detected pool/fs@snap_5, reason: 'parent broken'" assert fake_cmd._called_commands == [] @@ -345,7 +345,7 @@ def test_backup_incremental_cycle(s3_manager): pair_manager = PairManager(s3_manager, zfs_manager, command_executor=fake_cmd) with pytest.raises(IntegrityError) as excp_info: pair_manager.backup_incremental() - assert excp_info.value.message == \ + assert str(excp_info.value) == \ "Broken snapshot detected pool/fs@snap_7_cycle, reason: 'cycle detected'" assert fake_cmd._called_commands == [] @@ -456,7 +456,7 @@ def test_restore_broken(s3_manager): pair_manager = PairManager(s3_manager, zfs_manager, command_executor=fake_cmd) with pytest.raises(IntegrityError) as excp_info: pair_manager.restore('pool/fs@snap_4_mp') - assert excp_info.value.message == \ + assert str(excp_info.value) == \ "Broken snapshot detected pool/fs@snap_4_mp, reason: 'missing parent'" @@ -501,7 +501,7 @@ def test_get_latest(): fake_cmd = FakeCommandExecutor() with pytest.raises(SoftError) as excp_info: zfs_manager.get_latest() - assert excp_info.value.message == \ + assert str(excp_info.value) == \ 'Nothing to backup for filesystem "None". Are you sure ' \ 'SNAPSHOT_PREFIX="zfs-auto-snap:daily" is correct?' assert fake_cmd._called_commands == [] diff --git a/_tests/test_ssh_sync.py b/_tests/test_ssh_sync.py index 21be758..1f379f4 100644 --- a/_tests/test_ssh_sync.py +++ b/_tests/test_ssh_sync.py @@ -41,7 +41,7 @@ def test_snapshots_to_send_error(pair, err_msg): local, remote = pair with pytest.raises(AssertionError) as err: snapshots_to_send(local, remote) - assert err_msg == err.value.message + assert err_msg == str(err.value) PULL_HAPPY_PATH = dict( From 3d44468537aa0710e79e05db51b1d6c3749b3e44 Mon Sep 17 00:00:00 2001 From: James Gartrell Date: Thu, 21 Mar 2024 10:26:40 -0700 Subject: [PATCH 6/6] Ignore unhandled warning during work crash test --- _tests/test_pput.py | 1 + 1 file changed, 1 insertion(+) diff --git a/_tests/test_pput.py b/_tests/test_pput.py index 65b1ce2..c40d49c 100644 --- a/_tests/test_pput.py +++ b/_tests/test_pput.py @@ -149,6 +149,7 @@ def upload_part(self, index, chunk): return hashlib.md5(chunk).hexdigest() +@pytest.mark.filterwarnings("ignore:Exception in thread") def test_supervisor_loop_with_worker_crash(sample_data): stream_handler = StreamHandler(sample_data) bucket = FakeBucket()