@@ -66,18 +66,28 @@ class BaseTest(unittest.TestCase):
6666 EMPTY_DATA = b'BZh9\x17 rE8P\x90 \x00 \x00 \x00 \x00 '
6767 BAD_DATA = b'this is not a valid bzip2 file'
6868
69- # Some tests need more than one block of uncompressed data. Since one block
70- # is at least 100,000 bytes, we gather some data dynamically and compress it.
71- # Note that this assumes that compression works correctly, so we cannot
72- # simply use the bigger test data for all tests.
69+ # Some tests need more than one block of data. The bz2 module does not
70+ # support flushing a block during compression, so we must read in data until
71+ # there are at least 2 blocks. Since different orderings of Python files may
72+ # be compressed differently, we need to check the compression output for
73+ # more than one bzip2 block header magic, the digits of pi in binary-coded
74+ # decimal (0x314159265359)
75+ bz2_block_magic = bytes .fromhex ('314159265359' )
7376 test_size = 0
74- BIG_TEXT = bytearray (128 * 1024 )
77+ BIG_TEXT = b''
78+ BIG_DATA = b''
79+ compressor = BZ2Compressor (1 )
7580 for fname in glob .glob (os .path .join (glob .escape (os .path .dirname (__file__ )), '*.py' )):
7681 with open (fname , 'rb' ) as fh :
77- test_size += fh .readinto (memoryview (BIG_TEXT )[test_size :])
78- if test_size > 128 * 1024 :
82+ data = fh .read ()
83+ BIG_DATA += compressor .compress (data )
84+ BIG_TEXT += data
85+ # TODO(emmatyping): if a block header can never be split across the
86+ # outputs of two compress() calls, we could search only the output of
87+ # each call instead of all of BIG_DATA, which should be more efficient
88+ if BIG_DATA .count (bz2_block_magic ) > 1 :
89+ BIG_DATA += compressor .flush ()
7990 break
80- BIG_DATA = bz2 .compress (BIG_TEXT , compresslevel = 1 )
8191
8292 def setUp (self ):
8393 fd , self .filename = tempfile .mkstemp ()
0 commit comments