[file_packager] split data files when file size exceeds ArrayBuffer limit #24802

Open · wants to merge 24 commits into base: main
52 changes: 51 additions & 1 deletion test/test_other.py
@@ -308,6 +308,12 @@ def parse_wasm(self, filename):
        funcs.append(name)
    return imports, exports, funcs

  def create_huge_file(self, name, length):
    # Create a sparse file: seek to the last byte and write a single NUL,
    # so the file reports the requested size without filling the disk.
    with open(name, 'wb') as f:
      f.seek(length - 1)
      f.write(b'\0')

  # Test that running `emcc -v` always works even in the presence of `EMCC_CFLAGS`.
  # This needs to work because many tools run `emcc -v` internally and it should
  # always work even if the user has `EMCC_CFLAGS` set.
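A note on the helper above: seeking past the end of a freshly opened file and writing a single byte yields a sparse file on most filesystems, so the multi-gigabyte fixtures used below report their full size without consuming real disk space. A standalone sketch of the same trick (the file name and size are illustrative):

import os

def create_sparse_file(path, length):
  # Write one NUL byte at the final offset; the filesystem records a hole
  # for the skipped range, so the apparent size is `length` while actual
  # disk usage stays near zero (on filesystems with sparse-file support).
  with open(path, 'wb') as f:
    f.seek(length - 1)
    f.write(b'\0')

create_sparse_file('huge.dat', 1024 * 1024 * 1024)
assert os.path.getsize('huge.dat') == 1024 * 1024 * 1024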
@@ -7459,14 +7465,58 @@ def test_underscore_exit(self):

  def test_file_packager_huge(self):
    MESSAGE = 'warning: file packager is creating an asset bundle of 257 MB. this is very large, and browsers might have trouble loading it'
    create_file('huge.dat', 'a' * (1024 * 1024 * 257))
    self.create_huge_file('huge.dat', 1024 * 1024 * 257)
    create_file('tiny.dat', 'a')
    err = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'tiny.dat'], stdout=PIPE, stderr=PIPE).stderr
    self.assertNotContained(MESSAGE, err)
    err = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge.dat'], stdout=PIPE, stderr=PIPE).stderr
    self.assertContained(MESSAGE, err)
    self.clear()

  def test_file_packager_huge_no_split(self):
    self.create_huge_file('huge.dat', 1024 * 1024 * 1024)
    self.create_huge_file('huge2.dat', 1022 * 1024 * 1024)
    err = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge.dat', '--preload', 'huge2.dat'], stdout=PIPE, stderr=PIPE).stderr
    self.assertContained('warning: file packager is creating an asset bundle of 2046 MB. this is very large, and browsers might have trouble loading it', err)
    self.assertExists('test.data')
    self.assertEqual(os.path.getsize('test.data'), (1024 * 1024 * 1024) + (1022 * 1024 * 1024))
    self.clear()

  def test_file_packager_huge_split(self):
    self.create_huge_file('huge.dat', 1024 * 1024 * 1024)
    self.create_huge_file('huge2.dat', (1022 * 1024 * 1024) + 1)
    err = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge.dat', '--preload', 'huge2.dat'], stdout=PIPE, stderr=PIPE).stderr
    self.assertContained('warning: file packager is creating an asset bundle of 1024 MB. this is very large, and browsers might have trouble loading it', err)
    self.assertContained('warning: file packager is splitting bundle into 2 chunks', err)
    self.assertExists('test.data')
    self.assertExists('test_1.data')
    self.assertEqual(os.path.getsize('test.data'), 1024 * 1024 * 1024)
    self.assertEqual(os.path.getsize('test_1.data'), (1022 * 1024 * 1024) + 1)
    self.clear()

  def test_file_packager_huge_split_metadata(self):
    self.create_huge_file('huge.dat', 1024 * 1024 * 1024)
    self.create_huge_file('huge2.dat', (1022 * 1024 * 1024) + 1)
    err = self.run_process([FILE_PACKAGER, 'test.data', '--separate-metadata', '--js-output=immutable.js', '--preload', 'huge.dat', '--preload', 'huge2.dat'], stdout=PIPE, stderr=PIPE).stderr
    self.assertContained('warning: file packager is creating an asset bundle of 1024 MB. this is very large, and browsers might have trouble loading it', err)
    self.assertContained('warning: file packager is splitting bundle into 2 chunks', err)
    self.assertExists('test.data')
    self.assertExists('immutable.js')
    self.assertExists('immutable.js.metadata')
    self.assertExists('test_1.data')
    self.assertExists('immutable_1.js')
    self.assertExists('immutable_1.js.metadata')
    self.assertEqual(os.path.getsize('test.data'), 1024 * 1024 * 1024)
    self.assertEqual(os.path.getsize('test_1.data'), (1022 * 1024 * 1024) + 1)
    self.clear()

  def test_file_packager_huge_split_too_large(self):
    self.create_huge_file('huge.dat', (1024 * 1024 * 1024) + ((1022 * 1024 * 1024) + 1))
    proc = self.run_process([FILE_PACKAGER, 'test.data', '--preload', 'huge.dat'], check=False, stdout=PIPE, stderr=PIPE)
    self.assertEqual(proc.returncode, 1)
    self.assertContained('error: cannot package file greater than 2046 MB', proc.stderr)
    self.clear()

  @parameterized({
    '': (True,),
    'wasm2js': (False,),
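For the record, the split tests above are sized to overshoot the cap by exactly one byte: 1024 MB plus 1022 MB fills the 2046 MB limit precisely, so the one extra byte in huge2.dat forces a second chunk. A quick check of the arithmetic:

LIMIT = 2046 * 1024 * 1024
a = 1024 * 1024 * 1024        # huge.dat
b = 1022 * 1024 * 1024 + 1    # huge2.dat in the split tests
assert a + b == LIMIT + 1     # one byte over the limit, hence two chunks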
77 changes: 52 additions & 25 deletions tools/file_packager.py
@@ -90,6 +90,9 @@

DEBUG = os.environ.get('EMCC_DEBUG')

# Chrome's ArrayBuffer limit: 2 MiB under 2 GiB
PRELOAD_DATA_FILE_LIMIT = 2046 * 1024 * 1024

excluded_patterns: List[str] = []
new_data_files = []
walked = []
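For reference, the new constant sits exactly 2 MiB below the 2 GiB ArrayBuffer ceiling:

# 2046 MiB == 2 GiB - 2 MiB == 2,145,386,496 bytes
assert 2046 * 1024 * 1024 == 2 * 1024**3 - 2 * 1024**2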
@@ -543,36 +546,59 @@ def was_seen(name):
  data_files = sorted(data_files, key=lambda file_: file_.dstpath)
  data_files = [file_ for file_ in data_files if not was_seen(file_.dstpath)]

  metadata = {'files': []}

  if options.obj_output:
    if not options.has_embedded:
      diagnostics.error('--obj-output is only applicable when embedding files')
    generate_object_file(data_files)
  if not options.has_preloaded:
    return 0

  ret = generate_js(data_target, data_files, metadata)

  if options.force or len(data_files):
    if options.jsoutput is None:
      print(ret)
    else:
      # Overwrite the old jsoutput file (if exists) only when its content
      # differs from the current generated one, otherwise leave the file
      # untouched preserving its old timestamp
      if os.path.isfile(options.jsoutput):
        old = utils.read_file(options.jsoutput)
        if old != ret:
          utils.write_file(options.jsoutput, ret)
  file_chunks = [data_files]
  if options.has_preloaded and not options.has_embedded:
    file_chunks = [[]]
    current_size = 0
    for file_ in data_files:
      fsize = os.path.getsize(file_.srcpath)
      if current_size + fsize <= PRELOAD_DATA_FILE_LIMIT:
        file_chunks[-1].append(file_)
        current_size += fsize
      elif fsize > PRELOAD_DATA_FILE_LIMIT:
        diagnostics.error('cannot package file greater than %d MB' % (PRELOAD_DATA_FILE_LIMIT // (1024 * 1024)))
        return 1
      else:
        current_size = fsize
        file_chunks.append([file_])

  if len(file_chunks) > 1:
    diagnostics.warn('file packager is splitting bundle into %d chunks' % len(file_chunks))

  targets = []
  for counter, data_files in enumerate(file_chunks):
    metadata = {'files': []}
    base, ext = data_target.rsplit('.', 1)
    targets.append(f"{base}{f'_{counter}' if counter else ''}.{ext}")
    ret = generate_js(targets[-1], data_files, metadata)
    if options.force or len(data_files):
      if options.jsoutput is None:
        print(ret)
      else:
        utils.write_file(options.jsoutput, ret)
        if options.separate_metadata:
          utils.write_file(options.jsoutput + '.metadata', json.dumps(metadata, separators=(',', ':')))
        # Overwrite the old jsoutput file (if exists) only when its content
        # differs from the current generated one, otherwise leave the file
        # untouched preserving its old timestamp
        base, ext = options.jsoutput.rsplit('.', 1)
        targets.append(f"{base}{f'_{counter}' if counter else ''}.{ext}")
        if os.path.isfile(targets[-1]):
          old = utils.read_file(targets[-1])
          if old != ret:
            utils.write_file(targets[-1], ret)
        else:
          utils.write_file(targets[-1], ret)
        if options.separate_metadata:
          utils.write_file(targets[-1] + '.metadata', json.dumps(metadata, separators=(',', ':')))

  if options.depfile:
    with open(options.depfile, 'w') as f:
      for target in (data_target, options.jsoutput):
      for target in targets:
        if target:
          f.write(escape_for_makefile(target))
          f.write(' \\\n')
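The chunking loop above is a greedy, order-preserving split: files are consumed in their sorted order, the current chunk is filled until the next file would push it past PRELOAD_DATA_FILE_LIMIT, and any single file larger than the limit is a hard error. A self-contained sketch of the same policy (function and variable names are illustrative, not the packager's API):

PRELOAD_DATA_FILE_LIMIT = 2046 * 1024 * 1024

def split_into_chunks(sizes, limit=PRELOAD_DATA_FILE_LIMIT):
  # sizes: file sizes in input order; returns a list of chunks,
  # each chunk being a list of indices into `sizes`.
  chunks = [[]]
  current = 0
  for i, size in enumerate(sizes):
    if current + size <= limit:
      chunks[-1].append(i)
      current += size
    elif size > limit:
      raise ValueError('single file exceeds the per-chunk limit')
    else:
      current = size
      chunks.append([i])  # start a fresh chunk with this file
  return chunks

MB = 1024 * 1024
assert split_into_chunks([1024 * MB, 1022 * MB]) == [[0, 1]]
assert split_into_chunks([1024 * MB, 1022 * MB + 1]) == [[0], [1]]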
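Chunk outputs get an `_N` suffix spliced in before the file extension, so `test.data` is followed by `test_1.data`, and `--js-output=immutable.js` likewise yields `immutable_1.js` plus its `.metadata` sibling, exactly as the new tests expect. The naming rule in isolation (helper name is illustrative):

def chunk_target(name, counter):
  # 'test.data', 0 -> 'test.data'; 'test.data', 1 -> 'test_1.data'
  base, ext = name.rsplit('.', 1)
  return f"{base}{f'_{counter}' if counter else ''}.{ext}"

assert chunk_target('test.data', 0) == 'test.data'
assert chunk_target('immutable.js', 1) == 'immutable_1.js'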
@@ -656,12 +682,13 @@ def generate_js(data_target, data_files, metadata):
    # XHRs which has overhead.
    start = 0
    with open(data_target, 'wb') as data:
      for file_ in data_files:
        file_.data_start = start
        curr = utils.read_binary(file_.srcpath)
        file_.data_end = start + len(curr)
        start += len(curr)
        data.write(curr)
      for file_ in data_files:
        if file_.mode == 'preload':
          file_.data_start = start
          curr = utils.read_binary(file_.srcpath)
          file_.data_end = start + len(curr)
          start += len(curr)
          data.write(curr)

    if start > 256 * 1024 * 1024:
      diagnostics.warn('file packager is creating an asset bundle of %d MB. '
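The writer above concatenates every preload-mode file into a single bundle and records each file's half-open [data_start, data_end) byte range in the metadata; the generated loader can then slice individual files back out of the fetched ArrayBuffer. A minimal model of that bookkeeping (names are illustrative):

def pack(payloads):
  # payloads: list of bytes objects in bundle order.
  # Returns the concatenated bundle plus each payload's (start, end) range.
  bundle = bytearray()
  offsets = []
  start = 0
  for data in payloads:
    end = start + len(data)
    offsets.append((start, end))
    bundle += data
    start = end
  return bytes(bundle), offsets

bundle, offsets = pack([b'abc', b'de'])
assert bundle == b'abcde'
assert offsets == [(0, 3), (3, 5)]
assert bundle[slice(*offsets[1])] == b'de'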