From 5c9d701beecd2443b76d3bface8fe2939a2592aa Mon Sep 17 00:00:00 2001 From: longze chen Date: Thu, 29 Mar 2018 15:55:28 -0400 Subject: [PATCH 1/2] Further improve memory usage for CSV rendering - Dump sheets to a json var, delete the sheets before rendering. - Delete reader when exception happens - Update error message for TableTooBigError - Update comment and log for garbage collection --- mfr/extensions/tabular/libs/stdlib_tools.py | 6 +++--- mfr/extensions/tabular/render.py | 16 +++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/mfr/extensions/tabular/libs/stdlib_tools.py b/mfr/extensions/tabular/libs/stdlib_tools.py index 3d3611984..5af87b261 100644 --- a/mfr/extensions/tabular/libs/stdlib_tools.py +++ b/mfr/extensions/tabular/libs/stdlib_tools.py @@ -19,8 +19,8 @@ def csv_stdlib(fp): dialect = csv.excel else: _set_dialect_quote_attrs(dialect, data) - del data + reader = csv.DictReader(fp, dialect=dialect) columns = [] # update the reader field names to avoid duplicate column names when performing row extraction @@ -41,6 +41,7 @@ def csv_stdlib(fp): try: rows = [row for row in reader] except csv.Error as e: + del reader if any("field larger than field limit" in errorMsg for errorMsg in e.args): raise TabularRendererError( 'This file contains a field too large to render. ' @@ -51,10 +52,9 @@ def csv_stdlib(fp): else: raise TabularRendererError('csv.Error: {}'.format(e), extension='csv') from e + del reader if not columns and not rows: raise EmptyTableError('Table empty or corrupt.', extension='csv') - - del reader return {'Sheet 1': (columns, rows)} diff --git a/mfr/extensions/tabular/render.py b/mfr/extensions/tabular/render.py index 2932aa584..0ccddd234 100644 --- a/mfr/extensions/tabular/render.py +++ b/mfr/extensions/tabular/render.py @@ -33,21 +33,27 @@ def render(self): with open(self.file_path, errors='replace') as fp: sheets, size, nbr_rows, nbr_cols = self._render_grid(fp, self.metadata.ext) - # Force GC - gc.collect() - if sheets and size: + json_sheets = json.dumps(sheets) + del sheets + # Forcing garbage collection accelerates the memory free/release process with a small + # but acceptable cost in performance + nbr_objs = gc.collect() + logger.debug('Number of unreachable objects collected: {}'.format(nbr_objs)) return self.TEMPLATE.render( base=self.assets_url, width=settings.TABLE_WIDTH, height=settings.TABLE_HEIGHT, - sheets=json.dumps(sheets), + sheets=json_sheets, options=json.dumps(size), ) + nbr_objs = gc.collect() + logger.debug('Number of unreachable objects collected: {}'.format(nbr_objs)) assert nbr_rows and nbr_cols raise exceptions.TableTooBigError( - 'Table is too large to render.', + 'Table with more than {} rows or columns are not rendered. Please download the file to ' + 'view.'.format(settings.MAX_SIZE), extension=self.metadata.ext, nbr_cols=nbr_cols, nbr_rows=nbr_rows From ca1199d473e72f58bf6160fe3de5ce9dc3f207d3 Mon Sep 17 00:00:00 2001 From: longze chen Date: Thu, 29 Mar 2018 16:04:55 -0400 Subject: [PATCH 2/2] Ignore false-positive flake8 undefined for del --- mfr/extensions/tabular/libs/stdlib_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mfr/extensions/tabular/libs/stdlib_tools.py b/mfr/extensions/tabular/libs/stdlib_tools.py index 5af87b261..fa0e4545f 100644 --- a/mfr/extensions/tabular/libs/stdlib_tools.py +++ b/mfr/extensions/tabular/libs/stdlib_tools.py @@ -52,7 +52,7 @@ def csv_stdlib(fp): else: raise TabularRendererError('csv.Error: {}'.format(e), extension='csv') from e - del reader + del reader # noqa if not columns and not rows: raise EmptyTableError('Table empty or corrupt.', extension='csv') return {'Sheet 1': (columns, rows)}