From ab46578975ce08d42f66b9be3cb3e4fb0deac0e0 Mon Sep 17 00:00:00 2001
From: Fabio Luporini
Date: Wed, 22 Oct 2025 14:39:33 +0100
Subject: [PATCH] api: Add (rudimentary) autopickling support

---
 devito/__init__.py          |  12 ++++
 devito/operator/operator.py | 126 +++++++++++++++++++++++++++++++++---
 devito/parameters.py        |   1 +
 tests/test_pickle.py        |  34 +++++++++-
 4 files changed, 165 insertions(+), 8 deletions(-)

diff --git a/devito/__init__.py b/devito/__init__.py
index dc6110c65a..3aaae3d2e4 100644
--- a/devito/__init__.py
+++ b/devito/__init__.py
@@ -107,6 +107,18 @@ def reinit_compiler(val):
 # and will instead use the custom kernel
 configuration.add('jit-backdoor', 0, [0, 1], preprocessor=bool, impacts_jit=False)
 
+# Enable/disable automatic pickling of named Operators. When enabled, named
+# Operators (that is, Operators instantiated with a name kwarg) are automatically
+# pickled to disk upon creation, and loaded from disk upon subsequent creations,
+# thus bypassing code generation and compilation. This is to be used with caution,
+# since it assumes that things such as Operator creation arguments (e.g., the
+# equations themselves), the Devito configuration, the compiler/runtime, etc,
+# have not changed between runs. Further, data carried by any of the input
+# objects is pickled as well, which may lead to large files on disk. Currently, no
+# safeguards are in place to deal with any of these cases, so... you have been
+# warned! Use at your own risk
+configuration.add('autopickling', 0, [0, 1], preprocessor=bool, impacts_jit=False)
+
 # By default unsafe math is allowed as most applications are insensitive to
 # floating-point roundoff errors. Enabling this disables unsafe math
 # optimisations.
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 0d473fe6a2..5de046a08e 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -1,14 +1,18 @@
 from collections import OrderedDict, namedtuple
 from functools import cached_property
-import ctypes
-import shutil
 from operator import attrgetter
 from math import ceil
 from tempfile import gettempdir
+from time import time
+import ctypes
+import glob
+import os
+import shutil
 
 from sympy import sympify
 import sympy
 import numpy as np
+import cloudpickle as pickle
 
 from devito.arch import (ANYCPU, Device, compiler_registry, platform_registry,
                          get_visible_devices)
@@ -32,10 +36,11 @@
     minimize_symbols, unevaluate, error_mapper, is_on_device, lower_dtypes
 )
 from devito.symbolics import estimate_cost, subs_op_args
-from devito.tools import (DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple,
-                          flatten, filter_sorted, frozendict, is_integer,
-                          split, timed_pass, timed_region, contains_val,
-                          CacheInstances, MemoryEstimate)
+from devito.tools import (
+    DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple, flatten,
+    filter_sorted, frozendict, is_integer, split, timed_pass, timed_region,
+    contains_val, CacheInstances, MemoryEstimate, make_tempdir
+)
 from devito.types import (Buffer, Evaluable, host_layer, device_layer,
                           disk_layer)
 from devito.types.dimension import Thickness
@@ -157,6 +162,12 @@ def __new__(cls, expressions, **kwargs):
             # can't do anything useful with it
             return super().__new__(cls, **kwargs)
 
+        # Maybe lookup an existing Operator from disk
+        name = kwargs.get('name', default_op_name)
+        op = autopickler.maybe_load(name)
+        if op is not None:
+            return op
+
         # Parse input arguments
         kwargs = parse_kwargs(**kwargs)
 
@@ -176,6 +187,9 @@ def __new__(cls, expressions, **kwargs):
         # Emit info about how long it took to perform the lowering
         op._emit_build_profiling()
 
+        # Maybe save the Operator to disk
+        autopickler.maybe_save(op)
+
         return op
 
     @classmethod
@@ -479,7 +493,7 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs):
             * Introduce optimizations for data locality;
             * Finalize (e.g., symbol definitions, array casts)
         """
-        name = kwargs.get("name", "Kernel")
+        name = kwargs.get("name", default_op_name)
 
         # Wrap the IET with an EntryFunction (a special Callable representing
         # the entry point of the generated library)
@@ -1216,6 +1230,10 @@ def __setstate__(self, state):
                 f'{type(self._compiler).__name__}.{self._language}.{self._platform}'
             )
 
+        # Tag this Operator as unpickled -- might come in handy at the call site
+        # for sanity checks
+        self._unpickled = True
+
 
 # *** Recursive compilation ("rcompile") machinery
 
@@ -1701,3 +1719,97 @@ def parse_kwargs(**kwargs):
     kwargs['subs'] = {k: sympify(v) for k, v in kwargs.get('subs', {}).items()}
 
     return kwargs
+
+
+# The name assigned to an Operator when the user does not provide one
+default_op_name = "Kernel"
+
+
+class Autopickler:
+
+    """
+    Save named Operators to disk upon creation and load them back upon
+    subsequent creations, thus bypassing code generation and compilation.
+    Only active if `configuration['autopickling']` is enabled.
+    """
+
+    def __init__(self):
+        self._initialized = False
+        self.registry = {}
+
+    @property
+    def _directory(self):
+        return make_tempdir('autopickling')
+
+    def _initialize(self):
+        # Search the `autopickling` temporary directory for pickled Operators
+        # and maintain a registry of them. Each pickled Operator is uniquely
+        # identified by a name; this might not be enough to avoid name clashes,
+        # but for now it is what we have. Thus, a generic filename is
+        # `<name>.pkl`.
+        pkl_files = glob.glob(os.path.join(self._directory, '*.pkl'))
+
+        self.registry.update({
+            os.path.basename(pkl_file)[:-4]: pkl_file for pkl_file in pkl_files
+        })
+
+        self._initialized = True
+
+    def maybe_load(self, name):
+        if not configuration['autopickling']:
+            return None
+
+        tic = time()
+
+        if not self._initialized:
+            self._initialize()
+
+        if name is None or name == default_op_name:
+            return None
+
+        pkl_file = self.registry.get(name)
+        if pkl_file is None:
+            return None
+
+        # NOTE: unpickling can execute arbitrary code, hence the autopickling
+        # directory is assumed to only ever contain trusted files
+        try:
+            with open(pkl_file, 'rb') as f:
+                op = pickle.load(f)
+        except FileNotFoundError:
+            # The file was deleted after the registry was populated (e.g.,
+            # the autopickling directory was purged), so forget about it and
+            # let the caller build the Operator from scratch
+            del self.registry[name]
+            return None
+
+        toc = time()
+
+        perf(f"Operator `{name}` unpickled from disk in {toc - tic:.2f} s")
+
+        return op
+
+    def maybe_save(self, op):
+        if not configuration['autopickling']:
+            return
+
+        if op.name == default_op_name:
+            return
+
+        # Normally `maybe_load` has already populated the registry, but do not
+        # rely on it
+        if not self._initialized:
+            self._initialize()
+
+        pkl_file = os.path.join(self._directory, f"{op.name}.pkl")
+        with open(pkl_file, 'wb') as f:
+            pickle.dump(op, f)
+
+        # Update the registry, in most cases this is unnecessary since
+        # autopickling is about saving time on subsequent runs, but just in case
+        self.registry[op.name] = pkl_file
+
+        debug(f"Operator `{op.name}` pickled to disk at `{pkl_file}`")
+
+
+autopickler = Autopickler()
diff --git a/devito/parameters.py b/devito/parameters.py
index be9fdd2e88..ece2c7f64a 100644
--- a/devito/parameters.py
+++ b/devito/parameters.py
@@ -152,6 +152,7 @@ def _signature_items(self):
     'DEVITO_AUTOTUNING': 'autotuning',
     'DEVITO_LOGGING': 'log-level',
     'DEVITO_FIRST_TOUCH': 'first-touch',
+    'DEVITO_AUTOPICKLING': 'autopickling',
     'DEVITO_JIT_BACKDOOR': 'jit-backdoor',
     'DEVITO_IGNORE_UNKNOWN_PARAMS': 'ignore-unknowns',
     'DEVITO_SAFE_MATH': 'safe-math'
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index 40e4c14ffe..b797d6e80c 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -1,5 +1,6 @@
 import ctypes
 import pickle as pickle0
+import shutil
 
 import cloudpickle as pickle1
 import pytest
@@ -9,10 +10,11 @@
 from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid,
                     Dimension, SubDimension, ConditionalDimension, IncrDimension,
                     TimeDimension, SteppingDimension, Operator, MPI, Min, solve,
-                    PrecomputedSparseTimeFunction, SubDomain)
+                    PrecomputedSparseTimeFunction, SubDomain, switchconfig)
 from devito.ir import Backward, Forward, GuardFactor, GuardBound, GuardBoundNext
 from devito.data import LEFT, OWNED
 from devito.finite_differences.tools import direct, transpose, left, right, centered
+from devito.operator.operator import autopickler
 from devito.mpi.halo_scheme import Halo
 from devito.mpi.routines import (MPIStatusObject, MPIMsgEnriched, MPIRequestObject,
                                  MPIRegion)
@@ -1074,3 +1076,33 @@ def test_usave_sampled(self, pickle, subs):
         op_new = pickle.load(open(tmp_pickle_op_fn, "rb"))
 
         assert str(op_fwd) == str(op_new)
+
+
+@pytest.fixture
+def purged_autopickling_dir():
+    # Erase the content of the autopickling dir, as well as the in-memory
+    # state of the autopickler, before and after the test
+    def purge():
+        shutil.rmtree(autopickler._directory, ignore_errors=True)
+        autopickler.registry.clear()
+        autopickler._initialized = False
+
+    purge()
+    yield
+    purge()
+
+
+class TestAutopickling:
+
+    @switchconfig(autopickling=True)
+    def test_basic(self, purged_autopickling_dir):
+        grid = Grid(shape=(3, 3, 3))
+        f = TimeFunction(name='f', grid=grid)
+        eqn = Eq(f.forward, f + 1)
+        op0 = Operator(eqn, name='TestOp')
+
+        # Expected to be unpickled from the autopickling dir
+        op1 = Operator(eqn, name='TestOp')
+
+        assert not getattr(op0, '_unpickled', False)
+        assert op1._unpickled is True