-
Notifications
You must be signed in to change notification settings - Fork 245
api: Add (rudimentary) autopickling support #2775
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,18 @@ | ||
| from collections import OrderedDict, namedtuple | ||
| from functools import cached_property | ||
| import ctypes | ||
| import shutil | ||
| from operator import attrgetter | ||
| from math import ceil | ||
| from tempfile import gettempdir | ||
| from time import time | ||
| import ctypes | ||
| import glob | ||
| import os | ||
| import shutil | ||
|
|
||
| from sympy import sympify | ||
| import sympy | ||
| import numpy as np | ||
| import cloudpickle as pickle | ||
|
|
||
| from devito.arch import (ANYCPU, Device, compiler_registry, platform_registry, | ||
| get_visible_devices) | ||
|
|
@@ -32,10 +36,11 @@ | |
| minimize_symbols, unevaluate, error_mapper, is_on_device, lower_dtypes | ||
| ) | ||
| from devito.symbolics import estimate_cost, subs_op_args | ||
| from devito.tools import (DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple, | ||
| flatten, filter_sorted, frozendict, is_integer, | ||
| split, timed_pass, timed_region, contains_val, | ||
| CacheInstances, MemoryEstimate) | ||
| from devito.tools import ( | ||
| DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple, flatten, | ||
| filter_sorted, frozendict, is_integer, split, timed_pass, timed_region, | ||
| contains_val, CacheInstances, MemoryEstimate, make_tempdir | ||
| ) | ||
| from devito.types import (Buffer, Evaluable, host_layer, device_layer, | ||
| disk_layer) | ||
| from devito.types.dimension import Thickness | ||
|
|
@@ -157,6 +162,12 @@ def __new__(cls, expressions, **kwargs): | |
| # can't do anything useful with it | ||
| return super().__new__(cls, **kwargs) | ||
|
|
||
| # Maybe lookup an existing Operator from disk | ||
| name = kwargs.get('name', default_op_name) | ||
| op = autopickler.maybe_load(name) | ||
| if op is not None: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this "log" at all?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I agree that it should log that it is using a pickled version (and which one it found) |
||
| return op | ||
|
|
||
| # Parse input arguments | ||
| kwargs = parse_kwargs(**kwargs) | ||
|
|
||
|
|
@@ -176,6 +187,9 @@ def __new__(cls, expressions, **kwargs): | |
| # Emit info about how long it took to perform the lowering | ||
| op._emit_build_profiling() | ||
|
|
||
| # Maybe save the Operator to disk | ||
| autopickler.maybe_save(op) | ||
|
|
||
| return op | ||
|
|
||
| @classmethod | ||
|
|
@@ -479,7 +493,7 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs): | |
| * Introduce optimizations for data locality; | ||
| * Finalize (e.g., symbol definitions, array casts) | ||
| """ | ||
| name = kwargs.get("name", "Kernel") | ||
| name = kwargs.get("name", default_op_name) | ||
|
|
||
| # Wrap the IET with an EntryFunction (a special Callable representing | ||
| # the entry point of the generated library) | ||
|
|
@@ -1216,6 +1230,10 @@ def __setstate__(self, state): | |
| f'{type(self._compiler).__name__}.{self._language}.{self._platform}' | ||
| ) | ||
|
|
||
| # Tag this Operator as unpickled -- might come in handy at the call site | ||
| # for sanity checks | ||
| self._unpickled = True | ||
|
|
||
|
|
||
| # *** Recursive compilation ("rcompile") machinery | ||
|
|
||
|
|
@@ -1701,3 +1719,79 @@ def parse_kwargs(**kwargs): | |
| kwargs['subs'] = {k: sympify(v) for k, v in kwargs.get('subs', {}).items()} | ||
|
|
||
| return kwargs | ||
|
|
||
|
|
||
| # The name assigned to an Operator when the user does not provide one | ||
| default_op_name = "Kernel" | ||
|
|
||
|
|
||
| class Autopickler: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this live in its own file? |
||
|
|
||
| def __init__(self): | ||
| self._initialized = False | ||
| self.registry = {} | ||
|
|
||
| @property | ||
| def _directory(self): | ||
| return make_tempdir('autopickling') | ||
|
|
||
| def _initialize(self): | ||
| # Search the `autopickling` temporary directory for pickled Operators | ||
| # and maintain a registry of them. Each pickled Operator is uniquely | ||
| # identified by a name; this might not be enough to avoid name clashes, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about |
||
| # but for now it is what we have. Thus, a generic filename is | ||
| # `<operator_name>.pkl`. | ||
| pkl_files = glob.glob(os.path.join(self._directory, '*.pkl')) | ||
|
|
||
| self.registry.update({ | ||
| os.path.basename(pkl_file)[:-4]: pkl_file for pkl_file in pkl_files | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. os.path.splitext(pkl_file) |
||
| }) | ||
|
|
||
| self._initialized = True | ||
|
|
||
| def maybe_load(self, name): | ||
| if not configuration['autopickling']: | ||
| return None | ||
|
|
||
| tic = time() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. process_time is better. |
||
|
|
||
| if not self._initialized: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should just be in init |
||
| self._initialize() | ||
|
|
||
| if name is None or name == default_op_name: | ||
| return None | ||
|
|
||
| pkl_file = self.registry.get(name) | ||
| if pkl_file is None: | ||
| return None | ||
|
|
||
| with open(pkl_file, 'rb') as f: | ||
| op = pickle.load(f) | ||
|
|
||
| toc = time() | ||
|
|
||
| perf(f"Operator `{name}` unpickled from disk in {toc - tic:.2f} s") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah ok, this is fine, then I was expecting sth like that, ignore the above |
||
|
|
||
| return op | ||
|
|
||
| def maybe_save(self, op): | ||
| if not configuration['autopickling']: | ||
| return | ||
|
|
||
| if op.name == default_op_name: | ||
| return | ||
|
|
||
| assert self._initialized is not None | ||
|
|
||
| pkl_file = os.path.join(self._directory, f"{op.name}.pkl") | ||
| with open(pkl_file, 'wb') as f: | ||
| pickle.dump(op, f) | ||
|
|
||
| # Update the registry, in most cases this is unnecessary since | ||
| # autopickling is about saving time on subsequent runs, but just in case | ||
| self.registry[op.name] = pkl_file | ||
|
|
||
| debug(f"Operator `{op.name}` pickled to disk at `{pkl_file}`") | ||
|
|
||
|
|
||
| autopickler = Autopickler() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| import ctypes | ||
| import pickle as pickle0 | ||
| import shutil | ||
|
|
||
| import cloudpickle as pickle1 | ||
| import pytest | ||
|
|
@@ -9,10 +10,11 @@ | |
| from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid, | ||
| Dimension, SubDimension, ConditionalDimension, IncrDimension, | ||
| TimeDimension, SteppingDimension, Operator, MPI, Min, solve, | ||
| PrecomputedSparseTimeFunction, SubDomain) | ||
| PrecomputedSparseTimeFunction, SubDomain, switchconfig) | ||
| from devito.ir import Backward, Forward, GuardFactor, GuardBound, GuardBoundNext | ||
| from devito.data import LEFT, OWNED | ||
| from devito.finite_differences.tools import direct, transpose, left, right, centered | ||
| from devito.operator.operator import autopickler | ||
| from devito.mpi.halo_scheme import Halo | ||
| from devito.mpi.routines import (MPIStatusObject, MPIMsgEnriched, MPIRequestObject, | ||
| MPIRegion) | ||
|
|
@@ -1074,3 +1076,27 @@ def test_usave_sampled(self, pickle, subs): | |
| op_new = pickle.load(open(tmp_pickle_op_fn, "rb")) | ||
|
|
||
| assert str(op_fwd) == str(op_new) | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def purged_autopickling_dir(): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe make a helper function to do this, akin to |
||
| # Erase the content of the autopickling dir before and after the test | ||
| shutil.rmtree(autopickler._directory, ignore_errors=True) | ||
| yield | ||
| shutil.rmtree(autopickler._directory, ignore_errors=True) | ||
|
|
||
|
|
||
| class TestAutopickling: | ||
|
|
||
| @switchconfig(autopickling=True) | ||
| def test_basic(self, purged_autopickling_dir): | ||
| grid = Grid(shape=(3, 3, 3)) | ||
| f = TimeFunction(name='f', grid=grid) | ||
| eqn = Eq(f.forward, f + 1) | ||
| op0 = Operator(eqn, name='TestOp') | ||
|
|
||
| # Expected to be unpickled from the autopickling dir | ||
| op1 = Operator(eqn, name='TestOp') | ||
|
|
||
| assert not getattr(op0, '_unpickled', False) | ||
| assert op1._unpickled is True | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure using the name is the ideal approach, given that this is often left as the default "kernel". Maybe hash the args instead somehow?
Side note: is this behaviour turned off by default?
Side side note: do the pickled operators get cleared automatically, or is this left to the user?
Yet another side note: can the user specify a path to their pickled operator dir?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
see the word "rudimentary" in the title and the PR description which I'm reporting below
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair enough, in that case as long as you don't see it as a potential source of mistakes/bother, I'm not overly concerned