Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions devito/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,18 @@ def reinit_compiler(val):
# and will instead use the custom kernel
configuration.add('jit-backdoor', 0, [0, 1], preprocessor=bool, impacts_jit=False)

# Enable/disable automatic pickling of named Operators. When enabled, named
# Operators (that is, Operators instantiated with a name kwarg) are automatically
# pickled to disk upon creation, and loaded from disk upon subsequent creations,
# thus bypassing code generation and compilation. This is to be used with caution,
# since it assumes that things such as Operator creation arguments (e.g., the
# equations themselves), the Devito configuration, the compiler/runtime, etc,
# have not changed between runs. Further, data carried by any of the input
# objects is pickled as well, which may lead to large files on disk. Currently, no
# safeguards are in place to deal with any of these cases, so... you have been
# warned! Use at your own risk
configuration.add('autopickling', 0, [0, 1], preprocessor=bool, impacts_jit=False)

# By default unsafe math is allowed as most applications are insensitive to
# floating-point roundoff errors. Enabling this disables unsafe math
# optimisations.
Expand Down
108 changes: 101 additions & 7 deletions devito/operator/operator.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
from collections import OrderedDict, namedtuple
from functools import cached_property
import ctypes
import shutil
from operator import attrgetter
from math import ceil
from tempfile import gettempdir
from time import time
import ctypes
import glob
import os
import shutil

from sympy import sympify
import sympy
import numpy as np
import cloudpickle as pickle

from devito.arch import (ANYCPU, Device, compiler_registry, platform_registry,
get_visible_devices)
Expand All @@ -32,10 +36,11 @@
minimize_symbols, unevaluate, error_mapper, is_on_device, lower_dtypes
)
from devito.symbolics import estimate_cost, subs_op_args
from devito.tools import (DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple,
flatten, filter_sorted, frozendict, is_integer,
split, timed_pass, timed_region, contains_val,
CacheInstances, MemoryEstimate)
from devito.tools import (
DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple, flatten,
filter_sorted, frozendict, is_integer, split, timed_pass, timed_region,
contains_val, CacheInstances, MemoryEstimate, make_tempdir
)
from devito.types import (Buffer, Evaluable, host_layer, device_layer,
disk_layer)
from devito.types.dimension import Thickness
Expand Down Expand Up @@ -157,6 +162,12 @@ def __new__(cls, expressions, **kwargs):
# can't do anything useful with it
return super().__new__(cls, **kwargs)

# Maybe lookup an existing Operator from disk
name = kwargs.get('name', default_op_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure using the name is the ideal approach, given that this is often left as the default "kernel". Maybe hash the args instead somehow?

Side note: is this behaviour turned off by default?
Side side note: do the pickled operators get cleared automatically, or is this left to the user?
Yet another side note: can the user specify a path to their pickled operator dir?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see the word "rudimentary" in the title and the PR description which I'm reporting below

This is intended mostly for developers... for now

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough, in that case as long as you don't see it as a potential source of mistakes/bother, I'm not overly concerned

op = autopickler.maybe_load(name)
if op is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this "log" at all?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I agree that it should log that it is using a pickled version (and which one it found)

return op

# Parse input arguments
kwargs = parse_kwargs(**kwargs)

Expand All @@ -176,6 +187,9 @@ def __new__(cls, expressions, **kwargs):
# Emit info about how long it took to perform the lowering
op._emit_build_profiling()

# Maybe save the Operator to disk
autopickler.maybe_save(op)

return op

@classmethod
Expand Down Expand Up @@ -479,7 +493,7 @@ def _lower_iet(cls, uiet, profiler=None, **kwargs):
* Introduce optimizations for data locality;
* Finalize (e.g., symbol definitions, array casts)
"""
name = kwargs.get("name", "Kernel")
name = kwargs.get("name", default_op_name)

# Wrap the IET with an EntryFunction (a special Callable representing
# the entry point of the generated library)
Expand Down Expand Up @@ -1216,6 +1230,10 @@ def __setstate__(self, state):
f'{type(self._compiler).__name__}.{self._language}.{self._platform}'
)

# Tag this Operator as unpickled -- might come in handy at the call site
# for sanity checks
self._unpickled = True


# *** Recursive compilation ("rcompile") machinery

Expand Down Expand Up @@ -1701,3 +1719,79 @@ def parse_kwargs(**kwargs):
kwargs['subs'] = {k: sympify(v) for k, v in kwargs.get('subs', {}).items()}

return kwargs


# The name assigned to an Operator when the user does not provide one
default_op_name = "Kernel"


class Autopickler:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this live in its own file? Operator.py is getting pretty big


def __init__(self):
self._initialized = False
self.registry = {}

@property
def _directory(self):
return make_tempdir('autopickling')

def _initialize(self):
# Search the `autopickling` temporary directory for pickled Operators
# and maintain a registry of them. Each pickled Operator is uniquely
# identified by a name; this might not be enough to avoid name clashes,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about name_dsf8s7 where the suffix is the first n characters of the hash of the args the operator was constructed with?

# but for now it is what we have. Thus, a generic filename is
# `<operator_name>.pkl`.
pkl_files = glob.glob(os.path.join(self._directory, '*.pkl'))

self.registry.update({
os.path.basename(pkl_file)[:-4]: pkl_file for pkl_file in pkl_files
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os.path.splitext(pkl_file)

})

self._initialized = True

def maybe_load(self, name):
if not configuration['autopickling']:
return None

tic = time()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

process_time is better.


if not self._initialized:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should just be in init

self._initialize()

if name is None or name == default_op_name:
return None

pkl_file = self.registry.get(name)
if pkl_file is None:
return None

with open(pkl_file, 'rb') as f:
op = pickle.load(f)

toc = time()

perf(f"Operator `{name}` unpickled from disk in {toc - tic:.2f} s")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah ok, this is fine, then I was expecting sth like that, ignore the above


return op

def maybe_save(self, op):
if not configuration['autopickling']:
return

if op.name == default_op_name:
return

assert self._initialized is not None

pkl_file = os.path.join(self._directory, f"{op.name}.pkl")
with open(pkl_file, 'wb') as f:
pickle.dump(op, f)

# Update the registry, in most cases this is unnecessary since
# autopickling is about saving time on subsequent runs, but just in case
self.registry[op.name] = pkl_file

debug(f"Operator `{op.name}` pickled to disk at `{pkl_file}`")


autopickler = Autopickler()
1 change: 1 addition & 0 deletions devito/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def _signature_items(self):
'DEVITO_AUTOTUNING': 'autotuning',
'DEVITO_LOGGING': 'log-level',
'DEVITO_FIRST_TOUCH': 'first-touch',
'DEVITO_AUTOPICKLING': 'autopickling',
'DEVITO_JIT_BACKDOOR': 'jit-backdoor',
'DEVITO_IGNORE_UNKNOWN_PARAMS': 'ignore-unknowns',
'DEVITO_SAFE_MATH': 'safe-math'
Expand Down
28 changes: 27 additions & 1 deletion tests/test_pickle.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import ctypes
import pickle as pickle0
import shutil

import cloudpickle as pickle1
import pytest
Expand All @@ -9,10 +10,11 @@
from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid,
Dimension, SubDimension, ConditionalDimension, IncrDimension,
TimeDimension, SteppingDimension, Operator, MPI, Min, solve,
PrecomputedSparseTimeFunction, SubDomain)
PrecomputedSparseTimeFunction, SubDomain, switchconfig)
from devito.ir import Backward, Forward, GuardFactor, GuardBound, GuardBoundNext
from devito.data import LEFT, OWNED
from devito.finite_differences.tools import direct, transpose, left, right, centered
from devito.operator.operator import autopickler
from devito.mpi.halo_scheme import Halo
from devito.mpi.routines import (MPIStatusObject, MPIMsgEnriched, MPIRequestObject,
MPIRegion)
Expand Down Expand Up @@ -1074,3 +1076,27 @@ def test_usave_sampled(self, pickle, subs):
op_new = pickle.load(open(tmp_pickle_op_fn, "rb"))

assert str(op_fwd) == str(op_new)


@pytest.fixture
def purged_autopickling_dir():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe make a helper function to do this, akin to clear_cache()?

# Erase the content of the autopickling dir before and after the test
shutil.rmtree(autopickler._directory, ignore_errors=True)
yield
shutil.rmtree(autopickler._directory, ignore_errors=True)


class TestAutopickling:

@switchconfig(autopickling=True)
def test_basic(self, purged_autopickling_dir):
grid = Grid(shape=(3, 3, 3))
f = TimeFunction(name='f', grid=grid)
eqn = Eq(f.forward, f + 1)
op0 = Operator(eqn, name='TestOp')

# Expected to be unpickled from the autopickling dir
op1 = Operator(eqn, name='TestOp')

assert not getattr(op0, '_unpickled', False)
assert op1._unpickled is True
Loading