devitocodes · mloubout · Nov 21, 2025 · Nov 13, 2025 · Nov 14, 2025 · Nov 14, 2025
diff --git a/devito/core/cpu.py b/devito/core/cpu.py
@@ -61,6 +61,7 @@ def _normalize_kwargs(cls, **kwargs):
         o['cire-maxpar'] = oo.pop('cire-maxpar', False)
         o['cire-ftemps'] = oo.pop('cire-ftemps', False)
         o['cire-mingain'] = oo.pop('cire-mingain', cls.CIRE_MINGAIN)
+        o['cire-minmem'] = oo.pop('cire-minmem', cls.CIRE_MINMEM)
         o['cire-schedule'] = oo.pop('cire-schedule', cls.CIRE_SCHEDULE)
 
         # Shared-memory parallelism
@@ -75,6 +76,7 @@ def _normalize_kwargs(cls, **kwargs):
 
         # Code generation options for derivatives
         o['expand'] = oo.pop('expand', cls.EXPAND)
+        o['deriv-collect'] = oo.pop('deriv-collect', cls.DERIV_COLLECT)
         o['deriv-schedule'] = oo.pop('deriv-schedule', cls.DERIV_SCHEDULE)
         o['deriv-unroll'] = oo.pop('deriv-unroll', False)
 
@@ -150,7 +152,7 @@ class Cpu64AdvOperator(Cpu64OperatorMixin, CoreOperator):
     @classmethod
     @timed_pass(name='specializing.DSL')
     def _specialize_dsl(cls, expressions, **kwargs):
-        expressions = collect_derivatives(expressions)
+        expressions = collect_derivatives(expressions, **kwargs)
 
         return expressions
 
@@ -253,7 +255,7 @@ class Cpu64CustomOperator(Cpu64OperatorMixin, CustomOperator):
     @classmethod
     def _make_dsl_passes_mapper(cls, **kwargs):
         return {
-            'collect-derivs': collect_derivatives,
+            'deriv-collect': collect_derivatives,
         }
 
     @classmethod
@@ -308,7 +310,7 @@ def _make_iet_passes_mapper(cls, **kwargs):
 
     _known_passes = (
         # DSL
-        'collect-derivs',
+        'deriv-collect',
         # Expressions
         'buffering',
         # Clusters

diff --git a/devito/core/gpu.py b/devito/core/gpu.py
@@ -68,6 +68,7 @@ def _normalize_kwargs(cls, **kwargs):
         o['cire-maxpar'] = oo.pop('cire-maxpar', True)
         o['cire-ftemps'] = oo.pop('cire-ftemps', False)
         o['cire-mingain'] = oo.pop('cire-mingain', cls.CIRE_MINGAIN)
+        o['cire-minmem'] = oo.pop('cire-minmem', cls.CIRE_MINMEM)
         o['cire-schedule'] = oo.pop('cire-schedule', cls.CIRE_SCHEDULE)
 
         # GPU parallelism
@@ -88,6 +89,7 @@ def _normalize_kwargs(cls, **kwargs):
 
         # Code generation options for derivatives
         o['expand'] = oo.pop('expand', cls.EXPAND)
+        o['deriv-collect'] = oo.pop('deriv-collect', cls.DERIV_COLLECT)
         o['deriv-schedule'] = oo.pop('deriv-schedule', cls.DERIV_SCHEDULE)
         o['deriv-unroll'] = oo.pop('deriv-unroll', False)
 
@@ -188,7 +190,7 @@ class DeviceAdvOperator(DeviceOperatorMixin, CoreOperator):
     @classmethod
     @timed_pass(name='specializing.DSL')
     def _specialize_dsl(cls, expressions, **kwargs):
-        expressions = collect_derivatives(expressions)
+        expressions = collect_derivatives(expressions, **kwargs)
 
         return expressions
 
@@ -280,7 +282,7 @@ class DeviceCustomOperator(DeviceOperatorMixin, CustomOperator):
     @classmethod
     def _make_dsl_passes_mapper(cls, **kwargs):
         return {
-            'collect-derivs': collect_derivatives,
+            'deriv-collect': collect_derivatives,
         }
 
     @classmethod
@@ -330,7 +332,7 @@ def _make_iet_passes_mapper(cls, **kwargs):
 
     _known_passes = (
         # DSL
-        'collect-derivs',
+        'deriv-collect',
         # Expressions
         'buffering',
         # Clusters

diff --git a/devito/core/operator.py b/devito/core/operator.py
@@ -69,6 +69,14 @@ class BasicOperator(Operator):
     intensity of the generated kernel.
     """
 
+    CIRE_MINMEM = True
+    """
+    Minimize memory consumption when allocating temporaries for CIRE-optimized
+    expressions. This may come at the cost of slightly worse performance due to
+    the potential need for extra registers to hold a greater number of support
+    variables (e.g., strides).
+    """
+
     SCALAR_MIN_TYPE = np.float16
     """
     Minimum datatype for a scalar arising from a common sub-expression or CIRE temp.
@@ -115,6 +123,12 @@ class BasicOperator(Operator):
     finite-difference derivatives.
     """
 
+    DERIV_COLLECT = True
+    """
+    Factorize finite-difference derivatives exploiting the linearity of the FD
+    operators.
+    """
+
     DERIV_SCHEDULE = 'basic'
     """
     The schedule to use for the computation of finite-difference derivatives.
@@ -288,7 +302,7 @@ def _specialize_dsl(cls, expressions, **kwargs):
         # Call passes
         for i in passes:
             try:
-                expressions = passes_mapper[i](expressions)
+                expressions = passes_mapper[i](expressions, **kwargs)
             except KeyError:
                 pass
 

diff --git a/devito/finite_differences/differentiable.py b/devito/finite_differences/differentiable.py
@@ -948,6 +948,20 @@ def _evaluate(self, **kwargs):
 
         return EvalDerivative(*expr.args, base=self.base)
 
+    def _subs(self, old, new, **hints):
+        # We have to work around SymPy's weak implementation of `subs` when
+        # it gets to replacing sub-operations such as `a*b*c` (i.e., potentially
+        # `self`'s `base`) within say `a*b*c*w[i0]` (i.e., the corresponding
+        # `self.expr`), because depending on the complexity of `a/b/c`, SymPy
+        # may fail to identify the sub-expression to be replaced (note: if
+        # `a/b/c` are atoms or Indexeds, it's generally fine)
+
+        if not old.is_Mul or \
+           old is not self.base:
+            return super()._subs(old, new, **hints)
+
+        return self._rebuild(new * self.weights)
+
 
 class DiffDerivative(IndexDerivative, DifferentiableOp):
     pass

diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
@@ -470,7 +470,16 @@ class ClusterGroup(tuple):
 
     def __new__(cls, clusters, ispace=None):
         obj = super().__new__(cls, flatten(as_tuple(clusters)))
-        obj._ispace = ispace
+
+        if ispace is not None:
+            obj._ispace = ispace
+        else:
+            # Best-effort attempt to infer a common IterationSpace
+            try:
+                obj._ispace, = {c.ispace for c in obj}
+            except ValueError:
+                obj._ispace = None
+
         return obj
 
     @classmethod