pytorch
diff --git a/‎docs/source/reference/envs.rst‎
Lines changed: 71 additions & 0 deletions b/‎docs/source/reference/envs.rst‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎docs/source/reference/utils.rst‎
Lines changed: 3 additions & 1 deletion b/‎docs/source/reference/utils.rst‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎test/_utils_internal.py‎
Lines changed: 11 additions & 2 deletions b/‎test/_utils_internal.py‎
Lines changed: 11 additions & 2 deletions
diff --git a/‎test/test_collector.py‎
Lines changed: 14 additions & 5 deletions b/‎test/test_collector.py‎
Lines changed: 14 additions & 5 deletions
@@ -865,6 +865,8 @@ The inverse process is executed with the output tensordict, where the `in_keys`
 
    Rename transform logic
 
+.. note:: During a call to `inv`, the transforms are executed in reverse order (compared to the forward / step mode).
+
 Transforming Tensors and Specs
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -900,6 +902,74 @@ tensor that should not be generated when using :meth:`~torchrl.envs.EnvBase.rand
 environment. Instead, `"action_discrete"` should be generated, and its continuous counterpart obtained from the
 transform. Therefore, the user should see the `"action_discrete"` entry being exposed, but not `"action"`.
 
+Designing your own Transform
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To create a basic, custom transform, you need to subclass the `Transform` class and implement the
+:meth:`~torchrl.envs.Transform._apply_transform` method. Here's an example of a simple transform that adds 1 to the observation
+tensor:
+
+    >>> class AddOneToObs(Transform):
+    ...     """A transform that adds 1 to the observation tensor."""
+    ...
+    ...     def __init__(self):
+    ...         super().__init__(in_keys=["observation"], out_keys=["observation"])
+    ...
+    ...     def _apply_transform(self, obs: torch.Tensor) -> torch.Tensor:
+    ...         return obs + 1
+
+
+Tips for subclassing `Transform`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+There are various ways of subclassing a transform. The things to take into consideration are:
+
+- Is the transform identical for each tensor / item being transformed? Use
+  :meth:`~torchrl.envs.Transform._apply_transform` and :meth:`~torchrl.envs.Transform._inv_apply_transform`.
+- Does the transform need access to the input data to env.step as well as its output? Rewrite
+  :meth:`~torchrl.envs.Transform._step`.
+  Otherwise, rewrite :meth:`~torchrl.envs.Transform._call` (or :meth:`~torchrl.envs.Transform._inv_call`).
+- Is the transform to be used within a replay buffer? Overwrite :meth:`~torchrl.envs.Transform.forward`,
+  :meth:`~torchrl.envs.Transform.inv`, :meth:`~torchrl.envs.Transform._apply_transform` or
+  :meth:`~torchrl.envs.Transform._inv_apply_transform`.
+- Within a transform, you can access (and make calls to) the parent environment using
+  :attr:`~torchrl.envs.Transform.parent` (the base env + all transforms till this one) or
+  :attr:`~torchrl.envs.Transform.container` (the object that encapsulates the transform).
+- Don't forget to edit the specs if needed: top level: :meth:`~torchrl.envs.Transform.transform_output_spec`,
+  :meth:`~torchrl.envs.Transform.transform_input_spec`.
+  Leaf level: :meth:`~torchrl.envs.Transform.transform_observation_spec`,
+  :meth:`~torchrl.envs.Transform.transform_action_spec`, :meth:`~torchrl.envs.Transform.transform_state_spec`,
+  :meth:`~torchrl.envs.Transform.transform_reward_spec` and
+  :meth:`~torchrl.envs.Transform.transform_done_spec`.
+
+For practical examples, see the methods listed above.
+
+You can use a transform in an environment by passing it to the TransformedEnv constructor:
+
+    >>> env = TransformedEnv(GymEnv("Pendulum-v1"), AddOneToObs())
+
+You can compose multiple transforms together using the Compose class:
+
+    >>> transform = Compose(AddOneToObs(), RewardSum())
+    >>> env = TransformedEnv(GymEnv("Pendulum-v1"), transform)
+
+Inverse Transforms
+^^^^^^^^^^^^^^^^^^
+
+Some transforms have an inverse transform, executed during a call to `inv`, that can be used to undo the transformation.
+For example, the AddOneToAction transform below implements `_inv_apply_transform`, which adds 1 to the action tensor:
+
+    >>> class AddOneToAction(Transform):
+    ...     """A transform that adds 1 to the action tensor."""
+    ...     def __init__(self):
+    ...         super().__init__(in_keys=[], out_keys=[], in_keys_inv=["action"], out_keys_inv=["action"])
+    ...     def _inv_apply_transform(self, action: torch.Tensor) -> torch.Tensor:
+    ...         return action + 1
+
+Using a Transform with a Replay Buffer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can use a transform with a replay buffer by passing it to the ReplayBuffer constructor via its `transform` argument.
 
 Cloning transforms
 ~~~~~~~~~~~~~~~~~~
@@ -1000,6 +1070,7 @@ to be able to create this other composition:
     TargetReturn
     TensorDictPrimer
     TimeMaxPool
+    Timer
     Tokenizer
     ToTensorImage
     TrajCounter
 
@@ -1,4 +1,4 @@
-.. currentmodule:: torchrl._utils
+.. currentmodule:: torchrl
 
 torchrl._utils package
 ====================
@@ -11,3 +11,5 @@ Set of utility methods that are used internally by the library.
     :template: rl_template.rst
 
     implement_for
+    set_auto_unwrap_transformed_env
+    auto_unwrap_transformed_env
@@ -9,6 +9,7 @@
 import os
 
 import os.path
+import sys
 import time
 import unittest
 import warnings
@@ -42,12 +43,17 @@
     ToTensorImage,
     TransformedEnv,
 )
+from torchrl.modules import MLP
 from torchrl.objectives.value.advantages import _vmap_func
 
 # Specified for test_utils.py
 __version__ = "0.3"
 
-from torchrl.modules import MLP
+IS_WIN = sys.platform == "win32"
+if IS_WIN:
+    mp_ctx = "spawn"
+else:
+    mp_ctx = "fork"
 
 
 def CARTPOLE_VERSIONED():
@@ -265,6 +271,7 @@ def _make_envs(
     N,
     device="cpu",
     kwargs=None,
+    local_mp_ctx=mp_ctx,
 ):
     torch.manual_seed(0)
     if not transformed_in:
@@ -299,7 +306,9 @@ def create_env_fn():
                 )
 
     env0 = create_env_fn()
-    env_parallel = ParallelEnv(N, create_env_fn, create_env_kwargs=kwargs)
+    env_parallel = ParallelEnv(
+        N, create_env_fn, create_env_kwargs=kwargs, mp_start_method=local_mp_ctx
+    )
     env_serial = SerialEnv(N, create_env_fn, create_env_kwargs=kwargs)
 
     for key in env0.observation_spec.keys(True, True):
 
@@ -3093,16 +3093,23 @@ def test_dynamic_sync_collector(self):
             assert isinstance(data, LazyStackedTensorDict)
             assert data.names[-1] == "time"
 
-    def test_dynamic_multisync_collector(self):
+    @pytest.mark.parametrize("policy_device", [None, *get_default_devices()])
+    def test_dynamic_multisync_collector(self, policy_device):
         env = EnvWithDynamicSpec
-        policy = RandomPolicy(env().action_spec)
+        spec = env().action_spec
+        if policy_device is not None:
+            spec = spec.to(policy_device)
+        policy = RandomPolicy(spec)
         collector = MultiSyncDataCollector(
             [env],
             policy,
             frames_per_batch=20,
             total_frames=100,
             use_buffers=False,
             cat_results="stack",
+            policy_device=policy_device,
+            env_device="cpu",
+            storing_device="cpu",
         )
         for data in collector:
             assert isinstance(data, LazyStackedTensorDict)
@@ -3213,9 +3220,11 @@ def test_cudagraph_policy(self, collector_cls, cudagraph_policy):
 @pytest.mark.skipif(not _has_gym, reason="gym required for this test")
 class TestCollectorsNonTensor:
     class AddNontTensorData(Transform):
-        def _call(self, tensordict: TensorDictBase) -> TensorDictBase:
-            tensordict["nt"] = f"a string! - {tensordict.get('step_count').item()}"
-            return tensordict
+        def _call(self, next_tensordict: TensorDictBase) -> TensorDictBase:
+            next_tensordict[
+                "nt"
+            ] = f"a string! - {next_tensordict.get('step_count').item()}"
+            return next_tensordict
 
         def _reset(
             self, tensordict: TensorDictBase, tensordict_reset: TensorDictBase