[BugFix] Fix collector when use_buffers=False and devices are specified

Vincent Moens · Vincent Moens · commit d4f88460abf0 · 2025-02-24T20:59:27.000Z
ghstack-source-id: 5367df9 Pull Request resolved: #2809
diff --git a/test/test_collector.py b/test/test_collector.py
@@ -3093,16 +3093,23 @@ def test_dynamic_sync_collector(self):
             assert isinstance(data, LazyStackedTensorDict)
             assert data.names[-1] == "time"
 
-    def test_dynamic_multisync_collector(self):
+    @pytest.mark.parametrize("policy_device", [None, *get_default_devices()])
+    def test_dynamic_multisync_collector(self, policy_device):
         env = EnvWithDynamicSpec
-        policy = RandomPolicy(env().action_spec)
+        spec = env().action_spec
+        if policy_device is not None:
+            spec = spec.to(policy_device)
+        policy = RandomPolicy(spec)
         collector = MultiSyncDataCollector(
             [env],
             policy,
             frames_per_batch=20,
             total_frames=100,
             use_buffers=False,
             cat_results="stack",
+            policy_device=policy_device,
+            env_device="cpu",
+            storing_device="cpu",
         )
         for data in collector:
             assert isinstance(data, LazyStackedTensorDict)
diff --git a/torchrl/collectors/collectors.py b/torchrl/collectors/collectors.py
@@ -768,8 +768,7 @@ def __init__(
         self.set_truncated = set_truncated
 
         self._make_shuttle()
-        if self._use_buffers:
-            self._make_final_rollout()
+        self._maybe_make_final_rollout(make_rollout=self._use_buffers)
         self._set_truncated_keys()
 
         if split_trajs is None:
@@ -806,28 +805,30 @@ def _make_shuttle(self):
             traj_ids,
         )
 
-    def _make_final_rollout(self):
-        with torch.no_grad():
-            self._final_rollout = self.env.fake_tensordict()
-
-        # If storing device is not None, we use this to cast the storage.
-        # If it is None and the env and policy are on the same device,
-        # the storing device is already the same as those, so we don't need
-        # to consider this use case.
-        # In all other cases, we can't really put a device on the storage,
-        # since at least one data source has a device that is not clear.
-        if self.storing_device:
-            self._final_rollout = self._final_rollout.to(
-                self.storing_device, non_blocking=True
-            )
-        else:
-            # erase all devices
-            self._final_rollout.clear_device_()
+    def _maybe_make_final_rollout(self, make_rollout: bool):
+        if make_rollout:
+            with torch.no_grad():
+                self._final_rollout = self.env.fake_tensordict()
+
+            # If storing device is not None, we use this to cast the storage.
+            # If it is None and the env and policy are on the same device,
+            # the storing device is already the same as those, so we don't need
+            # to consider this use case.
+            # In all other cases, we can't really put a device on the storage,
+            # since at least one data source has a device that is not clear.
+            if self.storing_device:
+                self._final_rollout = self._final_rollout.to(
+                    self.storing_device, non_blocking=True
+                )
+            else:
+                # erase all devices
+                self._final_rollout.clear_device_()
 
         # If the policy has a valid spec, we use it
         self._policy_output_keys = set()
         if (
-            hasattr(self.policy, "spec")
+            make_rollout
+            and hasattr(self.policy, "spec")
             and self.policy.spec is not None
             and all(v is not None for v in self.policy.spec.values(True, True))
         ):
@@ -846,14 +847,20 @@ def _make_final_rollout(self):
                     if key in self._final_rollout.keys(True):
                         continue
                     self._final_rollout.set(key, spec.zero())
-
+        elif (
+            not make_rollout
+            and hasattr(self.policy, "out_keys")
+            and self.policy.out_keys
+        ):
+            self._policy_output_keys = list(self.policy.out_keys)
         else:
-            # otherwise, we perform a small number of steps with the policy to
-            # determine the relevant keys with which to pre-populate _final_rollout.
-            # This is the safest thing to do if the spec has None fields or if there is
-            # no spec at all.
-            # See #505 for additional context.
-            self._final_rollout.update(self._shuttle.copy())
+            if make_rollout:
+                # otherwise, we perform a small number of steps with the policy to
+                # determine the relevant keys with which to pre-populate _final_rollout.
+                # This is the safest thing to do if the spec has None fields or if there is
+                # no spec at all.
+                # See #505 for additional context.
+                self._final_rollout.update(self._shuttle.copy())
             with torch.no_grad():
                 policy_input = self._shuttle.copy()
                 if self.policy_device:
@@ -911,33 +918,35 @@ def filter_policy(name, value_output, value_input, value_input_clone):
                         set(filtered_policy_output.keys(True, True))
                     )
                 )
-                self._final_rollout.update(
-                    policy_output.select(*self._policy_output_keys)
-                )
+                if make_rollout:
+                    self._final_rollout.update(
+                        policy_output.select(*self._policy_output_keys)
+                    )
                 del filtered_policy_output, policy_output, policy_input
 
         _env_output_keys = []
         for spec in ["full_observation_spec", "full_done_spec", "full_reward_spec"]:
             _env_output_keys += list(self.env.output_spec[spec].keys(True, True))
         self._env_output_keys = _env_output_keys
-        self._final_rollout = (
-            self._final_rollout.unsqueeze(-1)
-            .expand(*self.env.batch_size, self.frames_per_batch)
-            .clone()
-            .zero_()
-        )
+        if make_rollout:
+            self._final_rollout = (
+                self._final_rollout.unsqueeze(-1)
+                .expand(*self.env.batch_size, self.frames_per_batch)
+                .clone()
+                .zero_()
+            )
 
-        # in addition to outputs of the policy, we add traj_ids to
-        # _final_rollout which will be collected during rollout
-        self._final_rollout.set(
-            ("collector", "traj_ids"),
-            torch.zeros(
-                *self._final_rollout.batch_size,
-                dtype=torch.int64,
-                device=self.storing_device,
-            ),
-        )
-        self._final_rollout.refine_names(..., "time")
+            # in addition to outputs of the policy, we add traj_ids to
+            # _final_rollout which will be collected during rollout
+            self._final_rollout.set(
+                ("collector", "traj_ids"),
+                torch.zeros(
+                    *self._final_rollout.batch_size,
+                    dtype=torch.int64,
+                    device=self.storing_device,
+                ),
+            )
+            self._final_rollout.refine_names(..., "time")
 
     def _set_truncated_keys(self):
         self._truncated_keys = []