
Commit ba43867
committed Mar 13, 2025

Update

[ghstack-poisoned]

1 parent 677e543 commit ba43867

File tree

3 files changed (+14 -9 lines):
torchrl/collectors/distributed/ray.py
torchrl/collectors/distributed/rpc.py
torchrl/collectors/utils.py

torchrl/collectors/distributed/ray.py (+10 -6)

@@ -456,8 +456,9 @@ def check_list_length_consistency(*lists):
             policy_weights = TensorDict.from_module(self._local_policy)
             policy_weights = policy_weights.data.lock_()
         else:
-            warnings.warn(_NON_NN_POLICY_WEIGHTS)
             policy_weights = TensorDict(lock=True)
+            if remote_weights_updater is None:
+                warnings.warn(_NON_NN_POLICY_WEIGHTS)
         self.policy_weights = policy_weights
         self.collector_class = collector_class
         self.collected_frames = 0
@@ -467,11 +468,6 @@ def check_list_length_consistency(*lists):
 
         self.update_after_each_batch = update_after_each_batch
         self.max_weight_update_interval = max_weight_update_interval
-        self.remote_weights_updater = RayRemoteWeightUpdater(
-            policy_weights=policy_weights,
-            remote_collectors=self.remote_collectors,
-            max_interval=self.max_weight_update_interval,
-        )
 
         self.collector_kwargs = (
             collector_kwargs if collector_kwargs is not None else [{}]
@@ -529,6 +525,14 @@ def check_list_length_consistency(*lists):
             collector_kwargs,
             remote_configs,
         )
+        if remote_weights_updater is None:
+            remote_weights_updater = RayRemoteWeightUpdater(
+                policy_weights=policy_weights,
+                remote_collectors=self.remote_collectors,
+                max_interval=self.max_weight_update_interval,
+            )
+        self.remote_weights_updater = remote_weights_updater
+        self.local_weights_updater = local_weights_updater
 
         # Print info of all remote workers
         pending_samples = [
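
Taken together, these hunks defer construction of the default RayRemoteWeightUpdater until self.remote_collectors exists, and build it only when the caller did not supply one. A rough usage sketch of the new injection point follows; the environment, policy, and updater are illustrative placeholders, and only the remote_weights_updater argument and RayRemoteWeightUpdater come from this diff.

    # Sketch only: make_env, policy and my_updater are placeholders, not part of this commit.
    from torchrl.collectors.distributed.ray import RayCollector
    from torchrl.envs.libs.gym import GymEnv

    def make_env():
        return GymEnv("Pendulum-v1")  # hypothetical example environment

    def policy(td):
        return td  # stand-in for a plain-callable policy (not an nn.Module)

    my_updater = ...  # any object implementing the remote weight-update interface

    collector = RayCollector(
        create_env_fn=[make_env],
        policy=policy,
        frames_per_batch=256,
        total_frames=10_000,
        remote_weights_updater=my_updater,  # new: skips the default RayRemoteWeightUpdater
    )                                       # and silences the _NON_NN_POLICY_WEIGHTS warning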

torchrl/collectors/distributed/rpc.py (+2 -2)

@@ -873,7 +873,7 @@ def update_weights(
         if workers is None:
             workers = list(range(self.num_workers))
         futures = []
-        weights = self.policy_weights.data if weights is None else weights
+        weights = self.policy_weights if weights is None else weights
         for i in workers:
             if self._VERBOSE:
                 torchrl_logger.info(f"calling update on worker {i}")
@@ -884,7 +884,7 @@ def update_weights(
                     args=(self.collector_rrefs[i], weights),
                 )
             )
-        if kwargs.get("wait"):
+        if kwargs.get("wait", True):
             for i in workers:
                 if self._VERBOSE:
                     torchrl_logger.info(f"waiting for worker {i}")
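
Two behavior fixes here: update_weights no longer takes .data off self.policy_weights (which already holds the locked parameter data), and waiting on the RPC futures becomes the default instead of only happening when wait=True was passed explicitly (kwargs.get("wait") returned None, i.e. falsy, when the key was absent). Assuming a constructed RPCDataCollector named collector, the new default behaves like this sketch:

    collector.update_weights()            # now blocks until every worker has received the weights
    collector.update_weights(wait=False)  # restores the old fire-and-forget behavior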

torchrl/collectors/utils.py (+2 -1)

@@ -13,7 +13,8 @@
 
 _NON_NN_POLICY_WEIGHTS = (
     "The policy is not an nn.Module. TorchRL will assume that the parameter set is empty and "
-    "update_policy_weights_ will be a no-op."
+    "update_policy_weights_ will be a no-op. Consider passing a local/remote_weight_updater object "
+    "to your collector to handle the weight updates."
 )
 
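
The warning text now points at the local/remote weight-updater escape hatch introduced in ray.py above. For context, a standalone sketch of the two cases it distinguishes, using only the tensordict calls that already appear in this diff:

    import torch
    from tensordict import TensorDict

    module_policy = torch.nn.Linear(4, 2)
    weights = TensorDict.from_module(module_policy)  # nn.Module: parameters become a TensorDict
    print(sorted(weights.keys()))                    # ['bias', 'weight']

    empty = TensorDict(lock=True)                    # non-nn.Module policy: empty, locked set
    print(len(list(empty.keys())))                   # 0, so update_policy_weights_ is a no-op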
