fix(pu): fix encoder dormant_ratio

puyuan · puyuan · commit b7c9295e205d · 2025-02-20T17:55:19.000+08:00
diff --git a/lzero/model/unizero_world_models/world_model.py b/lzero/model/unizero_world_models/world_model.py
@@ -98,12 +98,12 @@ def __init__(self, config: TransformerConfig, tokenizer) -> None:
         self.head_value = self._create_head(self.value_policy_tokens_pattern, self.support_size)
 
         # 对于 head 部分，查找所有以 "head_" 开头的子模块
-        self.head_modules = {}
+        self.head_dict = {}
         for name, module in self.named_children():
             if name.startswith("head_"):
-                self.head_modules[name] = module
-        if self.head_modules:
-            self.head_modules = nn.ModuleDict(self.head_modules)
+                self.head_dict[name] = module
+        if self.head_dict:
+            self.head_dict = nn.ModuleDict(self.head_dict)
 
         # Apply weight initialization, the order is important
         self.apply(lambda module: init_weights(module, norm_type=self.config.norm_type))
@@ -1171,8 +1171,8 @@ def compute_loss(self, batch, target_tokenizer: Tokenizer = None, inverse_scalar
             # 计算全局平均权重绝对值
             avg_weight_mag_transformer = compute_average_weight_magnitude(self.transformer)
             # print("Average Weight Magnitude of transformer:", avg_weight_mag_transformer)
-            # print(f"self.head_modules:{self.head_modules}")
-            avg_weight_mag_head = compute_average_weight_magnitude(self.head_modules)
+            # print(f"self.head_dict:{self.head_dict}")
+            avg_weight_mag_head = compute_average_weight_magnitude(self.head_dict)
             # print("Average Weight Magnitude of head:", avg_weight_mag_head)
 
             # 计算 effective rank，对于 representation 层，注意：
diff --git a/lzero/model/utils.py b/lzero/model/utils.py
@@ -187,18 +187,19 @@ def cal_dormant_ratio(
         parts["transformer"] = model.transformer
     
     # 对于 head 部分，查找所有以 "head_" 开头的子模块
-    # head_modules = {}
+    # head_dict = {}
     # for name, module in model.named_children():
     #     if name.startswith("head_"):
-    #         head_modules[name] = module
-    # if head_modules:
-    #     parts["head"] = nn.ModuleDict(head_modules)
+    #         head_dict[name] = module
+    # if head_dict:
+    #     parts["head"] = nn.ModuleDict(head_dict)
     
-    if hasattr(model, "head_modules"):
-        parts["head"] = model.head_modules
+    if hasattr(model, "head_dict"):
+        parts["head"] = model.head_dict
 
-    # if not hasattr(model, "encoder") and not hasattr(model, "transformer") and not hasattr(model, "head"):
-    #     parts["model"] = model
+    if not hasattr(model, "encoder") and not hasattr(model, "transformer") and not hasattr(model, "head"):
+        # Fallback for a bare sub-module passed in directly (e.g. self.tokenizer.encoder): treat the whole model as one part
+        parts["model"] = model
 
     # 定义要捕获的目标模块类型 TODO: 增加更多模块
     target_modules = (nn.Conv2d, nn.Linear)
@@ -235,6 +236,8 @@ def cal_dormant_ratio(
         part_dormant = 0
         for full_name, hook in hooks:
             layer_total, layer_dormant = compute_dormant_stats(hook.outputs, dormant_threshold)
+            # if part == "model":
+            #     print(hook.outputs)
             # 可打印日志，也可记录更详细信息
             # print(f"{full_name}: {layer_dormant}/{layer_total} -> {layer_dormant / layer_total * 100.0 if layer_total > 0 else 0.0}%")
             part_total += layer_total
diff --git a/zoo/atari/config/atari_unizero_segment_config.py b/zoo/atari/config/atari_unizero_segment_config.py
@@ -49,8 +49,8 @@ def main(env_id, seed):
             n_evaluator_episode=evaluator_env_num,
             manager=dict(shared_memory=False, ),
             # TODO: only for debug
-            # collect_max_episode_steps=int(50),
-            # eval_max_episode_steps=int(50),
+            # collect_max_episode_steps=int(20),
+            # eval_max_episode_steps=int(20),
         ),
         policy=dict(
             learn=dict(learner=dict(hook=dict(save_ckpt_after_iter=1000000, ), ), ),  # default is 10000
@@ -102,6 +102,7 @@ def main(env_id, seed):
             num_simulations=num_simulations,
             num_segments=num_segments,
             td_steps=5,
+            # train_start_after_envsteps=0, # only for debug
             train_start_after_envsteps=2000,
             game_segment_length=game_segment_length,
             grad_clip_value=5,
@@ -137,6 +138,7 @@ def main(env_id, seed):
 
     # ============ use muzero_segment_collector instead of muzero_collector =============
     from lzero.entry import train_unizero_segment
+    # TODO: only for debug
     main_config.exp_name = f'data_unizero_atari_st_lop/{env_id[:-14]}/{env_id[:-14]}_uz_brf{buffer_reanalyze_freq}-rbs{reanalyze_batch_size}-rp{reanalyze_partition}_nlayer{num_layers}_numsegments-{num_segments}_gsl{game_segment_length}_rr{replay_ratio}_Htrain{num_unroll_steps}-Hinfer{infer_context_length}_bs{batch_size}_seed{seed}'
     train_unizero_segment([main_config, create_config], seed=seed, model_path=main_config.policy.model_path, max_env_step=max_env_step)