Skip to content

Commit bfe3db3

Browse files
Joey Yang authored and facebook-github-bot committed
Store and retrieve hash_zch_runtime_meta within mc_embedding submodules (#3599)
Summary: See https://fb.workplace.com/groups/1404957374198553/permalink/1610214197006202/ Similar to D85999577, we store the `hash_zch_runtime_meta` when it is being looked up in `mc_modules.py` in `raw_id_tracker`, then access it in `batched_embedding_kernel.py`, from which it will be streamed to the inference side (see the other diffs in this stack, D87810125). Reviewed By: chouxi Differential Revision: D88623165
1 parent 9421c38 commit bfe3db3

File tree

3 files changed

+75
-38
lines changed

3 files changed

+75
-38
lines changed

torchrec/distributed/batched_embedding_kernel.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1758,19 +1758,29 @@ def init_parameters(self) -> None:
17581758
weight_init_max,
17591759
)
17601760

1761-
def forward(self, features: KeyedJaggedTensor) -> torch.Tensor:
1762-
hash_zch_identities = self._get_hash_zch_identities(features)
1763-
if hash_zch_identities is None:
1761+
def forward(
1762+
self,
1763+
features: KeyedJaggedTensor,
1764+
) -> torch.Tensor:
1765+
forward_args: Dict[str, Any] = {}
1766+
identities_and_metadata = self._get_hash_zch_identities_and_metadata(features)
1767+
if identities_and_metadata is not None:
1768+
hash_zch_identities, hash_zch_runtime_meta = identities_and_metadata
1769+
forward_args["hash_zch_identities"] = hash_zch_identities
1770+
if hash_zch_runtime_meta is not None:
1771+
forward_args["hash_zch_runtime_meta"] = hash_zch_runtime_meta
1772+
1773+
if len(forward_args) == 0:
17641774
return self.emb_module(
17651775
indices=features.values().long(),
17661776
offsets=features.offsets().long(),
17671777
)
1768-
1769-
return self.emb_module(
1770-
indices=features.values().long(),
1771-
offsets=features.offsets().long(),
1772-
hash_zch_identities=hash_zch_identities,
1773-
)
1778+
else:
1779+
return self.emb_module(
1780+
indices=features.values().long(),
1781+
offsets=features.offsets().long(),
1782+
**forward_args,
1783+
)
17741784

17751785
# pyre-fixme[14]: `state_dict` overrides method defined in `Module` inconsistently.
17761786
def state_dict(
@@ -2832,9 +2842,12 @@ def forward(
28322842
features: KeyedJaggedTensor,
28332843
) -> torch.Tensor:
28342844
forward_args: Dict[str, Any] = {}
2835-
hash_zch_identities = self._get_hash_zch_identities(features)
2836-
if hash_zch_identities is not None:
2845+
identities_and_metadata = self._get_hash_zch_identities_and_metadata(features)
2846+
if identities_and_metadata is not None:
2847+
hash_zch_identities, hash_zch_runtime_meta = identities_and_metadata
28372848
forward_args["hash_zch_identities"] = hash_zch_identities
2849+
if hash_zch_runtime_meta is not None:
2850+
forward_args["hash_zch_runtime_meta"] = hash_zch_runtime_meta
28382851

28392852
weights = features.weights_or_none()
28402853
if weights is not None and not torch.is_floating_point(weights):

torchrec/distributed/embedding_kernel.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,9 @@ def init_raw_id_tracker(
110110
get_indexed_lookups, delete
111111
)
112112

113-
def _get_hash_zch_identities(
113+
def _get_hash_zch_identities_and_metadata(
114114
self, features: KeyedJaggedTensor
115-
) -> Optional[torch.Tensor]:
115+
) -> Optional[Tuple[torch.Tensor, Optional[torch.Tensor]]]:
116116
if self._raw_id_tracker_wrapper is None or not isinstance(
117117
self.emb_module, SplitTableBatchedEmbeddingBagsCodegen
118118
):
@@ -131,7 +131,7 @@ def _get_hash_zch_identities(
131131
# across multiple training iterations. Current logic appends raw_ids from
132132
# all batches sequentially. This may cause misalignment with
133133
# features.values() which only contains the current batch.
134-
raw_ids_dict = raw_id_tracker_wrapper.get_indexed_lookups(
134+
indexed_lookups_dict = raw_id_tracker_wrapper.get_indexed_lookups(
135135
table_names, emb_module.uuid
136136
)
137137

@@ -148,11 +148,14 @@ def _get_hash_zch_identities(
148148
# raw_ids are included. If some tables lack identity while others have them,
149149
# padding with -1 may be needed to maintain alignment.
150150
all_raw_ids = []
151+
all_runtime_meta = []
151152
for table_name in table_names:
152-
if table_name in raw_ids_dict:
153-
raw_ids_list = raw_ids_dict[table_name]
153+
if table_name in indexed_lookups_dict:
154+
raw_ids_list, runtime_meta_list = indexed_lookups_dict[table_name]
154155
for raw_ids in raw_ids_list:
155156
all_raw_ids.append(raw_ids)
157+
for runtime_meta in runtime_meta_list:
158+
all_runtime_meta.append(runtime_meta)
156159

157160
if not all_raw_ids:
158161
return None
@@ -162,7 +165,16 @@ def _get_hash_zch_identities(
162165
f"hash_zch_identities row count ({hash_zch_identities.size(0)}) must match "
163166
f"features.values() length ({features.values().numel()}) to maintain 1-to-1 alignment"
164167
)
165-
return hash_zch_identities
168+
169+
if all_runtime_meta:
170+
hash_zch_runtime_meta = torch.cat(all_runtime_meta)
171+
assert hash_zch_runtime_meta.size(0) == hash_zch_identities.size(0), (
172+
f"hash_zch_runtime_meta row count ({hash_zch_runtime_meta.size(0)}) must match "
173+
f"hash_zch_identities length ({hash_zch_identities.size(0)}) to maintain 1-to-1 alignment"
174+
)
175+
return (hash_zch_identities, hash_zch_runtime_meta)
176+
else:
177+
return (hash_zch_identities, None)
166178

167179

168180
def create_virtual_table_local_metadata(

torchrec/distributed/mc_modules.py

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ def __init__(
245245
torch.Tensor,
246246
Optional[nn.Module],
247247
Optional[torch.Tensor],
248+
Optional[torch.Tensor],
248249
],
249250
None,
250251
]
@@ -716,6 +717,31 @@ def global_to_local_index(
716717
jt._values = jt.values() - self._table_to_offset[table]
717718
return jt_dict
718719

720+
def _retrieve_and_track_hash_zch_identities_and_metadata(
721+
self,
722+
mcm: nn.Module,
723+
mc_input: Dict[str, JaggedTensor],
724+
indices: torch.Tensor,
725+
) -> None:
726+
if self.post_lookup_tracker_fn is None:
727+
return
728+
if not hasattr(mcm, "_hash_zch_identities"):
729+
return
730+
# _hash_zch_identities should always exist but _hash_zch_runtime_meta is optional
731+
runtime_meta = None
732+
if (
733+
hasattr(mcm, "_hash_zch_runtime_meta")
734+
and mcm._hash_zch_runtime_meta is not None
735+
):
736+
runtime_meta = mcm._hash_zch_runtime_meta.index_select(dim=0, index=indices)
737+
self.post_lookup_tracker_fn(
738+
KeyedJaggedTensor.from_jt_dict(mc_input),
739+
torch.empty(0),
740+
None,
741+
mcm._hash_zch_identities.index_select(dim=0, index=indices),
742+
runtime_meta,
743+
)
744+
719745
def compute(
720746
self,
721747
ctx: ManagedCollisionCollectionContext,
@@ -758,19 +784,9 @@ def compute(
758784
mc_input = mcm.remap(mc_input)
759785
mc_input = self.global_to_local_index(mc_input)
760786
output.update(mc_input)
761-
if hasattr(
762-
mcm,
763-
"_hash_zch_identities",
764-
):
765-
if self.post_lookup_tracker_fn is not None:
766-
self.post_lookup_tracker_fn(
767-
KeyedJaggedTensor.from_jt_dict(mc_input),
768-
torch.empty(0),
769-
None,
770-
mcm._hash_zch_identities.index_select(
771-
dim=0, index=mc_input[table].values()
772-
),
773-
)
787+
self._retrieve_and_track_hash_zch_identities_and_metadata(
788+
mcm, mc_input, mc_input[table].values()
789+
)
774790
values = torch.cat([jt.values() for jt in output.values()])
775791
else:
776792
table: str = tables[0]
@@ -789,14 +805,9 @@ def compute(
789805
mc_input = mcm.remap(mc_input)
790806
mc_input = self.global_to_local_index(mc_input)
791807
values = mc_input[table].values()
792-
if hasattr(mcm, "_hash_zch_identities"):
793-
if self.post_lookup_tracker_fn is not None:
794-
self.post_lookup_tracker_fn(
795-
KeyedJaggedTensor.from_jt_dict(mc_input),
796-
torch.empty(0),
797-
None,
798-
mcm._hash_zch_identities.index_select(dim=0, index=values),
799-
)
808+
self._retrieve_and_track_hash_zch_identities_and_metadata(
809+
mcm, mc_input, values
810+
)
800811

801812
remapped_kjts.append(
802813
KeyedJaggedTensor(
@@ -895,6 +906,7 @@ def register_post_lookup_tracker_fn(
895906
torch.Tensor,
896907
Optional[nn.Module],
897908
Optional[torch.Tensor],
909+
Optional[torch.Tensor],
898910
],
899911
None,
900912
],

0 commit comments

Comments
 (0)