diff --git a/docs/performance-summary.md b/docs/performance-summary.md index 3876e485..791eb7b9 100644 --- a/docs/performance-summary.md +++ b/docs/performance-summary.md @@ -46,14 +46,20 @@ The performance data includes: | Model | #-GPUs | GBS | MBS | Sequence Length | FSDP | TP | SP | PP | CP | VP | EP | Model TFLOP / sec / GPU | |-------|--------|-----|-----|-----------------|------|----|----|----|----|----|----|-------------------------| -|Wan 2.1 14B|32|64|1|37440|0|1|0|1|4|0|0|787.59| +|Wan 2.1 14B|32|64|1|37440|1|1|0|1|2|0|0|899.62| #### System: DGX-GB300 | Model | #-GPUs | GBS | MBS | Sequence Length | FSDP | TP | SP | PP | CP | VP | EP | Model TFLOP / sec / GPU | |-------|--------|-----|-----|-----------------|------|----|----|----|----|----|----|-------------------------| -|Wan 2.1 14B|32|64|1|37440|0|1|0|1|2|0|0|1,022.26| +|Wan 2.1 14B|32|64|1|37440|0|1|0|1|2|0|0|1,030.67| + +#### System: DGX-B200 + +| Model | #-GPUs | GBS | MBS | Sequence Length | FSDP | TP | SP | PP | CP | VP | EP | Model TFLOP / sec / GPU | +|-------|--------|-----|-----|-----------------|------|----|----|----|----|----|----|-------------------------| +|Wan 2.1 14B|32|64|1|37440|1|1|0|1|2|0|0|804.02| #### System: DGX-H100 diff --git a/examples/megatron/recipes/wan/README_perf_test.md b/examples/megatron/recipes/wan/README_perf_test.md index d95a4c14..d101ca33 100644 --- a/examples/megatron/recipes/wan/README_perf_test.md +++ b/examples/megatron/recipes/wan/README_perf_test.md @@ -5,7 +5,7 @@ This guide provides concise steps to set up the environment and run WAN pretrain ## Container Launch ```bash -CONT="nvcr.io/nvidia/nemo:25.09.00" +CONT="nvcr.io/nvidia/nemo:25.11" MOUNT="/lustre/fsw/:/lustre/fsw/" srun -t 02:00:00 \ @@ -28,18 +28,18 @@ cd /opt/ # DFM (pinned) git clone --no-checkout https://github.com/NVIDIA-NeMo/DFM.git -git -C DFM checkout 174bb7b34de002ebbbcae1ba8e2b12363c7dee01 +git -C DFM checkout 9eaace14995a724c982fe53726a909be2edc93cb export DFM_PATH=/opt/DFM # 
Megatron-Bridge (pinned) rm -rf /opt/Megatron-Bridge -git clone --no-checkout https://github.com/huvunvidia/Megatron-Bridge.git -git -C Megatron-Bridge checkout 713ab548e4bfee307eb94a7bb3f57c17dbb31b50 +git clone --no-checkout https://github.com/NVIDIA-NeMo/Megatron-Bridge.git +git -C Megatron-Bridge checkout 953aabf75c0500180dc14a6a76cf9e7e7c4baec7 # Megatron-LM (pinned) rm -rf /opt/Megatron-LM git clone --no-checkout https://github.com/NVIDIA/Megatron-LM.git -git -C Megatron-LM checkout ce8185cbbe04f38beb74360e878450f2e8525885 +git -C Megatron-LM checkout 2d398b42fd4237fffb553109563d73ac099751c3 # Python path export PYTHONPATH="${DFM_PATH}/.:/opt/Megatron-Bridge/.:/opt/Megatron-LM" @@ -141,7 +141,13 @@ NVTE_FUSED_ATTN=1 torchrun --nproc_per_node=8 examples/megatron/recipes/wan/pret ### Using mock data (optional, for debugging) - Using `--mock` argument. -- Adjust `video_size` (F_latents, H_latents, W_latents) and `number_packed_samples` of `WanMockDataModuleConfig` in `wan.py`. Total `seq_len = F * H * W * number_packed_samples`. +- Adjust `video_size` (F_latents, H_latents, W_latents) and `number_packed_samples` of `WanMockDataModuleConfig` in `wan.py`. Total `seq_len = (F_latents // patch_temporal) * (H_latents // patch_spatial) * (W_latents // patch_spatial) * number_packed_samples`. + +### Reproducing performance recipes + +- Please use the appropriate system config recipe in `examples/megatron/recipes/wan/conf/<system>_perf_pretrain_mock.yaml` +- Usage example `examples/megatron/recipes/wan/pretrain_wan.py --mock --training-mode pretrain --config-file examples/megatron/recipes/wan/conf/gb300_perf_pretrain_mock.yaml` +- Note that the FLOPs calculation for Wan 2.1 is not currently supported in Megatron-Bridge. Please use a manual calculator until a fix is made. 
## Inference diff --git a/examples/megatron/recipes/wan/conf/b200_perf_pretrain_mock.yaml b/examples/megatron/recipes/wan/conf/b200_perf_pretrain_mock.yaml new file mode 100644 index 00000000..13fc0c8d --- /dev/null +++ b/examples/megatron/recipes/wan/conf/b200_perf_pretrain_mock.yaml @@ -0,0 +1,43 @@ +model: + tensor_model_parallel_size: 1 + sequence_parallel: false + pipeline_model_parallel_size: 1 + context_parallel_size: 2 + crossattn_emb_size: 5120 + hidden_size: 5120 + ffn_hidden_size: 13824 + num_attention_heads: 40 + num_layers: 40 + qkv_format: thd + seq_length: 2048 # This is not used + +train: + global_batch_size: 64 + micro_batch_size: 1 + eval_iters: 0 + +scheduler: + lr_decay_style: constant + lr_warmup_iters: 0 + +optimizer: + lr: 5e-6 + min_lr: 5e-6 + +dataset: + seq_length: 2048 # This is not used + global_batch_size: 64 + micro_batch_size: 1 + +logger: + log_interval: 1 + +ddp: + use_megatron_fsdp: true + data_parallel_sharding_strategy: "optim_grads_params" + +dist: + use_megatron_fsdp: true + +checkpoint: + ckpt_format: "fsdp_dtensor" diff --git a/examples/megatron/recipes/wan/conf/gb200_perf_pretrain_mock.yaml b/examples/megatron/recipes/wan/conf/gb200_perf_pretrain_mock.yaml index 7b170d36..13fc0c8d 100644 --- a/examples/megatron/recipes/wan/conf/gb200_perf_pretrain_mock.yaml +++ b/examples/megatron/recipes/wan/conf/gb200_perf_pretrain_mock.yaml @@ -2,7 +2,7 @@ model: tensor_model_parallel_size: 1 sequence_parallel: false pipeline_model_parallel_size: 1 - context_parallel_size: 4 + context_parallel_size: 2 crossattn_emb_size: 5120 hidden_size: 5120 ffn_hidden_size: 13824 @@ -31,3 +31,13 @@ dataset: logger: log_interval: 1 + +ddp: + use_megatron_fsdp: true + data_parallel_sharding_strategy: "optim_grads_params" + +dist: + use_megatron_fsdp: true + +checkpoint: + ckpt_format: "fsdp_dtensor"