diff --git a/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py b/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py index 7b74a6879ad..0578b10524a 100644 --- a/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py +++ b/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py @@ -1,3 +1,4 @@ +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import os os.environ["OPENBLAS_NUM_THREADS"] = "1" @@ -63,6 +64,9 @@ def collect_train_test_metrics( "lm loss", "num-zeros", "mtp_1 loss", + "load_balancing_loss", + "seq_load_balancing_loss", + "global_load_balancing_loss", ] } diff --git a/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py b/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py index 58311542ee9..ff5d113adf9 100644 --- a/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py +++ b/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. import logging from typing import Dict, List, Optional @@ -19,6 +19,15 @@ "num-zeros": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], "generated_tokens": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], "logprobs": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], + "load_balancing_loss": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)], + "seq_load_balancing_loss": [ + common.DeterministicTest(), + common.ApproximateTest(atol=0, rtol=0.05), + ], + "global_load_balancing_loss": [ + common.DeterministicTest(), + common.ApproximateTest(atol=0, rtol=0.05), + ], } diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp1pp1ep8/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp1pp1ep8/golden_values_dev_dgx_h100.json index e69de29bb2d..ed26aeeb715 100644 --- a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp1pp1ep8/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp1pp1ep8/golden_values_dev_dgx_h100.json @@ -0,0 +1,858 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.10395, + "2": 11.12719, + "3": 9.79737, + "4": 9.78134, + "5": 9.94524, + "6": 9.48234, + "7": 9.32716, + "8": 8.85523, + "9": 8.76756, + "10": 8.98682, + "11": 8.45161, + "12": 8.47042, + "13": 8.37702, + "14": 7.82258, + "15": 8.41901, + "16": 8.01239, + "17": 7.94826, + "18": 7.69058, + "19": 8.06159, + "20": 7.78236, + "21": 7.46686, + "22": 7.44589, + "23": 7.30457, + "24": 7.29311, + "25": 7.58017, + "26": 6.98366, + "27": 7.4971, + "28": 7.22841, + "29": 7.40124, + "30": 7.51308, + "31": 7.294, + "32": 7.49667, + "33": 7.53405, + "34": 7.57605, + "35": 7.11551, + "36": 6.98266, + "37": 7.3547, + "38": 7.10146, + "39": 7.45514, + "40": 7.47661, + "41": 7.38493, + "42": 7.16196, + "43": 7.16146, + "44": 7.32321, + "45": 7.06075, + "46": 6.85869, + "47": 7.1873, + "48": 7.0107, + "49": 7.499, + "50": 6.91727, + "51": 6.99178, + "52": 7.33037, + "53": 7.29667, + "54": 7.19029, + "55": 6.90141, + "56": 7.26158, + "57": 6.96528, + "58": 7.23665, + "59": 7.13426, + "60": 6.55776, + "61": 6.78483, + "62": 7.21469, + "63": 7.27801, + "64": 6.68571, + "65": 7.20837, + "66": 7.41631, + "67": 7.34739, + "68": 6.92584, + "69": 6.90426, + "70": 6.83209, + "71": 6.82068, + "72": 6.93784, + "73": 7.01123, + "74": 6.98841, + "75": 6.91686, + "76": 6.38111, + "77": 7.27777, + "78": 6.82414, + "79": 6.71131, + "80": 6.882, + "81": 6.68271, + "82": 7.19003, + "83": 6.86414, + "84": 6.81403, + "85": 7.02441, + "86": 6.89644, + "87": 6.986, + "88": 6.9439, + "89": 6.72755, + "90": 6.90594, + "91": 6.53608, + "92": 6.53464, + "93": 6.64445, + "94": 6.86298, + "95": 7.01716, + "96": 7.20844, + "97": 7.00836, + "98": 6.83151, + "99": 6.91421, + "100": 6.88296 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38804132.0, + "2": 38545096.0, + "3": 38740992.0, + "4": 191860448.0, + "5": 469945088.0, + "6": 554567872.0, + "7": 828155904.0, + "8": 696552320.0, + "9": 711859520.0, + "10": 693895808.0, + "11": 677958720.0, + "12": 557933504.0, + "13": 598245696.0, + "14": 522736704.0, + "15": 274421376.0, + "16": 617690496.0, + "17": 592303488.0, + "18": 661689984.0, + "19": 935433728.0, + "20": 769077888.0, + "21": 605350016.0, + "22": 541961024.0, + "23": 577050304.0, + "24": 669105408.0, + "25": 780257472.0, + "26": 837892032.0, + "27": 813196736.0, + "28": 755681920.0, + "29": 770925056.0, + "30": 816536448.0, + "31": 822088000.0, + "32": 812650432.0, + "33": 620913152.0, + "34": 784546752.0, + "35": 787994368.0, + "36": 750190400.0, + "37": 748699328.0, + "38": 501445568.0, + "39": 717859200.0, + "40": 667057408.0, + "41": 654946368.0, + "42": 501043680.0, + "43": 636546304.0, + "44": 639180672.0, + "45": 602875008.0, + "46": 617602112.0, + "47": 551064384.0, + "48": 548162048.0, + "49": 522391168.0, + "50": 510662592.0, + "51": 517637248.0, + "52": 479262816.0, + "53": 471618368.0, + "54": 451708448.0, + "55": 430039712.0, + "56": 442478208.0, + "57": 459896384.0, + "58": 422629184.0, + "59": 420309376.0, + "60": 359403680.0, + "61": 319651520.0, + "62": 271950464.0, + "63": 240061840.0, + "64": 168316704.0, + "65": 224401456.0, + "66": 208531360.0, + "67": 131824448.0, + "68": 183079904.0, + "69": 173435008.0, + "70": 179471232.0, + "71": 152666272.0, + "72": 170013696.0, + "73": 164094544.0, + "74": 149156896.0, + "75": 142684336.0, + "76": 90571040.0, + "77": 125834840.0, + "78": 130033592.0, + "79": 120469488.0, + "80": 127397512.0, + "81": 116401728.0, + "82": 76397656.0, + "83": 113964200.0, + "84": 98199616.0, + "85": 98272368.0, + "86": 91851520.0, + "87": 89356744.0, + "88": 89715232.0, + "89": 89418272.0, + "90": 85301760.0, + "91": 82131936.0, + "92": 74720312.0, + "93": 93503488.0, + "94": 57433660.0, + "95": 86022384.0, + "96": 78318800.0, + "97": 79639360.0, + "98": 86005352.0, + "99": 79678312.0, + "100": 73739608.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7793740800.0, + "2": 7793742848.0, + "3": 7793742848.0, + "4": 7793742848.0, + "5": 7793742848.0, + "6": 7793742848.0, + "7": 7793742848.0, + "8": 7793742848.0, + "9": 7793742848.0, + "10": 7793742848.0, + "11": 7793742848.0, + "12": 7793742848.0, + "13": 7793742848.0, + "14": 7793742848.0, + "15": 7793742848.0, + "16": 7793742848.0, + "17": 7793742848.0, + "18": 7793742848.0, + "19": 7793742848.0, + "20": 7793742848.0, + "21": 7793742848.0, + "22": 7793742848.0, + "23": 7793742848.0, + "24": 7793742848.0, + "25": 7793742848.0, + "26": 7793742848.0, + "27": 7793742848.0, + "28": 7793742848.0, + "29": 7793742848.0, + "30": 7793742848.0, + "31": 7793742848.0, + "32": 7793742848.0, + "33": 7793742848.0, + "34": 7793742848.0, + "35": 7793742848.0, + "36": 7793742848.0, + "37": 7793742848.0, + "38": 7793742848.0, + "39": 7793742848.0, + "40": 7793742848.0, + "41": 7793742848.0, + "42": 7793742848.0, + "43": 7793742848.0, + "44": 7793742848.0, + "45": 7793742848.0, + "46": 7793742848.0, + "47": 7793742848.0, + "48": 7793742848.0, + "49": 7793742848.0, + "50": 7793742848.0, + "51": 7793742848.0, + "52": 7793742848.0, + "53": 7793742848.0, + "54": 7793742848.0, + "55": 7793742848.0, + "56": 7793742848.0, + "57": 7793742848.0, + "58": 7793742848.0, + "59": 7793742848.0, + "60": 7793742848.0, + "61": 7793742848.0, + "62": 7793742848.0, + "63": 7793742848.0, + "64": 7793742848.0, + "65": 7793742848.0, + "66": 7793742848.0, + "67": 7793742848.0, + "68": 7793742848.0, + "69": 7793742848.0, + "70": 7793742848.0, + "71": 7793742848.0, + "72": 7793742848.0, + "73": 7793742848.0, + "74": 7793742848.0, + "75": 7793742848.0, + "76": 7793742848.0, + "77": 7793742848.0, + "78": 7793742848.0, + "79": 7793742848.0, + "80": 7793742848.0, + "81": 7793742848.0, + "82": 7793742848.0, + "83": 7793742848.0, + "84": 7793742848.0, + "85": 7793742848.0, + "86": 7793742848.0, + "87": 7793742848.0, + "88": 7793742848.0, + "89": 7793742848.0, + "90": 7793742848.0, + "91": 7793742848.0, + "92": 7793742848.0, + "93": 7793742848.0, + "94": 7793742848.0, + "95": 7793742848.0, + "96": 7793742848.0, + "97": 7793742848.0, + "98": 7793742848.0, + "99": 7793742848.0, + "100": 7793742848.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 43992092672.0, + "2": 45668999168.0, + "3": 45668999168.0, + "4": 45668999168.0, + "5": 45668999168.0, + "6": 45668999168.0, + "7": 45668999168.0, + "8": 45668999168.0, + "9": 45668999168.0, + "10": 45668999168.0, + "11": 45668999168.0, + "12": 45668999168.0, + "13": 45668999168.0, + "14": 45668999168.0, + "15": 45668999168.0, + "16": 45668999168.0, + "17": 45668999168.0, + "18": 45668999168.0, + "19": 45668999168.0, + "20": 45668999168.0, + "21": 45668999168.0, + "22": 45668999168.0, + "23": 46092484608.0, + "24": 46092484608.0, + "25": 46951522304.0, + "26": 46951522304.0, + "27": 47083143168.0, + "28": 47083143168.0, + "29": 47083143168.0, + "30": 47083143168.0, + "31": 47748059136.0, + "32": 47748059136.0, + "33": 47748059136.0, + "34": 47748059136.0, + "35": 47748059136.0, + "36": 47748059136.0, + "37": 47748059136.0, + "38": 47748059136.0, + "39": 47748059136.0, + "40": 47748059136.0, + "41": 47748059136.0, + "42": 47748059136.0, + "43": 47748059136.0, + "44": 47748059136.0, + "45": 47748059136.0, + "46": 47748059136.0, + "47": 47748059136.0, + "48": 47748059136.0, + "49": 47748059136.0, + "50": 47748059136.0, + "51": 47748059136.0, + "52": 47748059136.0, + "53": 47748059136.0, + "54": 47748059136.0, + "55": 47748059136.0, + "56": 47748059136.0, + "57": 47748059136.0, + "58": 47748059136.0, + "59": 47748059136.0, + "60": 47748059136.0, + "61": 47748059136.0, + "62": 47748059136.0, + "63": 47748059136.0, + "64": 47748059136.0, + "65": 47748059136.0, + "66": 47748059136.0, + "67": 47748059136.0, + "68": 47748059136.0, + "69": 47748059136.0, + "70": 47748059136.0, + "71": 47748059136.0, + "72": 47748059136.0, + "73": 47748059136.0, + "74": 47748059136.0, + "75": 47748059136.0, + "76": 47748059136.0, + "77": 47748059136.0, + "78": 47748059136.0, + "79": 47748059136.0, + "80": 47748059136.0, + "81": 47748059136.0, + "82": 47748059136.0, + "83": 47748059136.0, + "84": 47748059136.0, + "85": 47748059136.0, + "86": 47748059136.0, + "87": 47748059136.0, + "88": 47748059136.0, + "89": 47748059136.0, + "90": 47748059136.0, + "91": 47748059136.0, + "92": 47748059136.0, + "93": 47748059136.0, + "94": 47748059136.0, + "95": 47748059136.0, + "96": 47748059136.0, + "97": 47748059136.0, + "98": 47748059136.0, + "99": 47748059136.0, + "100": 47748059136.0 + } + }, + "seq_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.31167, + "2": 1.30926, + "3": 1.43115, + "4": 1.4523, + "5": 1.43234, + "6": 1.4088, + "7": 1.38983, + "8": 1.37384, + "9": 1.35668, + "10": 1.33865, + "11": 1.3262, + "12": 1.31507, + "13": 1.29226, + "14": 1.2764, + "15": 1.26163, + "16": 1.24752, + "17": 1.23291, + "18": 1.21817, + "19": 1.21405, + "20": 1.20285, + "21": 1.19283, + "22": 1.18555, + "23": 1.17712, + "24": 1.17645, + "25": 1.17224, + "26": 1.17145, + "27": 1.16931, + "28": 1.16745, + "29": 1.1672, + "30": 1.16742, + "31": 1.1701, + "32": 1.17163, + "33": 1.16678, + "34": 1.16448, + "35": 1.16211, + "36": 1.16691, + "37": 1.16747, + "38": 1.164, + "39": 1.16224, + "40": 1.16698, + "41": 1.16882, + "42": 1.16984, + "43": 1.1639, + "44": 1.16284, + "45": 1.1701, + "46": 1.16358, + "47": 1.16599, + "48": 1.16776, + "49": 1.16046, + "50": 1.16608, + "51": 1.16425, + "52": 1.15275, + "53": 1.15782, + "54": 1.15722, + "55": 1.15861, + "56": 1.16001, + "57": 1.16516, + "58": 1.15489, + "59": 1.1547, + "60": 1.1595, + "61": 1.16096, + "62": 1.15559, + "63": 1.16035, + "64": 1.16627, + "65": 1.15181, + "66": 1.15714, + "67": 1.1505, + "68": 1.15997, + "69": 1.14824, + "70": 1.15338, + "71": 1.16226, + "72": 1.15278, + "73": 1.15102, + "74": 1.15109, + "75": 1.15299, + "76": 1.14941, + "77": 1.14957, + "78": 1.14798, + "79": 1.14389, + "80": 1.14489, + "81": 1.14861, + "82": 1.14822, + "83": 1.14897, + "84": 1.1325, + "85": 1.14769, + "86": 1.14473, + "87": 1.15004, + "88": 1.15466, + "89": 1.15394, + "90": 1.13727, + "91": 1.14661, + "92": 1.15443, + "93": 1.15798, + "94": 1.14635, + "95": 1.14644, + "96": 1.15251, + "97": 1.15175, + "98": 1.14105, + "99": 1.13608, + "100": 1.13854 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.04145, + "2": 11.08739, + "3": 10.61668, + "4": 9.99926, + "5": 9.75955, + "6": 9.46552, + "7": 9.54322, + "8": 8.89647, + "9": 8.73896, + "10": 9.05513, + "11": 8.41543, + "12": 8.43994, + "13": 8.33875, + "14": 7.77286, + "15": 8.08646, + "16": 7.96007, + "17": 7.90493, + "18": 7.6478, + "19": 8.01922, + "20": 7.73356, + "21": 7.40287, + "22": 7.38068, + "23": 7.23358, + "24": 7.23351, + "25": 7.49754, + "26": 6.90513, + "27": 7.42035, + "28": 7.14383, + "29": 7.31982, + "30": 7.42647, + "31": 7.18569, + "32": 7.37901, + "33": 7.42072, + "34": 7.46728, + "35": 7.00228, + "36": 6.86059, + "37": 7.21489, + "38": 6.9813, + "39": 7.33525, + "40": 7.32636, + "41": 7.24717, + "42": 7.01148, + "43": 7.00395, + "44": 7.17351, + "45": 6.89802, + "46": 6.68257, + "47": 7.02913, + "48": 6.85296, + "49": 7.29132, + "50": 6.76494, + "51": 6.80934, + "52": 7.13279, + "53": 7.09017, + "54": 7.00164, + "55": 6.72661, + "56": 7.09721, + "57": 6.82634, + "58": 7.06382, + "59": 6.96457, + "60": 6.41357, + "61": 6.65804, + "62": 7.03102, + "63": 7.09771, + "64": 6.5355, + "65": 7.03566, + "66": 7.23339, + "67": 7.19156, + "68": 6.78603, + "69": 6.74922, + "70": 6.69599, + "71": 6.67986, + "72": 6.79873, + "73": 6.87904, + "74": 6.82716, + "75": 6.801, + "76": 6.21161, + "77": 7.12782, + "78": 6.67592, + "79": 6.5781, + "80": 6.73393, + "81": 6.53107, + "82": 7.05013, + "83": 6.76328, + "84": 6.6967, + "85": 6.90902, + "86": 6.75866, + "87": 6.84258, + "88": 6.83459, + "89": 6.62726, + "90": 6.81391, + "91": 6.40106, + "92": 6.42113, + "93": 6.51575, + "94": 6.74897, + "95": 6.88457, + "96": 7.05681, + "97": 6.8961, + "98": 6.70371, + "99": 6.78429, + "100": 6.77422 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 29.20842, + "3": 0.90408, + "4": 0.79392, + "5": 0.98455, + "6": 0.92658, + "7": 0.87081, + "8": 0.58812, + "9": 0.58921, + "10": 0.59382, + "11": 0.59422, + "12": 0.64475, + "13": 0.6309, + "14": 0.58156, + "15": 0.58459, + "16": 0.56547, + "17": 0.55768, + "18": 0.60495, + "19": 0.58795, + "20": 0.5731, + "21": 0.60997, + "22": 0.57299, + "23": 0.6361, + "24": 0.64423, + "25": 0.60363, + "26": 0.56674, + "27": 0.59934, + "28": 0.56335, + "29": 0.55038, + "30": 0.55164, + "31": 0.56835, + "32": 0.55198, + "33": 0.5536, + "34": 0.55786, + "35": 0.57304, + "36": 0.5642, + "37": 0.56948, + "38": 0.58947, + "39": 0.55441, + "40": 0.5543, + "41": 0.54155, + "42": 0.54251, + "43": 0.54028, + "44": 0.53438, + "45": 0.53459, + "46": 0.52824, + "47": 0.52012, + "48": 0.52134, + "49": 0.5343, + "50": 0.51867, + "51": 0.51689, + "52": 0.51542, + "53": 0.51033, + "54": 0.51783, + "55": 0.51485, + "56": 0.52317, + "57": 0.52653, + "58": 0.524, + "59": 0.52576, + "60": 0.50615, + "61": 0.50858, + "62": 0.50825, + "63": 0.50068, + "64": 0.50985, + "65": 0.5015, + "66": 0.50584, + "67": 0.50815, + "68": 0.49342, + "69": 0.49008, + "70": 0.48751, + "71": 0.49095, + "72": 0.48433, + "73": 0.48575, + "74": 0.48409, + "75": 0.49508, + "76": 0.49142, + "77": 0.48295, + "78": 0.48614, + "79": 0.48561, + "80": 0.48939, + "81": 0.48319, + "82": 0.49521, + "83": 0.48456, + "84": 0.49245, + "85": 0.47886, + "86": 0.48679, + "87": 0.48639, + "88": 0.4889, + "89": 0.48387, + "90": 0.48797, + "91": 0.48463, + "92": 0.50283, + "93": 0.48255, + "94": 0.48359, + "95": 0.46856, + "96": 0.47002, + "97": 0.46896, + "98": 0.47559, + "99": 0.46956, + "100": 0.47554 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4/golden_values_dev_dgx_h100.json index e69de29bb2d..4bf5ada1664 100644 --- a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4/golden_values_dev_dgx_h100.json @@ -0,0 +1,858 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.07142, + "2": 11.05413, + "3": 9.41422, + "4": 9.29981, + "5": 9.25564, + "6": 9.30969, + "7": 9.18591, + "8": 8.81179, + "9": 8.65551, + "10": 8.87463, + "11": 8.43057, + "12": 8.40999, + "13": 8.31283, + "14": 7.84594, + "15": 7.95449, + "16": 7.96506, + "17": 7.93124, + "18": 7.64066, + "19": 8.01007, + "20": 7.74226, + "21": 7.42989, + "22": 7.41017, + "23": 7.29166, + "24": 7.26189, + "25": 7.54958, + "26": 6.95772, + "27": 7.46791, + "28": 7.21967, + "29": 7.37, + "30": 7.46901, + "31": 7.26375, + "32": 7.44362, + "33": 7.48341, + "34": 7.5155, + "35": 7.07913, + "36": 6.93245, + "37": 7.27953, + "38": 7.05234, + "39": 7.39657, + "40": 7.41941, + "41": 7.34297, + "42": 7.09536, + "43": 7.09448, + "44": 7.25638, + "45": 6.98753, + "46": 6.77374, + "47": 7.11204, + "48": 6.92353, + "49": 7.42381, + "50": 6.83272, + "51": 6.91704, + "52": 7.23123, + "53": 7.20727, + "54": 7.07761, + "55": 6.79735, + "56": 7.17516, + "57": 6.8886, + "58": 7.13828, + "59": 7.04353, + "60": 6.46753, + "61": 6.69176, + "62": 7.1224, + "63": 7.16815, + "64": 6.58276, + "65": 7.11279, + "66": 7.30825, + "67": 7.24415, + "68": 6.83509, + "69": 6.80037, + "70": 6.7352, + "71": 6.72572, + "72": 6.85652, + "73": 6.90321, + "74": 6.88074, + "75": 6.82241, + "76": 6.28543, + "77": 7.17591, + "78": 6.73455, + "79": 6.62453, + "80": 6.79539, + "81": 6.58539, + "82": 7.10337, + "83": 6.7708, + "84": 6.73138, + "85": 6.94173, + "86": 6.79154, + "87": 6.89479, + "88": 6.84261, + "89": 6.61959, + "90": 6.82293, + "91": 6.44479, + "92": 6.43616, + "93": 6.54708, + "94": 6.77269, + "95": 6.91905, + "96": 7.10722, + "97": 6.91774, + "98": 6.73085, + "99": 6.81612, + "100": 6.78873 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38802304.0, + "2": 38543316.0, + "3": 38739716.0, + "4": 254770176.0, + "5": 350425600.0, + "6": 425638016.0, + "7": 623703296.0, + "8": 806625152.0, + "9": 721296384.0, + "10": 637289920.0, + "11": 652775296.0, + "12": 576820096.0, + "13": 739812992.0, + "14": 667453312.0, + "15": 683353728.0, + "16": 715193856.0, + "17": 674052480.0, + "18": 677406848.0, + "19": 781333632.0, + "20": 872815680.0, + "21": 621074432.0, + "22": 645730432.0, + "23": 649419008.0, + "24": 766614784.0, + "25": 638704448.0, + "26": 611412288.0, + "27": 548973952.0, + "28": 588975296.0, + "29": 660812544.0, + "30": 464224448.0, + "31": 645924160.0, + "32": 551573312.0, + "33": 529652736.0, + "34": 397634976.0, + "35": 379052736.0, + "36": 419916000.0, + "37": 522206816.0, + "38": 485687488.0, + "39": 488229504.0, + "40": 475196160.0, + "41": 554277248.0, + "42": 479008096.0, + "43": 457256128.0, + "44": 488179392.0, + "45": 455024832.0, + "46": 441437728.0, + "47": 450395840.0, + "48": 469518272.0, + "49": 453178848.0, + "50": 409987936.0, + "51": 401248992.0, + "52": 384889312.0, + "53": 377247616.0, + "54": 319588928.0, + "55": 291620960.0, + "56": 278896608.0, + "57": 299464352.0, + "58": 315668032.0, + "59": 278745696.0, + "60": 249305328.0, + "61": 215837888.0, + "62": 199600496.0, + "63": 186579840.0, + "64": 168300384.0, + "65": 170928288.0, + "66": 183364480.0, + "67": 188439072.0, + "68": 173639616.0, + "69": 148267152.0, + "70": 132280824.0, + "71": 121204288.0, + "72": 113390472.0, + "73": 113757880.0, + "74": 114551080.0, + "75": 111222240.0, + "76": 99998992.0, + "77": 84941728.0, + "78": 60830400.0, + "79": 63845180.0, + "80": 70773288.0, + "81": 62921688.0, + "82": 70094696.0, + "83": 63630304.0, + "84": 60446480.0, + "85": 63673132.0, + "86": 54102528.0, + "87": 54751776.0, + "88": 51964280.0, + "89": 45373792.0, + "90": 47550896.0, + "91": 47524304.0, + "92": 46403888.0, + "93": 49461412.0, + "94": 41698740.0, + "95": 51417496.0, + "96": 43717368.0, + "97": 45034096.0, + "98": 51400200.0, + "99": 45072824.0, + "100": 51712180.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6622729728.0, + "2": 6625564672.0, + "3": 6626720768.0, + "4": 6623774208.0, + "5": 6629797888.0, + "6": 6626633728.0, + "7": 6621863936.0, + "8": 6618180608.0, + "9": 6617389568.0, + "10": 6616993792.0, + "11": 6622841344.0, + "12": 6625548800.0, + "13": 6631774208.0, + "14": 6626592768.0, + "15": 6622616064.0, + "16": 6625446400.0, + "17": 6625302016.0, + "18": 6625773056.0, + "19": 6629479424.0, + "20": 6633257472.0, + "21": 6630202368.0, + "22": 6628777984.0, + "23": 6633977856.0, + "24": 6628515840.0, + "25": 6631868416.0, + "26": 6628221952.0, + "27": 6631277568.0, + "28": 6631871488.0, + "29": 6634893312.0, + "30": 6636029952.0, + "31": 6635680256.0, + "32": 6636499456.0, + "33": 6636167680.0, + "34": 6633662464.0, + "35": 6628590080.0, + "36": 6626011648.0, + "37": 6625255424.0, + "38": 6625512448.0, + "39": 6626657280.0, + "40": 6628428288.0, + "41": 6625785856.0, + "42": 6627491840.0, + "43": 6628086784.0, + "44": 6628125696.0, + "45": 6629608448.0, + "46": 6629067264.0, + "47": 6631359488.0, + "48": 6631199232.0, + "49": 6628061696.0, + "50": 6626971648.0, + "51": 6628839424.0, + "52": 6626890240.0, + "53": 6625215488.0, + "54": 6624706048.0, + "55": 6624573440.0, + "56": 6623612928.0, + "57": 6624032256.0, + "58": 6624287744.0, + "59": 6622080000.0, + "60": 6623175680.0, + "61": 6623362048.0, + "62": 6624685568.0, + "63": 6623989248.0, + "64": 6624293376.0, + "65": 6623753728.0, + "66": 6623067648.0, + "67": 6625096704.0, + "68": 6624457728.0, + "69": 6624243712.0, + "70": 6626114560.0, + "71": 6625946624.0, + "72": 6626314752.0, + "73": 6626580480.0, + "74": 6627305984.0, + "75": 6627757056.0, + "76": 6625823232.0, + "77": 6626132480.0, + "78": 6626560512.0, + "79": 6626621952.0, + "80": 6627099136.0, + "81": 6628377088.0, + "82": 6628811776.0, + "83": 6627793920.0, + "84": 6627970560.0, + "85": 6627587584.0, + "86": 6627512832.0, + "87": 6627816448.0, + "88": 6629760512.0, + "89": 6628537344.0, + "90": 6627757056.0, + "91": 6629210112.0, + "92": 6629846528.0, + "93": 6628005888.0, + "94": 6628604416.0, + "95": 6627637248.0, + "96": 6627229184.0, + "97": 6630202880.0, + "98": 6626966528.0, + "99": 6629649408.0, + "100": 6629208064.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 20054853632.0, + "2": 22790113280.0, + "3": 24304586752.0, + "4": 24304586752.0, + "5": 24304586752.0, + "6": 24304586752.0, + "7": 24304586752.0, + "8": 24304586752.0, + "9": 24304586752.0, + "10": 24304586752.0, + "11": 24304586752.0, + "12": 24304586752.0, + "13": 24304586752.0, + "14": 24304586752.0, + "15": 24304586752.0, + "16": 24304586752.0, + "17": 24304586752.0, + "18": 24304586752.0, + "19": 24304586752.0, + "20": 24304586752.0, + "21": 24304586752.0, + "22": 24304586752.0, + "23": 24304586752.0, + "24": 24304586752.0, + "25": 24304586752.0, + "26": 24304586752.0, + "27": 24304586752.0, + "28": 24423684096.0, + "29": 25248888832.0, + "30": 25704962048.0, + "31": 25708345344.0, + "32": 25708345344.0, + "33": 25708345344.0, + "34": 25708345344.0, + "35": 25708345344.0, + "36": 25708345344.0, + "37": 25708345344.0, + "38": 25708345344.0, + "39": 25708345344.0, + "40": 25708345344.0, + "41": 25708345344.0, + "42": 25708345344.0, + "43": 25708345344.0, + "44": 25708345344.0, + "45": 25708345344.0, + "46": 25708345344.0, + "47": 25708345344.0, + "48": 25708345344.0, + "49": 25708345344.0, + "50": 25708345344.0, + "51": 25708345344.0, + "52": 25708345344.0, + "53": 25708345344.0, + "54": 25708345344.0, + "55": 25708345344.0, + "56": 25708345344.0, + "57": 25708345344.0, + "58": 25708345344.0, + "59": 25708345344.0, + "60": 25708345344.0, + "61": 25708345344.0, + "62": 25708345344.0, + "63": 25708345344.0, + "64": 25708345344.0, + "65": 25708345344.0, + "66": 25708345344.0, + "67": 25708345344.0, + "68": 25708345344.0, + "69": 25708345344.0, + "70": 25708345344.0, + "71": 25708345344.0, + "72": 25708345344.0, + "73": 25708345344.0, + "74": 25708345344.0, + "75": 25708345344.0, + "76": 25708345344.0, + "77": 25708345344.0, + "78": 25708345344.0, + "79": 25708345344.0, + "80": 25708345344.0, + "81": 25708345344.0, + "82": 25708345344.0, + "83": 25708345344.0, + "84": 25708345344.0, + "85": 25708345344.0, + "86": 25708345344.0, + "87": 25708345344.0, + "88": 25708345344.0, + "89": 25708345344.0, + "90": 25708345344.0, + "91": 25708345344.0, + "92": 25708345344.0, + "93": 25708345344.0, + "94": 25708345344.0, + "95": 25708345344.0, + "96": 25708345344.0, + "97": 25708345344.0, + "98": 25708345344.0, + "99": 25708345344.0, + "100": 25708345344.0 + } + }, + "seq_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.32179, + "2": 1.32081, + "3": 1.40544, + "4": 1.44542, + "5": 1.42633, + "6": 1.42782, + "7": 1.3981, + "8": 1.38322, + "9": 1.36748, + "10": 1.34171, + "11": 1.3347, + "12": 1.31512, + "13": 1.28998, + "14": 1.27381, + "15": 1.26177, + "16": 1.23403, + "17": 1.22591, + "18": 1.21757, + "19": 1.21335, + "20": 1.20301, + "21": 1.19458, + "22": 1.19029, + "23": 1.17493, + "24": 1.184, + "25": 1.18292, + "26": 1.17969, + "27": 1.17468, + "28": 1.17074, + "29": 1.1687, + "30": 1.17149, + "31": 1.1727, + "32": 1.17442, + "33": 1.16811, + "34": 1.1674, + "35": 1.16249, + "36": 1.17582, + "37": 1.16965, + "38": 1.17008, + "39": 1.17312, + "40": 1.18051, + "41": 1.18106, + "42": 1.17656, + "43": 1.16793, + "44": 1.17484, + "45": 1.18229, + "46": 1.17324, + "47": 1.17681, + "48": 1.1822, + "49": 1.17302, + "50": 1.18087, + "51": 1.18084, + "52": 1.16689, + "53": 1.16932, + "54": 1.16765, + "55": 1.16703, + "56": 1.17193, + "57": 1.18044, + "58": 1.16039, + "59": 1.1594, + "60": 1.16355, + "61": 1.16596, + "62": 1.14838, + "63": 1.15829, + "64": 1.16945, + "65": 1.14575, + "66": 1.15327, + "67": 1.14786, + "68": 1.15491, + "69": 1.13819, + "70": 1.1518, + "71": 1.15888, + "72": 1.14719, + "73": 1.14296, + "74": 1.14365, + "75": 1.14958, + "76": 1.14487, + "77": 1.13468, + "78": 1.1409, + "79": 1.13758, + "80": 1.13782, + "81": 1.14212, + "82": 1.1399, + "83": 1.14285, + "84": 1.12154, + "85": 1.14184, + "86": 1.13242, + "87": 1.14226, + "88": 1.15319, + "89": 1.15288, + "90": 1.1302, + "91": 1.14227, + "92": 1.15494, + "93": 1.15648, + "94": 1.13536, + "95": 1.13617, + "96": 1.13846, + "97": 1.14565, + "98": 1.12899, + "99": 1.12504, + "100": 1.12607 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.10044, + "2": 11.12823, + "3": 10.55213, + "4": 10.04964, + "5": 9.7869, + "6": 9.49344, + "7": 9.57262, + "8": 8.86568, + "9": 8.67845, + "10": 8.98324, + "11": 8.35023, + "12": 8.37974, + "13": 8.28078, + "14": 7.73439, + "15": 7.87749, + "16": 7.92164, + "17": 7.86089, + "18": 7.59423, + "19": 7.97065, + "20": 7.69382, + "21": 7.37375, + "22": 7.3566, + "23": 7.21922, + "24": 7.23312, + "25": 7.48372, + "26": 6.89702, + "27": 7.42049, + "28": 7.14881, + "29": 7.32346, + "30": 7.43162, + "31": 7.20126, + "32": 7.39642, + "33": 7.44971, + "34": 7.48795, + "35": 7.01901, + "36": 6.8818, + "37": 7.23161, + "38": 7.00393, + "39": 7.35246, + "40": 7.35628, + "41": 7.28697, + "42": 7.03143, + "43": 7.01723, + "44": 7.18863, + "45": 6.91106, + "46": 6.6791, + "47": 7.03273, + "48": 6.84651, + "49": 7.30642, + "50": 6.767, + "51": 6.82337, + "52": 7.13373, + "53": 7.10574, + "54": 6.9858, + "55": 6.7203, + "56": 7.09525, + "57": 6.82247, + "58": 7.05035, + "59": 6.94837, + "60": 6.40764, + "61": 6.63637, + "62": 7.02197, + "63": 7.0827, + "64": 6.50953, + "65": 7.03034, + "66": 7.22802, + "67": 7.16716, + "68": 6.76767, + "69": 6.73838, + "70": 6.68015, + "71": 6.65566, + "72": 6.78079, + "73": 6.86062, + "74": 6.81465, + "75": 6.77854, + "76": 6.19405, + "77": 7.1145, + "78": 6.67143, + "79": 6.56991, + "80": 6.72563, + "81": 6.51905, + "82": 7.03744, + "83": 6.73509, + "84": 6.68434, + "85": 6.88553, + "86": 6.73052, + "87": 6.83497, + "88": 6.80328, + "89": 6.5957, + "90": 6.7944, + "91": 6.37762, + "92": 6.38758, + "93": 6.4994, + "94": 6.72621, + "95": 6.86145, + "96": 7.04502, + "97": 6.85741, + "98": 6.68516, + "99": 6.77496, + "100": 6.75409 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 33.60398, + "3": 1.15557, + "4": 4.71858, + "5": 1.15955, + "6": 7.82597, + "7": 0.96936, + "8": 4.33743, + "9": 0.90016, + "10": 0.90072, + "11": 0.89275, + "12": 0.91431, + "13": 0.88638, + "14": 0.89245, + "15": 6.27044, + "16": 0.88754, + "17": 4.09088, + "18": 4.20666, + "19": 0.90246, + "20": 4.17963, + "21": 0.90344, + "22": 0.99545, + "23": 0.88976, + "24": 0.88146, + "25": 0.8868, + "26": 0.92177, + "27": 0.87902, + "28": 0.87033, + "29": 0.88946, + "30": 0.93099, + "31": 0.87277, + "32": 0.86588, + "33": 0.86378, + "34": 0.87212, + "35": 0.86452, + "36": 0.87398, + "37": 0.86625, + "38": 0.86523, + "39": 0.86842, + "40": 0.86705, + "41": 0.86881, + "42": 0.87315, + "43": 0.86897, + "44": 0.88076, + "45": 0.86036, + "46": 0.85693, + "47": 0.85712, + "48": 0.85769, + "49": 0.85933, + "50": 0.85794, + "51": 0.86073, + "52": 0.86415, + "53": 0.86542, + "54": 0.86158, + "55": 0.85237, + "56": 0.85583, + "57": 0.85582, + "58": 0.85935, + "59": 0.86041, + "60": 0.8536, + "61": 0.8498, + "62": 0.85278, + "63": 0.85055, + "64": 0.84784, + "65": 0.85647, + "66": 0.85027, + "67": 0.85019, + "68": 0.84861, + "69": 0.84997, + "70": 0.84624, + "71": 0.8458, + "72": 0.84757, + "73": 0.84421, + "74": 0.84409, + "75": 0.84376, + "76": 0.84174, + "77": 0.8436, + "78": 0.83875, + "79": 0.84065, + "80": 0.84095, + "81": 0.84102, + "82": 0.84129, + "83": 0.83751, + "84": 0.83908, + "85": 0.84061, + "86": 0.85312, + "87": 0.8354, + "88": 0.84082, + "89": 0.83683, + "90": 0.83975, + "91": 0.89816, + "92": 0.83989, + "93": 0.83765, + "94": 0.83708, + "95": 0.83886, + "96": 0.83415, + "97": 0.83783, + "98": 0.835, + "99": 0.83557, + "100": 0.83903 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_a2aOverlap/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_a2aOverlap/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..bc881bd9fb0 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_a2aOverlap/golden_values_dev_dgx_h100.json @@ -0,0 +1,858 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.07155, + "2": 11.0538, + "3": 9.41359, + "4": 9.29933, + "5": 9.25373, + "6": 9.32801, + "7": 9.18878, + "8": 8.82264, + "9": 8.6687, + "10": 8.87185, + "11": 8.42081, + "12": 8.40786, + "13": 8.31239, + "14": 7.8467, + "15": 7.94982, + "16": 7.95841, + "17": 7.93815, + "18": 7.64124, + "19": 8.00725, + "20": 7.75468, + "21": 7.431, + "22": 7.41103, + "23": 7.29534, + "24": 7.26211, + "25": 7.54429, + "26": 6.96248, + "27": 7.46437, + "28": 7.21471, + "29": 7.36571, + "30": 7.47084, + "31": 7.25541, + "32": 7.4408, + "33": 7.48197, + "34": 7.51051, + "35": 7.07108, + "36": 6.92477, + "37": 7.27187, + "38": 7.04636, + "39": 7.38826, + "40": 7.4151, + "41": 7.33767, + "42": 7.09182, + "43": 7.08153, + "44": 7.25573, + "45": 6.98279, + "46": 6.7733, + "47": 7.10516, + "48": 6.91707, + "49": 7.41064, + "50": 6.83055, + "51": 6.89878, + "52": 7.22238, + "53": 7.1994, + "54": 7.0764, + "55": 6.79205, + "56": 7.17089, + "57": 6.88796, + "58": 7.13266, + "59": 7.04285, + "60": 6.46095, + "61": 6.69145, + "62": 7.11075, + "63": 7.16571, + "64": 6.57966, + "65": 7.11448, + "66": 7.2995, + "67": 7.23233, + "68": 6.81897, + "69": 6.79505, + "70": 6.72937, + "71": 6.71594, + "72": 6.85402, + "73": 6.89826, + "74": 6.8662, + "75": 6.81135, + "76": 6.27045, + "77": 7.16606, + "78": 6.72821, + "79": 6.61863, + "80": 6.78822, + "81": 6.57628, + "82": 7.10216, + "83": 6.76698, + "84": 6.72461, + "85": 6.93101, + "86": 6.78695, + "87": 6.89045, + "88": 6.83805, + "89": 6.61319, + "90": 6.80872, + "91": 6.43815, + "92": 6.42821, + "93": 6.5412, + "94": 6.76304, + "95": 6.91174, + "96": 7.09613, + "97": 6.91391, + "98": 6.72196, + "99": 6.80635, + "100": 6.77592 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38802352.0, + "2": 38543272.0, + "3": 38739640.0, + "4": 261081696.0, + "5": 337838368.0, + "6": 428731968.0, + "7": 639386880.0, + "8": 759476224.0, + "9": 661533888.0, + "10": 568064512.0, + "11": 602437504.0, + "12": 617705728.0, + "13": 686334400.0, + "14": 588796672.0, + "15": 642460224.0, + "16": 727852800.0, + "17": 661466752.0, + "18": 623943168.0, + "19": 598841408.0, + "20": 614885568.0, + "21": 564443456.0, + "22": 614253312.0, + "23": 573911424.0, + "24": 568464000.0, + "25": 528610080.0, + "26": 504440960.0, + "27": 486060096.0, + "28": 434807360.0, + "29": 506708352.0, + "30": 395021152.0, + "31": 501240160.0, + "32": 432017920.0, + "33": 444718752.0, + "34": 413344800.0, + "35": 344452416.0, + "36": 378999712.0, + "37": 481309984.0, + "38": 388162720.0, + "39": 321501824.0, + "40": 399666912.0, + "41": 459905728.0, + "42": 453838848.0, + "43": 447805312.0, + "44": 453574944.0, + "45": 420421952.0, + "46": 359649952.0, + "47": 368608960.0, + "48": 381441088.0, + "49": 355652672.0, + "50": 321915392.0, + "51": 303729504.0, + "52": 296816224.0, + "53": 276580032.0, + "54": 234652448.0, + "55": 231855920.0, + "56": 234858464.0, + "57": 255419360.0, + "58": 240173792.0, + "59": 206393568.0, + "60": 186386304.0, + "61": 171797792.0, + "62": 161849472.0, + "63": 155122736.0, + "64": 143128336.0, + "65": 142604368.0, + "66": 129885072.0, + "67": 116081640.0, + "68": 113870160.0, + "69": 104227816.0, + "70": 91386144.0, + "71": 92892736.0, + "72": 88221664.0, + "73": 91738088.0, + "74": 89383352.0, + "75": 95494344.0, + "76": 96853184.0, + "77": 81791432.0, + "78": 76553912.0, + "79": 70137440.0, + "80": 67626544.0, + "81": 62921908.0, + "82": 66948868.0, + "83": 57338936.0, + "84": 57300892.0, + "85": 51090064.0, + "86": 47809052.0, + "87": 51604860.0, + "88": 51962264.0, + "89": 48519612.0, + "90": 50696728.0, + "91": 50669904.0, + "92": 52695168.0, + "93": 46315552.0, + "94": 41700796.0, + "95": 51417432.0, + "96": 43713208.0, + "97": 41888148.0, + "98": 45108548.0, + "99": 41926928.0, + "100": 45420824.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6867703296.0, + "2": 6868497920.0, + "3": 6869290496.0, + "4": 6867110912.0, + "5": 6867903488.0, + "6": 6867507200.0, + "7": 6868299776.0, + "8": 6869092352.0, + "9": 6869884928.0, + "10": 6870677504.0, + "11": 6871470080.0, + "12": 6872262656.0, + "13": 6873055232.0, + "14": 6873847808.0, + "15": 6874640384.0, + "16": 6875432960.0, + "17": 6876225536.0, + "18": 6877018112.0, + "19": 6877810688.0, + "20": 6878603264.0, + "21": 6879395840.0, + "22": 6880188416.0, + "23": 6880980992.0, + "24": 6881773568.0, + "25": 6882566144.0, + "26": 6883358720.0, + "27": 6884151296.0, + "28": 6884943872.0, + "29": 6885736448.0, + "30": 6886529024.0, + "31": 6887321600.0, + "32": 6888114176.0, + "33": 6888906752.0, + "34": 6889699328.0, + "35": 6890491904.0, + "36": 6891284480.0, + "37": 6892077056.0, + "38": 6892869632.0, + "39": 6893662208.0, + "40": 6894454784.0, + "41": 6895247360.0, + "42": 6896039936.0, + "43": 6896832512.0, + "44": 6897625088.0, + "45": 6898417664.0, + "46": 6899210240.0, + "47": 6900002816.0, + "48": 6900795392.0, + "49": 6901587968.0, + "50": 6902380544.0, + "51": 6903173120.0, + "52": 6903965696.0, + "53": 6904758272.0, + "54": 6905550848.0, + "55": 6906343424.0, + "56": 6907136000.0, + "57": 6907928576.0, + "58": 6908721152.0, + "59": 6909513728.0, + "60": 6910306304.0, + "61": 6911098880.0, + "62": 6911891456.0, + "63": 6912684032.0, + "64": 6913476608.0, + "65": 6914269184.0, + "66": 6915061760.0, + "67": 6915854336.0, + "68": 6916646912.0, + "69": 6917439488.0, + "70": 6918232064.0, + "71": 6919024640.0, + "72": 6919817216.0, + "73": 6920609792.0, + "74": 6921402368.0, + "75": 6922194944.0, + "76": 6922987520.0, + "77": 6923780096.0, + "78": 6924572672.0, + "79": 6925365248.0, + "80": 6926157824.0, + "81": 6926950400.0, + "82": 6927742976.0, + "83": 6928535552.0, + "84": 6929328128.0, + "85": 6930120704.0, + "86": 6930913280.0, + "87": 6931705856.0, + "88": 6932498432.0, + "89": 6933291008.0, + "90": 6934083584.0, + "91": 6934876160.0, + "92": 6935668736.0, + "93": 6936461312.0, + "94": 6937253888.0, + "95": 6938046464.0, + "96": 6938839040.0, + "97": 6939631616.0, + "98": 6940424192.0, + "99": 6941216768.0, + "100": 6942009344.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 20506583040.0, + "2": 23119278080.0, + "3": 24628068352.0, + "4": 24628068352.0, + "5": 24628068352.0, + "6": 24628068352.0, + "7": 24628068352.0, + "8": 24628068352.0, + "9": 24628068352.0, + "10": 24628068352.0, + "11": 24628068352.0, + "12": 24628068352.0, + "13": 24628068352.0, + "14": 24628068352.0, + "15": 24628068352.0, + "16": 24628068352.0, + "17": 24628068352.0, + "18": 24628068352.0, + "19": 24628068352.0, + "20": 24628068352.0, + "21": 24628068352.0, + "22": 24628068352.0, + "23": 24628068352.0, + "24": 24628068352.0, + "25": 24628068352.0, + "26": 24628068352.0, + "27": 24628068352.0, + "28": 24628068352.0, + "29": 24628068352.0, + "30": 24629035008.0, + "31": 24629035008.0, + "32": 24629035008.0, + "33": 24629035008.0, + "34": 24629035008.0, + "35": 24629035008.0, + "36": 24629035008.0, + "37": 24629035008.0, + "38": 24629035008.0, + "39": 24629035008.0, + "40": 24629035008.0, + "41": 24629035008.0, + "42": 24629035008.0, + "43": 24629035008.0, + "44": 24629035008.0, + "45": 24629035008.0, + "46": 24629035008.0, + "47": 24629035008.0, + "48": 24629035008.0, + "49": 24629035008.0, + "50": 24629035008.0, + "51": 24629035008.0, + "52": 24629035008.0, + "53": 24629035008.0, + "54": 24629035008.0, + "55": 24629035008.0, + "56": 24629035008.0, + "57": 24629035008.0, + "58": 24629035008.0, + "59": 24629035008.0, + "60": 24629035008.0, + "61": 24629035008.0, + "62": 24629035008.0, + "63": 24629035008.0, + "64": 24629035008.0, + "65": 24629035008.0, + "66": 24629035008.0, + "67": 24629035008.0, + "68": 24629035008.0, + "69": 24629035008.0, + "70": 24629035008.0, + "71": 24629035008.0, + "72": 24629035008.0, + "73": 24629035008.0, + "74": 24629035008.0, + "75": 24629035008.0, + "76": 24629035008.0, + "77": 24629035008.0, + "78": 24629035008.0, + "79": 24629035008.0, + "80": 24629035008.0, + "81": 24629035008.0, + "82": 24629035008.0, + "83": 24629035008.0, + "84": 24629035008.0, + "85": 24629035008.0, + "86": 24629035008.0, + "87": 24629035008.0, + "88": 24629035008.0, + "89": 24629035008.0, + "90": 24629035008.0, + "91": 24629035008.0, + "92": 24629035008.0, + "93": 24629035008.0, + "94": 24629035008.0, + "95": 24629035008.0, + "96": 24629035008.0, + "97": 24629035008.0, + "98": 24629035008.0, + "99": 24629035008.0, + "100": 24629035008.0 + } + }, + "seq_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.32179, + "2": 1.32081, + "3": 1.40551, + "4": 1.44539, + "5": 1.42648, + "6": 1.42849, + "7": 1.39922, + "8": 1.38456, + "9": 1.3708, + "10": 1.34355, + "11": 1.33687, + "12": 1.31783, + "13": 1.29105, + "14": 1.27268, + "15": 1.26032, + "16": 1.23228, + "17": 1.22112, + "18": 1.21077, + "19": 1.20929, + "20": 1.19815, + "21": 1.1896, + "22": 1.18499, + "23": 1.16813, + "24": 1.17492, + "25": 1.17331, + "26": 1.17242, + "27": 1.17055, + "28": 1.16628, + "29": 1.16389, + "30": 1.17014, + "31": 1.17492, + "32": 1.17728, + "33": 1.16903, + "34": 1.16965, + "35": 1.16339, + "36": 1.17797, + "37": 1.16778, + "38": 1.16917, + "39": 1.17158, + "40": 1.17531, + "41": 1.17282, + "42": 1.16843, + "43": 1.16097, + "44": 1.16487, + "45": 1.1714, + "46": 1.16061, + "47": 1.16338, + "48": 1.1724, + "49": 1.15836, + "50": 1.16796, + "51": 1.16749, + "52": 1.15103, + "53": 1.15077, + "54": 1.15251, + "55": 1.15237, + "56": 1.15645, + "57": 1.17019, + "58": 1.14885, + "59": 1.14798, + "60": 1.15822, + "61": 1.15807, + "62": 1.14387, + "63": 1.15159, + "64": 1.16172, + "65": 1.13721, + "66": 1.14434, + "67": 1.13667, + "68": 1.14812, + "69": 1.12948, + "70": 1.14515, + "71": 1.15425, + "72": 1.14493, + "73": 1.14122, + "74": 1.1404, + "75": 1.14609, + "76": 1.14229, + "77": 1.13392, + "78": 1.13761, + "79": 1.13323, + "80": 1.13408, + "81": 1.14091, + "82": 1.13579, + "83": 1.13752, + "84": 1.11626, + "85": 1.13678, + "86": 1.12957, + "87": 1.14103, + "88": 1.14888, + "89": 1.14802, + "90": 1.12678, + "91": 1.13922, + "92": 1.14994, + "93": 1.15224, + "94": 1.13085, + "95": 1.1296, + "96": 1.13436, + "97": 1.14062, + "98": 1.12599, + "99": 1.12056, + "100": 1.11941 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.10047, + "2": 11.12819, + "3": 10.5518, + "4": 10.04942, + "5": 9.78676, + "6": 9.4941, + "7": 9.57443, + "8": 8.86812, + "9": 8.68304, + "10": 8.98752, + "11": 8.3552, + "12": 8.38386, + "13": 8.28343, + "14": 7.73652, + "15": 7.87905, + "16": 7.92346, + "17": 7.86198, + "18": 7.59586, + "19": 7.97315, + "20": 7.69851, + "21": 7.37519, + "22": 7.36175, + "23": 7.22415, + "24": 7.23165, + "25": 7.48972, + "26": 6.89959, + "27": 7.4252, + "28": 7.16198, + "29": 7.32397, + "30": 7.44075, + "31": 7.20882, + "32": 7.40175, + "33": 7.45137, + "34": 7.49339, + "35": 7.02552, + "36": 6.88727, + "37": 7.23723, + "38": 7.00989, + "39": 7.35885, + "40": 7.35606, + "41": 7.29323, + "42": 7.03772, + "43": 7.01945, + "44": 7.19244, + "45": 6.92202, + "46": 6.68221, + "47": 7.03959, + "48": 6.85413, + "49": 7.30656, + "50": 6.76953, + "51": 6.82921, + "52": 7.13343, + "53": 7.10224, + "54": 6.99295, + "55": 6.72445, + "56": 7.10394, + "57": 6.82998, + "58": 7.05219, + "59": 6.95855, + "60": 6.41134, + "61": 6.63567, + "62": 7.0291, + "63": 7.09188, + "64": 6.51097, + "65": 7.03916, + "66": 7.23212, + "67": 7.16975, + "68": 6.77064, + "69": 6.74069, + "70": 6.68043, + "71": 6.6645, + "72": 6.79401, + "73": 6.85569, + "74": 6.81047, + "75": 6.78172, + "76": 6.1992, + "77": 7.11886, + "78": 6.67614, + "79": 6.56636, + "80": 6.72748, + "81": 6.52055, + "82": 7.04093, + "83": 6.74436, + "84": 6.6866, + "85": 6.89163, + "86": 6.73527, + "87": 6.83959, + "88": 6.81193, + "89": 6.60108, + "90": 6.79421, + "91": 6.38195, + "92": 6.3968, + "93": 6.50738, + "94": 6.72819, + "95": 6.86584, + "96": 7.04479, + "97": 6.86253, + "98": 6.68949, + "99": 6.77784, + "100": 6.75619 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 38.42105, + "3": 1.2097, + "4": 1.39198, + "5": 1.15605, + "6": 1.1769, + "7": 0.86425, + "8": 0.85203, + "9": 0.93594, + "10": 0.88413, + "11": 0.8372, + "12": 0.84766, + "13": 0.84299, + "14": 0.84049, + "15": 0.82455, + "16": 0.83037, + "17": 0.83192, + "18": 0.84238, + "19": 0.85649, + "20": 0.85285, + "21": 0.87861, + "22": 0.86088, + "23": 0.85654, + "24": 0.84394, + "25": 0.84181, + "26": 0.84387, + "27": 0.85386, + "28": 0.89582, + "29": 0.87939, + "30": 0.86559, + "31": 0.85913, + "32": 0.84258, + "33": 0.85787, + "34": 0.84884, + "35": 0.86339, + "36": 0.8493, + "37": 0.84207, + "38": 0.84327, + "39": 0.84875, + "40": 0.84003, + "41": 0.85645, + "42": 0.83845, + "43": 0.84192, + "44": 0.84575, + "45": 0.8476, + "46": 0.85172, + "47": 0.85438, + "48": 0.84673, + "49": 0.84226, + "50": 0.84195, + "51": 0.8633, + "52": 0.8674, + "53": 0.85307, + "54": 0.8524, + "55": 0.87941, + "56": 0.85683, + "57": 0.87012, + "58": 0.87261, + "59": 0.85585, + "60": 0.85598, + "61": 0.89059, + "62": 0.87432, + "63": 0.84859, + "64": 0.84398, + "65": 0.84711, + "66": 0.84338, + "67": 0.84547, + "68": 0.84751, + "69": 0.8681, + "70": 0.84199, + "71": 0.84539, + "72": 0.87213, + "73": 0.84837, + "74": 0.84583, + "75": 0.84704, + "76": 0.84642, + "77": 0.84514, + "78": 0.85664, + "79": 0.84491, + "80": 0.84531, + "81": 0.85994, + "82": 0.85705, + "83": 0.84745, + "84": 0.85586, + "85": 0.87108, + "86": 0.84343, + "87": 0.84503, + "88": 0.84521, + "89": 0.85176, + "90": 0.85731, + "91": 0.88665, + "92": 0.90945, + "93": 0.88291, + "94": 0.85554, + "95": 0.85361, + "96": 0.84349, + "97": 0.85189, + "98": 0.86764, + "99": 0.85466, + "100": 0.85172 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_cp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_cp2/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..a4431ab9e07 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_cp2/golden_values_dev_dgx_h100.json @@ -0,0 +1,858 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.06444, + "2": 11.04748, + "3": 9.50327, + "4": 9.35827, + "5": 10.03508, + "6": 9.48026, + "7": 9.29094, + "8": 8.84755, + "9": 8.71434, + "10": 9.01418, + "11": 8.43735, + "12": 8.43675, + "13": 8.33782, + "14": 7.81957, + "15": 7.97167, + "16": 7.95882, + "17": 7.90528, + "18": 7.64501, + "19": 7.99492, + "20": 7.72095, + "21": 7.41375, + "22": 7.39414, + "23": 7.27752, + "24": 7.26766, + "25": 7.55299, + "26": 6.94704, + "27": 7.46994, + "28": 7.22085, + "29": 7.37979, + "30": 7.48079, + "31": 7.26859, + "32": 7.45925, + "33": 7.49726, + "34": 7.53543, + "35": 7.09649, + "36": 6.94478, + "37": 7.30159, + "38": 7.07064, + "39": 7.41075, + "40": 7.44514, + "41": 7.35925, + "42": 7.12184, + "43": 7.10672, + "44": 7.27781, + "45": 7.03482, + "46": 6.78484, + "47": 7.13762, + "48": 6.94554, + "49": 7.44901, + "50": 6.85634, + "51": 6.93251, + "52": 7.26968, + "53": 7.22621, + "54": 7.10742, + "55": 6.8184, + "56": 7.21382, + "57": 6.90283, + "58": 7.17081, + "59": 7.07884, + "60": 6.50343, + "61": 6.73345, + "62": 7.1736, + "63": 7.22346, + "64": 6.63216, + "65": 7.15981, + "66": 7.36664, + "67": 7.29996, + "68": 6.87947, + "69": 6.85252, + "70": 6.7846, + "71": 6.77437, + "72": 6.89983, + "73": 6.95811, + "74": 6.94219, + "75": 6.87149, + "76": 6.34019, + "77": 7.23702, + "78": 6.78148, + "79": 6.67436, + "80": 6.84847, + "81": 6.64223, + "82": 7.15809, + "83": 6.81891, + "84": 6.78061, + "85": 6.98722, + "86": 6.84695, + "87": 6.94899, + "88": 6.89246, + "89": 6.66546, + "90": 6.86766, + "91": 6.49347, + "92": 6.48243, + "93": 6.59514, + "94": 6.82497, + "95": 6.9824, + "96": 7.16715, + "97": 6.96252, + "98": 6.78688, + "99": 6.87235, + "100": 6.84103 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38802192.0, + "2": 38543216.0, + "3": 48176388.0, + "4": 176141424.0, + "5": 347274208.0, + "6": 482211584.0, + "7": 768343552.0, + "8": 888412800.0, + "9": 812539712.0, + "10": 829146048.0, + "11": 778615296.0, + "12": 737233920.0, + "13": 749244032.0, + "14": 566789184.0, + "15": 588982016.0, + "16": 680608384.0, + "17": 598550656.0, + "18": 649103680.0, + "19": 708972992.0, + "20": 643215552.0, + "21": 658854400.0, + "22": 859619904.0, + "23": 828700032.0, + "24": 741450880.0, + "25": 777109120.0, + "26": 749803904.0, + "27": 599291328.0, + "28": 620431936.0, + "29": 720584576.0, + "30": 703290304.0, + "31": 664802816.0, + "32": 513839904.0, + "33": 658616832.0, + "34": 652415168.0, + "35": 614973312.0, + "36": 574025472.0, + "37": 541078016.0, + "38": 523425568.0, + "39": 375000704.0, + "40": 541226752.0, + "41": 563718912.0, + "42": 381528896.0, + "43": 498131136.0, + "44": 459870336.0, + "45": 439300448.0, + "46": 416274208.0, + "47": 396919008.0, + "48": 400308512.0, + "49": 405988096.0, + "50": 337635968.0, + "51": 350914720.0, + "52": 359724704.0, + "53": 355225856.0, + "54": 344760160.0, + "55": 313641728.0, + "56": 322937056.0, + "57": 315195008.0, + "58": 299941408.0, + "59": 297619616.0, + "60": 309069280.0, + "61": 285048096.0, + "62": 262510560.0, + "63": 236911840.0, + "64": 171465728.0, + "65": 208667936.0, + "66": 208530480.0, + "67": 213599456.0, + "68": 183078192.0, + "69": 167142848.0, + "70": 160598576.0, + "71": 162098848.0, + "72": 173158352.0, + "73": 173528896.0, + "74": 177463696.0, + "75": 167847344.0, + "76": 162915456.0, + "77": 151003296.0, + "78": 133179304.0, + "79": 111029976.0, + "80": 114814624.0, + "81": 122691736.0, + "82": 98406048.0, + "83": 91941936.0, + "84": 88763984.0, + "85": 79399656.0, + "86": 76122864.0, + "87": 79916512.0, + "88": 80275856.0, + "89": 76831232.0, + "90": 75862664.0, + "91": 75836936.0, + "92": 62137636.0, + "93": 84064536.0, + "94": 66867712.0, + "95": 70291792.0, + "96": 65735652.0, + "97": 70199952.0, + "98": 73420392.0, + "99": 70238752.0, + "100": 58003912.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7831847424.0, + "2": 7831849472.0, + "3": 7831849472.0, + "4": 7831849472.0, + "5": 7831849472.0, + "6": 7831849472.0, + "7": 7831849472.0, + "8": 7831849472.0, + "9": 7831849472.0, + "10": 7831849472.0, + "11": 7831849472.0, + "12": 7831849472.0, + "13": 7831849472.0, + "14": 7831849472.0, + "15": 7831849472.0, + "16": 7831849472.0, + "17": 7831849472.0, + "18": 7831849472.0, + "19": 7831849472.0, + "20": 7831849472.0, + "21": 7831849472.0, + "22": 7831849472.0, + "23": 7831849472.0, + "24": 7831849472.0, + "25": 7831849472.0, + "26": 7831849472.0, + "27": 7831849472.0, + "28": 7831849472.0, + "29": 7831849472.0, + "30": 7831849472.0, + "31": 7831849472.0, + "32": 7831849472.0, + "33": 7831849472.0, + "34": 7831849472.0, + "35": 7831849472.0, + "36": 7831849472.0, + "37": 7831849472.0, + "38": 7831849472.0, + "39": 7831849472.0, + "40": 7831849472.0, + "41": 7831849472.0, + "42": 7831849472.0, + "43": 7831849472.0, + "44": 7831849472.0, + "45": 7831849472.0, + "46": 7831849472.0, + "47": 7831849472.0, + "48": 7831849472.0, + "49": 7831849472.0, + "50": 7831849472.0, + "51": 7831849472.0, + "52": 7831849472.0, + "53": 7831849472.0, + "54": 7831849472.0, + "55": 7831849472.0, + "56": 7831849472.0, + "57": 7831849472.0, + "58": 7831849472.0, + "59": 7831849472.0, + "60": 7831849472.0, + "61": 7831849472.0, + "62": 7831849472.0, + "63": 7831849472.0, + "64": 7831849472.0, + "65": 7831849472.0, + "66": 7831849472.0, + "67": 7831849472.0, + "68": 7831849472.0, + "69": 7831849472.0, + "70": 7831849472.0, + "71": 7831849472.0, + "72": 7831849472.0, + "73": 7831849472.0, + "74": 7831849472.0, + "75": 7831849472.0, + "76": 7831849472.0, + "77": 7831849472.0, + "78": 7831849472.0, + "79": 7831849472.0, + "80": 7831849472.0, + "81": 7831849472.0, + "82": 7831849472.0, + "83": 7831849472.0, + "84": 7831849472.0, + "85": 7831849472.0, + "86": 7831849472.0, + "87": 7831849472.0, + "88": 7831849472.0, + "89": 7831849472.0, + "90": 7831849472.0, + "91": 7831849472.0, + "92": 7831849472.0, + "93": 7831849472.0, + "94": 7831849472.0, + "95": 7831849472.0, + "96": 7831849472.0, + "97": 7831849472.0, + "98": 7831849472.0, + "99": 7831849472.0, + "100": 7831849472.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11236730880.0, + "2": 13783534592.0, + "3": 13822900224.0, + "4": 13822900224.0, + "5": 13822900224.0, + "6": 13822900224.0, + "7": 13822900224.0, + "8": 13822900224.0, + "9": 13884104704.0, + "10": 13884104704.0, + "11": 13884104704.0, + "12": 13884104704.0, + "13": 13884104704.0, + "14": 13884104704.0, + "15": 13884104704.0, + "16": 13884104704.0, + "17": 13884104704.0, + "18": 13884104704.0, + "19": 13884104704.0, + "20": 13884104704.0, + "21": 13884104704.0, + "22": 13884104704.0, + "23": 13884104704.0, + "24": 13884104704.0, + "25": 13884104704.0, + "26": 13884104704.0, + "27": 13884104704.0, + "28": 13884104704.0, + "29": 13884104704.0, + "30": 13884104704.0, + "31": 13884104704.0, + "32": 13884104704.0, + "33": 13884104704.0, + "34": 13884104704.0, + "35": 13884104704.0, + "36": 13884104704.0, + "37": 13884104704.0, + "38": 14027618304.0, + "39": 14027618304.0, + "40": 14027618304.0, + "41": 14027618304.0, + "42": 14027618304.0, + "43": 14027618304.0, + "44": 14027618304.0, + "45": 14027618304.0, + "46": 14027618304.0, + "47": 14027618304.0, + "48": 14027618304.0, + "49": 14027618304.0, + "50": 14027618304.0, + "51": 14027618304.0, + "52": 14027618304.0, + "53": 14027618304.0, + "54": 14027618304.0, + "55": 14027618304.0, + "56": 14027618304.0, + "57": 14027618304.0, + "58": 14027618304.0, + "59": 14027618304.0, + "60": 14027618304.0, + "61": 14027618304.0, + "62": 14027618304.0, + "63": 14027618304.0, + "64": 14027618304.0, + "65": 14027618304.0, + "66": 14027618304.0, + "67": 14027618304.0, + "68": 14027618304.0, + "69": 14027618304.0, + "70": 14027618304.0, + "71": 14027618304.0, + "72": 14027618304.0, + "73": 14027618304.0, + "74": 14027618304.0, + "75": 14027618304.0, + "76": 14027618304.0, + "77": 14027618304.0, + "78": 14027618304.0, + "79": 14027618304.0, + "80": 14027618304.0, + "81": 14027618304.0, + "82": 14027618304.0, + "83": 14027618304.0, + "84": 14027618304.0, + "85": 14027618304.0, + "86": 14027618304.0, + "87": 14027618304.0, + "88": 14027618304.0, + "89": 14027618304.0, + "90": 14027618304.0, + "91": 14027618304.0, + "92": 14027618304.0, + "93": 14027618304.0, + "94": 14027618304.0, + "95": 14027618304.0, + "96": 14027618304.0, + "97": 14027618304.0, + "98": 14027618304.0, + "99": 14027618304.0, + "100": 14027618304.0 + } + }, + "seq_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.32104, + "2": 1.32121, + "3": 1.42242, + "4": 1.43957, + "5": 1.43967, + "6": 1.43161, + "7": 1.41024, + "8": 1.39249, + "9": 1.37274, + "10": 1.34963, + "11": 1.33035, + "12": 1.31756, + "13": 1.29814, + "14": 1.27402, + "15": 1.26417, + "16": 1.23491, + "17": 1.22589, + "18": 1.21888, + "19": 1.21235, + "20": 1.19435, + "21": 1.18632, + "22": 1.18245, + "23": 1.17047, + "24": 1.17185, + "25": 1.16716, + "26": 1.17127, + "27": 1.17283, + "28": 1.16936, + "29": 1.16612, + "30": 1.16524, + "31": 1.16755, + "32": 1.17005, + "33": 1.16641, + "34": 1.16589, + "35": 1.16136, + "36": 1.16756, + "37": 1.16207, + "38": 1.15799, + "39": 1.16014, + "40": 1.16705, + "41": 1.16702, + "42": 1.16323, + "43": 1.15585, + "44": 1.15897, + "45": 1.16636, + "46": 1.16612, + "47": 1.17098, + "48": 1.17061, + "49": 1.1562, + "50": 1.16522, + "51": 1.16928, + "52": 1.15748, + "53": 1.16372, + "54": 1.16104, + "55": 1.16152, + "56": 1.16477, + "57": 1.17364, + "58": 1.16088, + "59": 1.16199, + "60": 1.16658, + "61": 1.16973, + "62": 1.15364, + "63": 1.16022, + "64": 1.16963, + "65": 1.15619, + "66": 1.15811, + "67": 1.15684, + "68": 1.1678, + "69": 1.15364, + "70": 1.16311, + "71": 1.17036, + "72": 1.16231, + "73": 1.15443, + "74": 1.15449, + "75": 1.15769, + "76": 1.14933, + "77": 1.1508, + "78": 1.15202, + "79": 1.14552, + "80": 1.14683, + "81": 1.15046, + "82": 1.14779, + "83": 1.14967, + "84": 1.12982, + "85": 1.14816, + "86": 1.14664, + "87": 1.15451, + "88": 1.15984, + "89": 1.16253, + "90": 1.1487, + "91": 1.15424, + "92": 1.16208, + "93": 1.16224, + "94": 1.14904, + "95": 1.14758, + "96": 1.152, + "97": 1.15888, + "98": 1.1407, + "99": 1.13511, + "100": 1.14214 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.10863, + "2": 11.11251, + "3": 10.55191, + "4": 10.09914, + "5": 9.93284, + "6": 9.47495, + "7": 9.54019, + "8": 8.85345, + "9": 8.66977, + "10": 8.98554, + "11": 8.3892, + "12": 8.38088, + "13": 8.26726, + "14": 7.72782, + "15": 7.87121, + "16": 7.90608, + "17": 7.84585, + "18": 7.57782, + "19": 7.95386, + "20": 7.67587, + "21": 7.35385, + "22": 7.33637, + "23": 7.20922, + "24": 7.20465, + "25": 7.47108, + "26": 6.89211, + "27": 7.40122, + "28": 7.14578, + "29": 7.32399, + "30": 7.42054, + "31": 7.1922, + "32": 7.38472, + "33": 7.43105, + "34": 7.47857, + "35": 7.0134, + "36": 6.87499, + "37": 7.2265, + "38": 6.99939, + "39": 7.34946, + "40": 7.34391, + "41": 7.28072, + "42": 7.03026, + "43": 7.01651, + "44": 7.19071, + "45": 6.91208, + "46": 6.68054, + "47": 7.03675, + "48": 6.84915, + "49": 7.30867, + "50": 6.76538, + "51": 6.82344, + "52": 7.1341, + "53": 7.0972, + "54": 6.98219, + "55": 6.71727, + "56": 7.10326, + "57": 6.81906, + "58": 7.05467, + "59": 6.95559, + "60": 6.41449, + "61": 6.6455, + "62": 7.0295, + "63": 7.0902, + "64": 6.52364, + "65": 7.04122, + "66": 7.24348, + "67": 7.17496, + "68": 6.77199, + "69": 6.74417, + "70": 6.68651, + "71": 6.66691, + "72": 6.79191, + "73": 6.86264, + "74": 6.81646, + "75": 6.78676, + "76": 6.20125, + "77": 7.11945, + "78": 6.67746, + "79": 6.57963, + "80": 6.73068, + "81": 6.52576, + "82": 7.04844, + "83": 6.74645, + "84": 6.6912, + "85": 6.89069, + "86": 6.73653, + "87": 6.84095, + "88": 6.82057, + "89": 6.60489, + "90": 6.79871, + "91": 6.38519, + "92": 6.39783, + "93": 6.51067, + "94": 6.73395, + "95": 6.86993, + "96": 7.05172, + "97": 6.86603, + "98": 6.69854, + "99": 6.78026, + "100": 6.76517 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 38.98734, + "3": 1.76854, + "4": 1.48834, + "5": 1.66057, + "6": 1.4205, + "7": 1.2275, + "8": 1.22046, + "9": 1.4919, + "10": 1.24678, + "11": 1.25346, + "12": 1.27396, + "13": 1.31792, + "14": 1.29903, + "15": 1.32369, + "16": 1.3149, + "17": 1.3134, + "18": 1.28719, + "19": 1.27264, + "20": 1.31451, + "21": 1.40287, + "22": 1.29398, + "23": 1.32677, + "24": 1.3796, + "25": 1.35553, + "26": 1.36158, + "27": 1.36735, + "28": 1.37368, + "29": 1.35738, + "30": 1.33997, + "31": 1.38954, + "32": 1.41689, + "33": 1.48612, + "34": 1.39437, + "35": 1.36879, + "36": 1.42048, + "37": 1.48513, + "38": 1.40287, + "39": 1.59512, + "40": 1.38538, + "41": 1.37175, + "42": 1.3872, + "43": 1.39875, + "44": 1.47799, + "45": 1.39616, + "46": 1.42609, + "47": 1.45073, + "48": 1.42827, + "49": 1.4268, + "50": 1.41264, + "51": 1.42709, + "52": 1.42867, + "53": 1.42301, + "54": 1.42644, + "55": 1.45411, + "56": 1.46668, + "57": 1.45646, + "58": 1.47491, + "59": 1.47486, + "60": 1.45195, + "61": 1.44777, + "62": 1.45855, + "63": 1.4542, + "64": 1.46461, + "65": 1.47739, + "66": 1.46246, + "67": 1.47906, + "68": 1.49507, + "69": 1.48003, + "70": 1.48006, + "71": 1.47612, + "72": 1.47079, + "73": 1.47685, + "74": 1.45699, + "75": 1.45336, + "76": 1.47926, + "77": 1.48211, + "78": 1.48119, + "79": 1.47028, + "80": 1.46639, + "81": 1.47988, + "82": 1.49265, + "83": 1.47633, + "84": 1.45907, + "85": 1.4781, + "86": 1.85699, + "87": 1.47897, + "88": 1.47591, + "89": 1.48697, + "90": 1.47932, + "91": 1.47792, + "92": 1.45621, + "93": 1.46845, + "94": 1.52387, + "95": 1.49926, + "96": 1.57469, + "97": 1.4501, + "98": 1.48369, + "99": 1.45516, + "100": 1.45191 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_cudagraph/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_cudagraph/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..95b4ad3e247 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_cudagraph/golden_values_dev_dgx_h100.json @@ -0,0 +1,858 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.06699, + "2": 11.04815, + "3": 9.39295, + "4": 9.31548, + "5": 9.64316, + "6": 9.07725, + "7": 9.21754, + "8": 8.6925, + "9": 8.53941, + "10": 8.84935, + "11": 8.33447, + "12": 8.412, + "13": 8.31506, + "14": 7.81959, + "15": 7.89442, + "16": 7.94181, + "17": 7.92232, + "18": 7.60755, + "19": 7.9889, + "20": 7.73762, + "21": 7.40989, + "22": 7.39989, + "23": 7.28567, + "24": 7.26261, + "25": 7.53876, + "26": 6.96573, + "27": 7.47203, + "28": 7.22265, + "29": 7.38415, + "30": 7.48648, + "31": 7.27833, + "32": 7.47792, + "33": 7.50923, + "34": 7.54182, + "35": 7.07947, + "36": 6.94193, + "37": 7.29918, + "38": 7.06497, + "39": 7.41164, + "40": 7.44424, + "41": 7.34744, + "42": 7.11582, + "43": 7.11462, + "44": 7.28885, + "45": 7.01695, + "46": 6.84283, + "47": 7.16053, + "48": 6.9582, + "49": 7.46769, + "50": 6.87634, + "51": 6.94449, + "52": 7.28619, + "53": 7.24308, + "54": 7.12866, + "55": 6.83506, + "56": 7.21612, + "57": 6.91849, + "58": 7.18696, + "59": 7.08358, + "60": 6.50937, + "61": 6.74289, + "62": 7.169, + "63": 7.22006, + "64": 6.62483, + "65": 7.15494, + "66": 7.35509, + "67": 7.29543, + "68": 6.87277, + "69": 6.84424, + "70": 6.78304, + "71": 6.76153, + "72": 6.89258, + "73": 6.94677, + "74": 6.92537, + "75": 6.85478, + "76": 6.33332, + "77": 7.21717, + "78": 6.77118, + "79": 6.66463, + "80": 6.82367, + "81": 6.62661, + "82": 7.14379, + "83": 6.80828, + "84": 6.76723, + "85": 6.9771, + "86": 6.82983, + "87": 6.93236, + "88": 6.88376, + "89": 6.65765, + "90": 6.85339, + "91": 6.47843, + "92": 6.47107, + "93": 6.58426, + "94": 6.81852, + "95": 6.96012, + "96": 7.14433, + "97": 6.95755, + "98": 6.76901, + "99": 6.8549, + "100": 6.82694 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38802416.0, + "2": 38543400.0, + "3": 38762912.0, + "4": 255140544.0, + "5": 357146976.0, + "6": 463575424.0, + "7": 671087424.0, + "8": 778747584.0, + "9": 677639936.0, + "10": 574619456.0, + "11": 586867968.0, + "12": 526706560.0, + "13": 693157504.0, + "14": 589012608.0, + "15": 670992320.0, + "16": 722172288.0, + "17": 636609152.0, + "18": 655815936.0, + "19": 671456000.0, + "20": 628066880.0, + "21": 599462720.0, + "22": 608148032.0, + "23": 602799872.0, + "24": 556245696.0, + "25": 602467200.0, + "26": 561279232.0, + "27": 514789120.0, + "28": 456943488.0, + "29": 582812928.0, + "30": 558793600.0, + "31": 574029312.0, + "32": 523361184.0, + "33": 466839808.0, + "34": 444989664.0, + "35": 426657152.0, + "36": 401263072.0, + "37": 343086624.0, + "38": 344232832.0, + "39": 296523488.0, + "40": 318084832.0, + "41": 378402720.0, + "42": 303050048.0, + "43": 350552512.0, + "44": 340507552.0, + "45": 316859008.0, + "46": 325385184.0, + "47": 280750560.0, + "48": 419244000.0, + "49": 428102880.0, + "50": 362957472.0, + "51": 379329312.0, + "52": 249739680.0, + "53": 374185408.0, + "54": 279004448.0, + "55": 276044992.0, + "56": 172040992.0, + "57": 205161664.0, + "58": 268584640.0, + "59": 278810048.0, + "60": 268319920.0, + "61": 256818976.0, + "62": 231140784.0, + "63": 214976784.0, + "64": 187242624.0, + "65": 158399104.0, + "66": 155229968.0, + "67": 138208496.0, + "68": 135902640.0, + "69": 129428976.0, + "70": 122910512.0, + "71": 118087032.0, + "72": 110270944.0, + "73": 107558152.0, + "74": 108315480.0, + "75": 108085552.0, + "76": 71712536.0, + "77": 103826072.0, + "78": 89150656.0, + "79": 92163144.0, + "80": 92803128.0, + "81": 59791856.0, + "82": 92118064.0, + "83": 91943584.0, + "84": 76186248.0, + "85": 73109280.0, + "86": 72983504.0, + "87": 73632688.0, + "88": 67705384.0, + "89": 70542280.0, + "90": 66429104.0, + "91": 66403612.0, + "92": 68429088.0, + "93": 68340760.0, + "94": 66867936.0, + "95": 67148208.0, + "96": 65734696.0, + "97": 60765744.0, + "98": 57699748.0, + "99": 60808808.0, + "100": 64296228.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 8545626624.0, + "2": 8682221568.0, + "3": 8681863168.0, + "4": 8682221568.0, + "5": 8682221568.0, + "6": 8682221568.0, + "7": 8733077504.0, + "8": 8682221568.0, + "9": 8733077504.0, + "10": 8682145792.0, + "11": 8682221568.0, + "12": 8682221568.0, + "13": 8682149888.0, + "14": 8682221568.0, + "15": 8682221568.0, + "16": 8682221568.0, + "17": 8681764864.0, + "18": 8681764864.0, + "19": 8682221568.0, + "20": 8682221568.0, + "21": 8682221568.0, + "22": 8682221568.0, + "23": 8683040768.0, + "24": 8682221568.0, + "25": 8682221568.0, + "26": 8682221568.0, + "27": 8682221568.0, + "28": 8681994240.0, + "29": 8682221568.0, + "30": 8682221568.0, + "31": 8682221568.0, + "32": 8733077504.0, + "33": 8682221568.0, + "34": 8682221568.0, + "35": 8682221568.0, + "36": 8682221568.0, + "37": 8682221568.0, + "38": 8682221568.0, + "39": 8682006528.0, + "40": 8682221568.0, + "41": 8682221568.0, + "42": 8681764864.0, + "43": 8682221568.0, + "44": 8682221568.0, + "45": 8682221568.0, + "46": 8682221568.0, + "47": 8682221568.0, + "48": 8682991616.0, + "49": 8682221568.0, + "50": 8682221568.0, + "51": 8682328064.0, + "52": 8682221568.0, + "53": 8682221568.0, + "54": 8682221568.0, + "55": 8682221568.0, + "56": 8733077504.0, + "57": 8733077504.0, + "58": 8682221568.0, + "59": 8682221568.0, + "60": 8682221568.0, + "61": 8682221568.0, + "62": 8682221568.0, + "63": 8733077504.0, + "64": 8682221568.0, + "65": 8682221568.0, + "66": 8682221568.0, + "67": 8682289152.0, + "68": 8682221568.0, + "69": 8682221568.0, + "70": 8732620800.0, + "71": 8733077504.0, + "72": 8733669376.0, + "73": 8682221568.0, + "74": 8682221568.0, + "75": 8682287104.0, + "76": 8681764864.0, + "77": 8682420224.0, + "78": 8682221568.0, + "79": 8682221568.0, + "80": 8682221568.0, + "81": 8682221568.0, + "82": 8682221568.0, + "83": 8681764864.0, + "84": 8682221568.0, + "85": 8681895936.0, + "86": 8682221568.0, + "87": 8682221568.0, + "88": 8682483712.0, + "89": 8682221568.0, + "90": 8681764864.0, + "91": 8682289152.0, + "92": 8682221568.0, + "93": 8682221568.0, + "94": 8682221568.0, + "95": 8733077504.0, + "96": 8682221568.0, + "97": 8681764864.0, + "98": 8682221568.0, + "99": 8681764864.0, + "100": 8682221568.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 19083360256.0, + "2": 21032198144.0, + "3": 21032198144.0, + "4": 21032198144.0, + "5": 21032198144.0, + "6": 21032198144.0, + "7": 21032198144.0, + "8": 21032198144.0, + "9": 21032198144.0, + "10": 21032198144.0, + "11": 21032198144.0, + "12": 21032198144.0, + "13": 21032198144.0, + "14": 21032198144.0, + "15": 21032198144.0, + "16": 21032198144.0, + "17": 21032198144.0, + "18": 21032198144.0, + "19": 21032198144.0, + "20": 21032198144.0, + "21": 21032198144.0, + "22": 21032198144.0, + "23": 21032198144.0, + "24": 21032198144.0, + "25": 21032198144.0, + "26": 21032198144.0, + "27": 21032198144.0, + "28": 21032198144.0, + "29": 21032198144.0, + "30": 21032198144.0, + "31": 21032198144.0, + "32": 21032198144.0, + "33": 21032198144.0, + "34": 21032198144.0, + "35": 21032198144.0, + "36": 21032198144.0, + "37": 21032198144.0, + "38": 21032198144.0, + "39": 21032198144.0, + "40": 21032198144.0, + "41": 21032198144.0, + "42": 21032198144.0, + "43": 21032198144.0, + "44": 21032198144.0, + "45": 21032198144.0, + "46": 21032198144.0, + "47": 21032198144.0, + "48": 21032198144.0, + "49": 21032198144.0, + "50": 21032198144.0, + "51": 21032198144.0, + "52": 21032198144.0, + "53": 21032198144.0, + "54": 21032198144.0, + "55": 21032198144.0, + "56": 21032198144.0, + "57": 21032198144.0, + "58": 21032198144.0, + "59": 21032198144.0, + "60": 21032198144.0, + "61": 21032198144.0, + "62": 21032198144.0, + "63": 21032198144.0, + "64": 21032198144.0, + "65": 21032198144.0, + "66": 21032198144.0, + "67": 21032198144.0, + "68": 21032198144.0, + "69": 21032198144.0, + "70": 21032198144.0, + "71": 21032198144.0, + "72": 21032198144.0, + "73": 21032198144.0, + "74": 21032198144.0, + "75": 21032198144.0, + "76": 21032198144.0, + "77": 21032198144.0, + "78": 21032198144.0, + "79": 21032198144.0, + "80": 21032198144.0, + "81": 21032198144.0, + "82": 21032198144.0, + "83": 21032198144.0, + "84": 21032198144.0, + "85": 21032198144.0, + "86": 21032198144.0, + "87": 21032198144.0, + "88": 21032198144.0, + "89": 21032198144.0, + "90": 21032198144.0, + "91": 21032198144.0, + "92": 21032198144.0, + "93": 21032198144.0, + "94": 21032198144.0, + "95": 21032198144.0, + "96": 21032198144.0, + "97": 21032198144.0, + "98": 21032198144.0, + "99": 21032198144.0, + "100": 21032198144.0 + } + }, + "seq_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.32088, + "2": 1.32018, + "3": 1.40628, + "4": 1.44918, + "5": 1.42923, + "6": 1.4048, + "7": 1.39129, + "8": 1.3704, + "9": 1.35458, + "10": 1.32879, + "11": 1.32744, + "12": 1.31091, + "13": 1.28932, + "14": 1.26727, + "15": 1.25235, + "16": 1.22846, + "17": 1.22063, + "18": 1.21677, + "19": 1.21563, + "20": 1.20204, + "21": 1.19368, + "22": 1.18924, + "23": 1.17264, + "24": 1.18222, + "25": 1.18059, + "26": 1.1765, + "27": 1.17265, + "28": 1.16515, + "29": 1.16318, + "30": 1.16073, + "31": 1.16178, + "32": 1.16222, + "33": 1.15651, + "34": 1.15554, + "35": 1.15213, + "36": 1.16267, + "37": 1.16189, + "38": 1.15717, + "39": 1.16194, + "40": 1.17248, + "41": 1.16867, + "42": 1.16372, + "43": 1.15678, + "44": 1.16046, + "45": 1.16613, + "46": 1.16552, + "47": 1.16862, + "48": 1.16815, + "49": 1.15112, + "50": 1.16682, + "51": 1.17185, + "52": 1.15176, + "53": 1.15433, + "54": 1.15459, + "55": 1.15887, + "56": 1.16196, + "57": 1.16673, + "58": 1.15029, + "59": 1.14945, + "60": 1.15494, + "61": 1.16046, + "62": 1.14619, + "63": 1.15355, + "64": 1.1622, + "65": 1.14569, + "66": 1.14672, + "67": 1.14604, + "68": 1.16074, + "69": 1.14459, + "70": 1.15124, + "71": 1.15809, + "72": 1.14816, + "73": 1.14146, + "74": 1.14364, + "75": 1.14892, + "76": 1.14288, + "77": 1.14106, + "78": 1.14124, + "79": 1.13492, + "80": 1.13702, + "81": 1.14035, + "82": 1.14029, + "83": 1.14331, + "84": 1.12364, + "85": 1.14439, + "86": 1.1379, + "87": 1.14695, + "88": 1.15234, + "89": 1.15365, + "90": 1.13792, + "91": 1.14643, + "92": 1.15734, + "93": 1.15541, + "94": 1.14101, + "95": 1.13876, + "96": 1.14384, + "97": 1.14794, + "98": 1.13155, + "99": 1.12784, + "100": 1.12992 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.10064, + "2": 11.12337, + "3": 10.56166, + "4": 10.03978, + "5": 9.82072, + "6": 9.46013, + "7": 9.54379, + "8": 8.82746, + "9": 8.63789, + "10": 8.95446, + "11": 8.3104, + "12": 8.34552, + "13": 8.2459, + "14": 7.70935, + "15": 7.85553, + "16": 7.89022, + "17": 7.83267, + "18": 7.57926, + "19": 7.94453, + "20": 7.66829, + "21": 7.35856, + "22": 7.34167, + "23": 7.21968, + "24": 7.2133, + "25": 7.48121, + "26": 6.90097, + "27": 7.41165, + "28": 7.15817, + "29": 7.32624, + "30": 7.41909, + "31": 7.19948, + "32": 7.40021, + "33": 7.44339, + "34": 7.48558, + "35": 7.02438, + "36": 6.88387, + "37": 7.23971, + "38": 7.00763, + "39": 7.36471, + "40": 7.35523, + "41": 7.29759, + "42": 7.0433, + "43": 7.02324, + "44": 7.2012, + "45": 6.91846, + "46": 6.68852, + "47": 7.0543, + "48": 6.86559, + "49": 7.31889, + "50": 6.78093, + "51": 6.84678, + "52": 7.14673, + "53": 7.1232, + "54": 7.01058, + "55": 6.73442, + "56": 7.13727, + "57": 6.83071, + "58": 7.06527, + "59": 6.97079, + "60": 6.43539, + "61": 6.66244, + "62": 7.04424, + "63": 7.11328, + "64": 6.54326, + "65": 7.05901, + "66": 7.25946, + "67": 7.19476, + "68": 6.79557, + "69": 6.7653, + "70": 6.70556, + "71": 6.68568, + "72": 6.80894, + "73": 6.8742, + "74": 6.82964, + "75": 6.79718, + "76": 6.21794, + "77": 7.13971, + "78": 6.69475, + "79": 6.58525, + "80": 6.73703, + "81": 6.54073, + "82": 7.06135, + "83": 6.76439, + "84": 6.70956, + "85": 6.9186, + "86": 6.7528, + "87": 6.85336, + "88": 6.83148, + "89": 6.62294, + "90": 6.81468, + "91": 6.39937, + "92": 6.41368, + "93": 6.52874, + "94": 6.74969, + "95": 6.88373, + "96": 7.06354, + "97": 6.88418, + "98": 6.7084, + "99": 6.79483, + "100": 6.78132 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 39.49693, + "3": 1.07214, + "4": 1.23713, + "5": 0.92816, + "6": 1.0971, + "7": 0.85189, + "8": 0.83664, + "9": 0.85665, + "10": 0.85059, + "11": 0.87022, + "12": 0.87753, + "13": 0.87405, + "14": 0.88061, + "15": 0.88011, + "16": 0.87103, + "17": 0.87379, + "18": 0.87183, + "19": 0.87329, + "20": 0.86284, + "21": 0.84631, + "22": 0.8538, + "23": 0.85683, + "24": 0.85757, + "25": 0.83585, + "26": 0.83883, + "27": 0.84401, + "28": 0.83711, + "29": 0.83416, + "30": 0.83606, + "31": 0.84399, + "32": 0.82849, + "33": 0.83807, + "34": 0.84845, + "35": 0.84624, + "36": 0.8404, + "37": 0.85739, + "38": 0.83874, + "39": 0.85012, + "40": 0.8397, + "41": 0.85255, + "42": 0.84237, + "43": 0.83984, + "44": 0.8546, + "45": 0.85277, + "46": 0.83823, + "47": 0.84451, + "48": 0.85521, + "49": 0.84282, + "50": 0.84247, + "51": 0.83449, + "52": 0.83297, + "53": 0.83747, + "54": 0.82933, + "55": 0.82411, + "56": 0.81657, + "57": 0.81176, + "58": 0.81988, + "59": 0.81443, + "60": 0.82183, + "61": 0.81702, + "62": 0.81507, + "63": 0.81537, + "64": 0.81061, + "65": 0.82206, + "66": 0.81356, + "67": 0.80556, + "68": 0.81357, + "69": 0.82491, + "70": 0.80594, + "71": 0.80312, + "72": 0.80639, + "73": 0.80325, + "74": 0.80234, + "75": 0.80549, + "76": 0.81864, + "77": 0.80903, + "78": 0.80213, + "79": 0.80919, + "80": 0.79926, + "81": 0.80141, + "82": 0.79954, + "83": 0.7996, + "84": 0.79888, + "85": 0.79959, + "86": 0.79957, + "87": 0.79319, + "88": 0.79931, + "89": 0.79593, + "90": 0.79628, + "91": 0.79484, + "92": 0.79838, + "93": 0.79138, + "94": 0.79761, + "95": 0.79526, + "96": 0.79149, + "97": 0.79998, + "98": 0.79293, + "99": 0.79233, + "100": 0.7925 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_fp8/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_fp8/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..e7da10d5af3 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_fp8/golden_values_dev_dgx_h100.json @@ -0,0 +1,858 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.06703, + "2": 11.04819, + "3": 9.3947, + "4": 9.31566, + "5": 9.62632, + "6": 9.05242, + "7": 9.27144, + "8": 8.71145, + "9": 8.56178, + "10": 8.85983, + "11": 8.3469, + "12": 8.38009, + "13": 8.29288, + "14": 7.7759, + "15": 7.91001, + "16": 7.93085, + "17": 7.87562, + "18": 7.62863, + "19": 7.98415, + "20": 7.70552, + "21": 7.40776, + "22": 7.39316, + "23": 7.26017, + "24": 7.26068, + "25": 7.5308, + "26": 6.94421, + "27": 7.45863, + "28": 7.20142, + "29": 7.36557, + "30": 7.46758, + "31": 7.25319, + "32": 7.44555, + "33": 7.48098, + "34": 7.51513, + "35": 7.07451, + "36": 6.93995, + "37": 7.28119, + "38": 7.04852, + "39": 7.4098, + "40": 7.44764, + "41": 7.33898, + "42": 7.10295, + "43": 7.10535, + "44": 7.2781, + "45": 6.99782, + "46": 6.79301, + "47": 7.13397, + "48": 6.94549, + "49": 7.44237, + "50": 6.84536, + "51": 6.92408, + "52": 7.25234, + "53": 7.21085, + "54": 7.09795, + "55": 6.80745, + "56": 7.19283, + "57": 6.89935, + "58": 7.14773, + "59": 7.06088, + "60": 6.48539, + "61": 6.71175, + "62": 7.13726, + "63": 7.1836, + "64": 6.59876, + "65": 7.13322, + "66": 7.32065, + "67": 7.26409, + "68": 6.84914, + "69": 6.81597, + "70": 6.75464, + "71": 6.7431, + "72": 6.86826, + "73": 6.9212, + "74": 6.89609, + "75": 6.83036, + "76": 6.30499, + "77": 7.18853, + "78": 6.74688, + "79": 6.63517, + "80": 6.7982, + "81": 6.60082, + "82": 7.11848, + "83": 6.78131, + "84": 6.74202, + "85": 6.95137, + "86": 6.79783, + "87": 6.90577, + "88": 6.86088, + "89": 6.63727, + "90": 6.8271, + "91": 6.45255, + "92": 6.44966, + "93": 6.56038, + "94": 6.78605, + "95": 6.93013, + "96": 7.12253, + "97": 6.93022, + "98": 6.73964, + "99": 6.83309, + "100": 6.80256 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38802400.0, + "2": 38543428.0, + "3": 41899204.0, + "4": 255024032.0, + "5": 369559584.0, + "6": 491781440.0, + "7": 639554048.0, + "8": 709529024.0, + "9": 636555904.0, + "10": 561947200.0, + "11": 583709952.0, + "12": 564324736.0, + "13": 667590848.0, + "14": 617267584.0, + "15": 648895488.0, + "16": 725041088.0, + "17": 658455040.0, + "18": 617841408.0, + "19": 671466368.0, + "20": 681241536.0, + "21": 542588480.0, + "22": 561296512.0, + "23": 533316096.0, + "24": 534168416.0, + "25": 554083968.0, + "26": 517141952.0, + "27": 533389984.0, + "28": 532567744.0, + "29": 610600128.0, + "30": 505291456.0, + "31": 557962368.0, + "32": 466748160.0, + "33": 448101632.0, + "34": 416660160.0, + "35": 445682624.0, + "36": 391770560.0, + "37": 387096512.0, + "38": 375659392.0, + "39": 334143808.0, + "40": 403059712.0, + "41": 415992512.0, + "42": 403642912.0, + "43": 394461952.0, + "44": 469376704.0, + "45": 461335072.0, + "46": 432078784.0, + "47": 400121088.0, + "48": 384682720.0, + "49": 387162752.0, + "50": 362854048.0, + "51": 366676864.0, + "52": 350360320.0, + "53": 311223488.0, + "54": 225290304.0, + "55": 260253088.0, + "56": 263247040.0, + "57": 246056096.0, + "58": 252780608.0, + "59": 231579392.0, + "60": 195835792.0, + "61": 187546496.0, + "62": 174440096.0, + "63": 174030656.0, + "64": 168326032.0, + "65": 155198272.0, + "66": 158226864.0, + "67": 160128528.0, + "68": 148477024.0, + "69": 141989008.0, + "70": 135430688.0, + "71": 124386168.0, + "72": 125975096.0, + "73": 116937424.0, + "74": 114577992.0, + "75": 104977400.0, + "76": 96893568.0, + "77": 94433896.0, + "78": 89154088.0, + "79": 82774424.0, + "80": 58226176.0, + "81": 81798272.0, + "82": 79535104.0, + "83": 76218472.0, + "84": 69928088.0, + "85": 76252288.0, + "86": 69833912.0, + "87": 70481392.0, + "88": 70838696.0, + "89": 67401872.0, + "90": 69592816.0, + "91": 63257244.0, + "92": 65279352.0, + "93": 65195692.0, + "94": 66865744.0, + "95": 64002816.0, + "96": 62594632.0, + "97": 60766636.0, + "98": 67129552.0, + "99": 54521448.0, + "100": 64295692.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7495737856.0, + "2": 7880305152.0, + "3": 7913859584.0, + "4": 8136681984.0, + "5": 8008493568.0, + "6": 8264870400.0, + "7": 7785671168.0, + "8": 8777623552.0, + "9": 8682989568.0, + "10": 8521247232.0, + "11": 8554801664.0, + "12": 8426613248.0, + "13": 8461216256.0, + "14": 8393058816.0, + "15": 8264870400.0, + "16": 7752116736.0, + "17": 7752116736.0, + "18": 8554801664.0, + "19": 8298424832.0, + "20": 7880305152.0, + "21": 7657482752.0, + "22": 8426613248.0, + "23": 7913859584.0, + "24": 8777623552.0, + "25": 8042048000.0, + "26": 8649435136.0, + "27": 8008493568.0, + "28": 7785671168.0, + "29": 8393058816.0, + "30": 8777623552.0, + "31": 7785671168.0, + "32": 8170236416.0, + "33": 8521247232.0, + "34": 7657482752.0, + "35": 8008493568.0, + "36": 8554801664.0, + "37": 7657482752.0, + "38": 8042048000.0, + "39": 7657482752.0, + "40": 8298424832.0, + "41": 7657482752.0, + "42": 8170236416.0, + "43": 7752116736.0, + "44": 8393058816.0, + "45": 7657482752.0, + "46": 8136681984.0, + "47": 8777623552.0, + "48": 8136681984.0, + "49": 8521247232.0, + "50": 7657482752.0, + "51": 8264870400.0, + "52": 8682989568.0, + "53": 7913859584.0, + "54": 8170236416.0, + "55": 8393058816.0, + "56": 8649435136.0, + "57": 7657482752.0, + "58": 8042048000.0, + "59": 8170236416.0, + "60": 8393058816.0, + "61": 8682989568.0, + "62": 8905811968.0, + "63": 7785671168.0, + "64": 7785671168.0, + "65": 8008493568.0, + "66": 8008493568.0, + "67": 8042048000.0, + "68": 8136681984.0, + "69": 8170236416.0, + "70": 8393058816.0, + "71": 8649435136.0, + "72": 8905811968.0, + "73": 7785671168.0, + "74": 7913859584.0, + "75": 8264870400.0, + "76": 8426613248.0, + "77": 8298424832.0, + "78": 8298424832.0, + "79": 8426613248.0, + "80": 8298424832.0, + "81": 8264870400.0, + "82": 8170236416.0, + "83": 8170236416.0, + "84": 8042048000.0, + "85": 7880305152.0, + "86": 7905470976.0, + "87": 7752116736.0, + "88": 7785671168.0, + "89": 7785671168.0, + "90": 7785671168.0, + "91": 7657482752.0, + "92": 8905811968.0, + "93": 7623928320.0, + "94": 8905811968.0, + "95": 8717592576.0, + "96": 8682989568.0, + "97": 8905811968.0, + "98": 8777623552.0, + "99": 8743020544.0, + "100": 8649435136.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 19589115904.0, + "2": 22235258880.0, + "3": 23198478336.0, + "4": 23198478336.0, + "5": 23198478336.0, + "6": 23198478336.0, + "7": 23198478336.0, + "8": 23198478336.0, + "9": 23198478336.0, + "10": 23198478336.0, + "11": 23198478336.0, + "12": 23198478336.0, + "13": 23273062400.0, + "14": 23273062400.0, + "15": 23273062400.0, + "16": 23273062400.0, + "17": 23273062400.0, + "18": 23273062400.0, + "19": 23273062400.0, + "20": 23273062400.0, + "21": 23273062400.0, + "22": 23273062400.0, + "23": 23273062400.0, + "24": 23273062400.0, + "25": 23273062400.0, + "26": 23273062400.0, + "27": 23449098240.0, + "28": 23533105152.0, + "29": 23763828736.0, + "30": 23763828736.0, + "31": 23763828736.0, + "32": 23763828736.0, + "33": 23763828736.0, + "34": 23763828736.0, + "35": 23763828736.0, + "36": 23763828736.0, + "37": 23763828736.0, + "38": 23763828736.0, + "39": 23763828736.0, + "40": 23763828736.0, + "41": 23763828736.0, + "42": 23763828736.0, + "43": 23763828736.0, + "44": 23763828736.0, + "45": 23763828736.0, + "46": 23763828736.0, + "47": 23763828736.0, + "48": 23763828736.0, + "49": 23763828736.0, + "50": 23763828736.0, + "51": 23763828736.0, + "52": 23763828736.0, + "53": 23763828736.0, + "54": 23763828736.0, + "55": 23763828736.0, + "56": 23763828736.0, + "57": 23763828736.0, + "58": 23763828736.0, + "59": 23763828736.0, + "60": 23763828736.0, + "61": 23763828736.0, + "62": 23763828736.0, + "63": 23763828736.0, + "64": 23763828736.0, + "65": 23763828736.0, + "66": 23763828736.0, + "67": 23763828736.0, + "68": 23763828736.0, + "69": 23763828736.0, + "70": 23763828736.0, + "71": 23763828736.0, + "72": 23763828736.0, + "73": 23763828736.0, + "74": 23763828736.0, + "75": 23763828736.0, + "76": 23763828736.0, + "77": 23763828736.0, + "78": 23763828736.0, + "79": 23763828736.0, + "80": 23763828736.0, + "81": 23763828736.0, + "82": 23763828736.0, + "83": 23763828736.0, + "84": 23763828736.0, + "85": 23763828736.0, + "86": 23763828736.0, + "87": 23763828736.0, + "88": 23763828736.0, + "89": 23763828736.0, + "90": 23763828736.0, + "91": 23763828736.0, + "92": 23763828736.0, + "93": 23763828736.0, + "94": 23763828736.0, + "95": 23763828736.0, + "96": 23763828736.0, + "97": 23763828736.0, + "98": 23763828736.0, + "99": 23763828736.0, + "100": 23763828736.0 + } + }, + "seq_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.32089, + "2": 1.32018, + "3": 1.4064, + "4": 1.44917, + "5": 1.42887, + "6": 1.40553, + "7": 1.39632, + "8": 1.37555, + "9": 1.3583, + "10": 1.33437, + "11": 1.32884, + "12": 1.3072, + "13": 1.2805, + "14": 1.26227, + "15": 1.24915, + "16": 1.22524, + "17": 1.21692, + "18": 1.2105, + "19": 1.20437, + "20": 1.18931, + "21": 1.18174, + "22": 1.17621, + "23": 1.16339, + "24": 1.173, + "25": 1.17085, + "26": 1.16845, + "27": 1.16422, + "28": 1.16385, + "29": 1.1641, + "30": 1.16357, + "31": 1.16598, + "32": 1.16924, + "33": 1.16719, + "34": 1.16792, + "35": 1.16041, + "36": 1.17152, + "37": 1.17295, + "38": 1.17163, + "39": 1.17834, + "40": 1.19164, + "41": 1.18028, + "42": 1.17258, + "43": 1.16541, + "44": 1.17152, + "45": 1.17733, + "46": 1.17068, + "47": 1.17599, + "48": 1.17373, + "49": 1.16127, + "50": 1.16788, + "51": 1.16923, + "52": 1.15478, + "53": 1.16033, + "54": 1.15877, + "55": 1.15443, + "56": 1.15506, + "57": 1.16289, + "58": 1.14657, + "59": 1.14912, + "60": 1.15167, + "61": 1.15629, + "62": 1.14122, + "63": 1.14981, + "64": 1.15805, + "65": 1.14113, + "66": 1.14562, + "67": 1.13979, + "68": 1.14602, + "69": 1.13015, + "70": 1.14297, + "71": 1.15151, + "72": 1.14374, + "73": 1.13866, + "74": 1.14036, + "75": 1.14621, + "76": 1.13948, + "77": 1.13854, + "78": 1.13921, + "79": 1.13275, + "80": 1.13479, + "81": 1.13629, + "82": 1.13664, + "83": 1.13761, + "84": 1.1179, + "85": 1.13829, + "86": 1.13281, + "87": 1.14162, + "88": 1.14743, + "89": 1.14715, + "90": 1.12974, + "91": 1.13998, + "92": 1.15113, + "93": 1.14963, + "94": 1.13262, + "95": 1.13331, + "96": 1.1354, + "97": 1.13989, + "98": 1.12532, + "99": 1.12079, + "100": 1.12265 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.10066, + "2": 11.12339, + "3": 10.56102, + "4": 10.03991, + "5": 9.81947, + "6": 9.46153, + "7": 9.54921, + "8": 8.8373, + "9": 8.64521, + "10": 8.96132, + "11": 8.31931, + "12": 8.34681, + "13": 8.2497, + "14": 7.71141, + "15": 7.84945, + "16": 7.8865, + "17": 7.82718, + "18": 7.56885, + "19": 7.93904, + "20": 7.66202, + "21": 7.35045, + "22": 7.33319, + "23": 7.2052, + "24": 7.20671, + "25": 7.46621, + "26": 6.8815, + "27": 7.40163, + "28": 7.13674, + "29": 7.31636, + "30": 7.4101, + "31": 7.19134, + "32": 7.38627, + "33": 7.43401, + "34": 7.4759, + "35": 7.00817, + "36": 6.87492, + "37": 7.22849, + "38": 6.99492, + "39": 7.34438, + "40": 7.34465, + "41": 7.27569, + "42": 7.02666, + "43": 7.01127, + "44": 7.19087, + "45": 6.91052, + "46": 6.67843, + "47": 7.03605, + "48": 6.85011, + "49": 7.30936, + "50": 6.76769, + "51": 6.83025, + "52": 7.13401, + "53": 7.09536, + "54": 6.99051, + "55": 6.72637, + "56": 7.10668, + "57": 6.82313, + "58": 7.04889, + "59": 6.9575, + "60": 6.41213, + "61": 6.64679, + "62": 7.02446, + "63": 7.09197, + "64": 6.52215, + "65": 7.03747, + "66": 7.24051, + "67": 7.17826, + "68": 6.7737, + "69": 6.74461, + "70": 6.68983, + "71": 6.67037, + "72": 6.7881, + "73": 6.86182, + "74": 6.81589, + "75": 6.78296, + "76": 6.20173, + "77": 7.12268, + "78": 6.67556, + "79": 6.56854, + "80": 6.72873, + "81": 6.52208, + "82": 7.04488, + "83": 6.74482, + "84": 6.68893, + "85": 6.89011, + "86": 6.73528, + "87": 6.84054, + "88": 6.81409, + "89": 6.60285, + "90": 6.7943, + "91": 6.38002, + "92": 6.39329, + "93": 6.50963, + "94": 6.73244, + "95": 6.86635, + "96": 7.04408, + "97": 6.8633, + "98": 6.68823, + "99": 6.77825, + "100": 6.75892 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 37.75177, + "3": 1.21514, + "4": 1.41922, + "5": 1.09814, + "6": 1.23101, + "7": 0.96676, + "8": 0.98051, + "9": 1.00264, + "10": 0.98718, + "11": 1.01295, + "12": 0.99344, + "13": 1.00805, + "14": 1.00035, + "15": 0.99623, + "16": 0.96479, + "17": 1.00276, + "18": 0.97457, + "19": 0.9824, + "20": 0.97117, + "21": 0.98548, + "22": 0.96846, + "23": 0.96686, + "24": 0.97114, + "25": 0.94961, + "26": 0.95318, + "27": 0.95294, + "28": 0.99119, + "29": 0.95107, + "30": 0.93774, + "31": 0.92755, + "32": 0.93542, + "33": 0.93237, + "34": 0.93995, + "35": 0.93163, + "36": 0.94796, + "37": 0.93657, + "38": 0.93782, + "39": 0.97396, + "40": 0.95444, + "41": 0.95377, + "42": 0.94884, + "43": 0.97023, + "44": 0.95639, + "45": 0.94947, + "46": 0.93702, + "47": 0.9584, + "48": 0.95255, + "49": 0.93641, + "50": 0.94351, + "51": 0.94628, + "52": 0.94385, + "53": 0.94097, + "54": 0.92534, + "55": 0.91725, + "56": 0.92602, + "57": 0.93494, + "58": 0.93499, + "59": 0.91453, + "60": 0.91957, + "61": 0.92959, + "62": 0.91904, + "63": 0.92168, + "64": 0.90443, + "65": 0.91828, + "66": 0.90899, + "67": 0.91035, + "68": 0.90691, + "69": 0.9111, + "70": 0.91747, + "71": 0.92327, + "72": 0.92418, + "73": 0.92267, + "74": 0.91489, + "75": 0.92839, + "76": 0.91752, + "77": 0.8982, + "78": 0.90229, + "79": 0.91189, + "80": 0.8993, + "81": 0.89857, + "82": 0.89773, + "83": 0.90722, + "84": 0.89632, + "85": 0.89154, + "86": 0.9066, + "87": 0.8944, + "88": 0.91125, + "89": 0.90146, + "90": 0.90456, + "91": 0.89383, + "92": 0.89868, + "93": 0.90844, + "94": 0.89579, + "95": 0.89247, + "96": 0.90326, + "97": 0.9265, + "98": 0.89127, + "99": 0.9002, + "100": 0.89996 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_offloading/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..69972da0d79 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/dsv3_tp2pp2ep4_offloading/golden_values_dev_dgx_h100.json @@ -0,0 +1,858 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.08746, + "2": 11.03169, + "3": 9.44802, + "4": 9.26223, + "5": 10.2508, + "6": 9.01326, + "7": 9.14368, + "8": 8.66099, + "9": 8.54902, + "10": 8.87517, + "11": 8.35747, + "12": 8.39256, + "13": 8.29824, + "14": 7.76859, + "15": 7.91356, + "16": 7.94466, + "17": 7.89991, + "18": 7.60876, + "19": 7.99408, + "20": 7.70915, + "21": 7.40396, + "22": 7.3879, + "23": 7.25796, + "24": 7.24387, + "25": 7.52202, + "26": 6.92934, + "27": 7.42908, + "28": 7.18115, + "29": 7.35198, + "30": 7.44367, + "31": 7.23133, + "32": 7.41693, + "33": 7.46055, + "34": 7.48601, + "35": 7.04609, + "36": 6.90289, + "37": 7.24405, + "38": 7.02823, + "39": 7.36677, + "40": 7.39518, + "41": 7.30419, + "42": 7.07135, + "43": 7.05866, + "44": 7.225, + "45": 6.96686, + "46": 6.73112, + "47": 7.07493, + "48": 6.88712, + "49": 7.37554, + "50": 6.79061, + "51": 6.86418, + "52": 7.19158, + "53": 7.14634, + "54": 7.00739, + "55": 6.74212, + "56": 7.12732, + "57": 6.84797, + "58": 7.08266, + "59": 6.98616, + "60": 6.42782, + "61": 6.64349, + "62": 7.06202, + "63": 7.10736, + "64": 6.53987, + "65": 7.05814, + "66": 7.2466, + "67": 7.20061, + "68": 6.7727, + "69": 6.74171, + "70": 6.68207, + "71": 6.66203, + "72": 6.80057, + "73": 6.86086, + "74": 6.79981, + "75": 6.76391, + "76": 6.20773, + "77": 7.11502, + "78": 6.67186, + "79": 6.56864, + "80": 6.73177, + "81": 6.5288, + "82": 7.03738, + "83": 6.71383, + "84": 6.66758, + "85": 6.87415, + "86": 6.71852, + "87": 6.8298, + "88": 6.77997, + "89": 6.5599, + "90": 6.768, + "91": 6.37934, + "92": 6.36237, + "93": 6.46748, + "94": 6.70717, + "95": 6.8429, + "96": 7.03237, + "97": 6.84723, + "98": 6.66174, + "99": 6.75518, + "100": 6.71798 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38802228.0, + "2": 38543220.0, + "3": 38745524.0, + "4": 147837792.0, + "5": 296933952.0, + "6": 353229632.0, + "7": 482104224.0, + "8": 677665152.0, + "9": 667849152.0, + "10": 646732416.0, + "11": 659067968.0, + "12": 592541376.0, + "13": 890788224.0, + "14": 620274112.0, + "15": 585840896.0, + "16": 592576640.0, + "17": 598574080.0, + "18": 516995456.0, + "19": 608314240.0, + "20": 555121280.0, + "21": 514120576.0, + "22": 727499072.0, + "23": 514172000.0, + "24": 568477632.0, + "25": 522328320.0, + "26": 520175008.0, + "27": 486108800.0, + "28": 459994208.0, + "29": 406010304.0, + "30": 423326208.0, + "31": 573569728.0, + "32": 403735488.0, + "33": 397564224.0, + "34": 520299776.0, + "35": 331901504.0, + "36": 419880928.0, + "37": 440420288.0, + "38": 347273696.0, + "39": 406427424.0, + "40": 361923808.0, + "41": 296337792.0, + "42": 350027104.0, + "43": 303101344.0, + "44": 296291968.0, + "45": 256846464.0, + "46": 265275264.0, + "47": 252222864.0, + "48": 305941408.0, + "49": 289596512.0, + "50": 243269776.0, + "51": 240810144.0, + "52": 224456448.0, + "53": 160188192.0, + "54": 203198560.0, + "55": 175228448.0, + "56": 153069072.0, + "57": 161047200.0, + "58": 167820944.0, + "59": 152916400.0, + "60": 161219600.0, + "61": 90013936.0, + "62": 127245448.0, + "63": 117377544.0, + "64": 102235096.0, + "65": 104854616.0, + "66": 104718664.0, + "67": 94063000.0, + "68": 94997056.0, + "69": 79061344.0, + "70": 78802200.0, + "71": 58290148.0, + "72": 66201732.0, + "73": 60279780.0, + "74": 57925332.0, + "75": 54598372.0, + "76": 46520900.0, + "77": 47187216.0, + "78": 45095936.0, + "79": 44968960.0, + "80": 45605656.0, + "81": 44046640.0, + "82": 44927824.0, + "83": 41609208.0, + "84": 41571200.0, + "85": 38500360.0, + "86": 38371208.0, + "87": 39021336.0, + "88": 39378624.0, + "89": 39081744.0, + "90": 41258584.0, + "91": 38086496.0, + "92": 40111780.0, + "93": 40023632.0, + "94": 38552344.0, + "95": 38833920.0, + "96": 37421068.0, + "97": 38741736.0, + "98": 38816416.0, + "99": 38780600.0, + "100": 39128652.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6453616128.0, + "2": 6453618176.0, + "3": 6453618176.0, + "4": 6453618176.0, + "5": 6453618176.0, + "6": 6453618176.0, + "7": 6453618176.0, + "8": 6453618176.0, + "9": 6453618176.0, + "10": 6453618176.0, + "11": 6453618176.0, + "12": 6453618176.0, + "13": 6453618176.0, + "14": 6453618176.0, + "15": 6453618176.0, + "16": 6453618176.0, + "17": 6453618176.0, + "18": 6453618176.0, + "19": 6453618176.0, + "20": 6453618176.0, + "21": 6453618176.0, + "22": 6453618176.0, + "23": 6453618176.0, + "24": 6453618176.0, + "25": 6453618176.0, + "26": 6453618176.0, + "27": 6453618176.0, + "28": 6453618176.0, + "29": 6453618176.0, + "30": 6453618176.0, + "31": 6453618176.0, + "32": 6453618176.0, + "33": 6453618176.0, + "34": 6453618176.0, + "35": 6453618176.0, + "36": 6453618176.0, + "37": 6453618176.0, + "38": 6453618176.0, + "39": 6453618176.0, + "40": 6453618176.0, + "41": 6453618176.0, + "42": 6453618176.0, + "43": 6453618176.0, + "44": 6453618176.0, + "45": 6453618176.0, + "46": 6453618176.0, + "47": 6453618176.0, + "48": 6453618176.0, + "49": 6453618176.0, + "50": 6453618176.0, + "51": 6453618176.0, + "52": 6453618176.0, + "53": 6453618176.0, + "54": 6453618176.0, + "55": 6453618176.0, + "56": 6453618176.0, + "57": 6453618176.0, + "58": 6453618176.0, + "59": 6453618176.0, + "60": 6453618176.0, + "61": 6453618176.0, + "62": 6453618176.0, + "63": 6453618176.0, + "64": 6453618176.0, + "65": 6453618176.0, + "66": 6453618176.0, + "67": 6453618176.0, + "68": 6453618176.0, + "69": 6453618176.0, + "70": 6453618176.0, + "71": 6453618176.0, + "72": 6453618176.0, + "73": 6453618176.0, + "74": 6453618176.0, + "75": 6453618176.0, + "76": 6453618176.0, + "77": 6453618176.0, + "78": 6453618176.0, + "79": 6453618176.0, + "80": 6453618176.0, + "81": 6453618176.0, + "82": 6453618176.0, + "83": 6453618176.0, + "84": 6453618176.0, + "85": 6453618176.0, + "86": 6453618176.0, + "87": 6453618176.0, + "88": 6453618176.0, + "89": 6453618176.0, + "90": 6453618176.0, + "91": 6453618176.0, + "92": 6453618176.0, + "93": 6453618176.0, + "94": 6453618176.0, + "95": 6453618176.0, + "96": 6453618176.0, + "97": 6453618176.0, + "98": 6453618176.0, + "99": 6453618176.0, + "100": 6453618176.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 6453624320.0, + "2": 7908994560.0, + "3": 7916840448.0, + "4": 7916840448.0, + "5": 7916840448.0, + "6": 7916840448.0, + "7": 7916840448.0, + "8": 7916840448.0, + "9": 7916840448.0, + "10": 7916840448.0, + "11": 7916840448.0, + "12": 7916840448.0, + "13": 7916840448.0, + "14": 7916840448.0, + "15": 7916840448.0, + "16": 7916840448.0, + "17": 7916840448.0, + "18": 7916840448.0, + "19": 7916840448.0, + "20": 7916840448.0, + "21": 7916840448.0, + "22": 7922667008.0, + "23": 7922667008.0, + "24": 7922667008.0, + "25": 7922667008.0, + "26": 7922667008.0, + "27": 7922667008.0, + "28": 7922667008.0, + "29": 7922667008.0, + "30": 7922667008.0, + "31": 7922667008.0, + "32": 7922667008.0, + "33": 7922667008.0, + "34": 7922667008.0, + "35": 7922667008.0, + "36": 7922667008.0, + "37": 7922667008.0, + "38": 7922667008.0, + "39": 7943397376.0, + "40": 7943397376.0, + "41": 7943397376.0, + "42": 7943397376.0, + "43": 7943397376.0, + "44": 7943397376.0, + "45": 7943397376.0, + "46": 7943397376.0, + "47": 7943397376.0, + "48": 7943397376.0, + "49": 7943397376.0, + "50": 7943397376.0, + "51": 7943397376.0, + "52": 7943397376.0, + "53": 7943397376.0, + "54": 7943397376.0, + "55": 7943397376.0, + "56": 7943397376.0, + "57": 7943397376.0, + "58": 7943397376.0, + "59": 7943397376.0, + "60": 7943397376.0, + "61": 7943397376.0, + "62": 7943397376.0, + "63": 7943397376.0, + "64": 7943397376.0, + "65": 7943397376.0, + "66": 7943397376.0, + "67": 7943397376.0, + "68": 7943397376.0, + "69": 7943397376.0, + "70": 7943397376.0, + "71": 7943397376.0, + "72": 7943397376.0, + "73": 7943397376.0, + "74": 7943397376.0, + "75": 7943397376.0, + "76": 7943397376.0, + "77": 7943397376.0, + "78": 7943397376.0, + "79": 7943397376.0, + "80": 7943397376.0, + "81": 7943397376.0, + "82": 7943397376.0, + "83": 7943397376.0, + "84": 7943397376.0, + "85": 7943397376.0, + "86": 7943397376.0, + "87": 7943397376.0, + "88": 7943397376.0, + "89": 7943397376.0, + "90": 7943397376.0, + "91": 7943397376.0, + "92": 7943397376.0, + "93": 7943397376.0, + "94": 7943397376.0, + "95": 7943397376.0, + "96": 7943397376.0, + "97": 7943397376.0, + "98": 7943397376.0, + "99": 7943397376.0, + "100": 7943397376.0 + } + }, + "seq_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.3195, + "2": 1.31893, + "3": 1.42853, + "4": 1.43194, + "5": 1.44423, + "6": 1.4372, + "7": 1.42359, + "8": 1.40148, + "9": 1.3783, + "10": 1.34847, + "11": 1.3338, + "12": 1.31123, + "13": 1.28507, + "14": 1.26701, + "15": 1.24937, + "16": 1.22499, + "17": 1.21818, + "18": 1.20468, + "19": 1.19368, + "20": 1.17759, + "21": 1.17207, + "22": 1.16742, + "23": 1.14905, + "24": 1.15782, + "25": 1.15126, + "26": 1.15179, + "27": 1.1442, + "28": 1.14088, + "29": 1.14156, + "30": 1.14254, + "31": 1.1454, + "32": 1.15328, + "33": 1.14934, + "34": 1.14766, + "35": 1.13778, + "36": 1.15502, + "37": 1.15077, + "38": 1.14785, + "39": 1.15191, + "40": 1.16329, + "41": 1.16109, + "42": 1.15427, + "43": 1.14788, + "44": 1.15173, + "45": 1.15981, + "46": 1.15668, + "47": 1.15623, + "48": 1.15632, + "49": 1.14216, + "50": 1.15231, + "51": 1.15399, + "52": 1.14078, + "53": 1.14497, + "54": 1.1429, + "55": 1.14381, + "56": 1.14985, + "57": 1.16134, + "58": 1.13865, + "59": 1.14135, + "60": 1.14904, + "61": 1.15094, + "62": 1.13096, + "63": 1.13941, + "64": 1.15087, + "65": 1.13061, + "66": 1.13776, + "67": 1.13023, + "68": 1.14187, + "69": 1.12378, + "70": 1.13704, + "71": 1.14464, + "72": 1.13202, + "73": 1.12766, + "74": 1.1298, + "75": 1.13484, + "76": 1.13077, + "77": 1.12733, + "78": 1.12962, + "79": 1.12481, + "80": 1.12687, + "81": 1.12981, + "82": 1.13033, + "83": 1.13312, + "84": 1.11296, + "85": 1.13438, + "86": 1.1257, + "87": 1.13414, + "88": 1.14312, + "89": 1.14423, + "90": 1.12259, + "91": 1.1381, + "92": 1.14909, + "93": 1.15257, + "94": 1.13182, + "95": 1.13145, + "96": 1.13282, + "97": 1.1387, + "98": 1.12368, + "99": 1.11834, + "100": 1.11855 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "mtp_1 loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.108, + "2": 11.12189, + "3": 10.52558, + "4": 10.0928, + "5": 9.80064, + "6": 9.62419, + "7": 9.64142, + "8": 8.925, + "9": 8.71545, + "10": 9.02083, + "11": 8.39111, + "12": 8.39541, + "13": 8.29189, + "14": 7.73945, + "15": 7.87875, + "16": 7.92045, + "17": 7.86504, + "18": 7.58738, + "19": 7.95997, + "20": 7.67834, + "21": 7.36338, + "22": 7.34447, + "23": 7.21472, + "24": 7.21539, + "25": 7.47252, + "26": 6.88556, + "27": 7.40177, + "28": 7.14754, + "29": 7.3214, + "30": 7.41732, + "31": 7.18891, + "32": 7.37942, + "33": 7.42754, + "34": 7.47331, + "35": 7.01055, + "36": 6.86596, + "37": 7.22023, + "38": 6.99621, + "39": 7.33335, + "40": 7.3399, + "41": 7.27398, + "42": 7.02156, + "43": 7.00205, + "44": 7.16505, + "45": 6.88926, + "46": 6.66914, + "47": 7.01661, + "48": 6.82852, + "49": 7.28961, + "50": 6.75106, + "51": 6.80896, + "52": 7.1231, + "53": 7.09476, + "54": 6.96682, + "55": 6.69955, + "56": 7.08088, + "57": 6.80505, + "58": 7.02715, + "59": 6.92444, + "60": 6.39633, + "61": 6.62029, + "62": 7.00517, + "63": 7.06616, + "64": 6.49358, + "65": 7.0118, + "66": 7.21108, + "67": 7.1572, + "68": 6.74535, + "69": 6.7204, + "70": 6.66325, + "71": 6.63728, + "72": 6.76771, + "73": 6.8409, + "74": 6.78236, + "75": 6.75692, + "76": 6.17702, + "77": 7.09619, + "78": 6.64679, + "79": 6.54997, + "80": 6.6982, + "81": 6.5012, + "82": 7.01591, + "83": 6.7239, + "84": 6.66327, + "85": 6.86873, + "86": 6.71056, + "87": 6.8157, + "88": 6.79038, + "89": 6.57424, + "90": 6.77383, + "91": 6.35714, + "92": 6.36933, + "93": 6.47726, + "94": 6.70782, + "95": 6.84043, + "96": 7.02177, + "97": 6.83985, + "98": 6.66738, + "99": 6.75469, + "100": 6.73191 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 41.06526, + "3": 4.86684, + "4": 4.52999, + "5": 4.65333, + "6": 4.36115, + "7": 4.23824, + "8": 4.18849, + "9": 4.1916, + "10": 4.22674, + "11": 4.27816, + "12": 4.25994, + "13": 4.23996, + "14": 4.23514, + "15": 4.20501, + "16": 4.20243, + "17": 4.24153, + "18": 4.20253, + "19": 4.28514, + "20": 4.1074, + "21": 4.04806, + "22": 4.10002, + "23": 4.06614, + "24": 4.18836, + "25": 4.09867, + "26": 4.132, + "27": 4.27997, + "28": 4.28077, + "29": 4.2981, + "30": 4.24335, + "31": 4.1466, + "32": 4.06161, + "33": 4.06952, + "34": 4.11486, + "35": 4.07319, + "36": 4.06514, + "37": 4.00595, + "38": 4.01521, + "39": 4.01303, + "40": 4.10288, + "41": 4.07163, + "42": 4.08584, + "43": 4.07334, + "44": 4.02623, + "45": 4.0241, + "46": 4.02916, + "47": 4.00444, + "48": 3.99284, + "49": 3.99415, + "50": 3.99376, + "51": 4.01801, + "52": 3.99298, + "53": 3.98868, + "54": 4.04897, + "55": 4.02323, + "56": 4.01553, + "57": 4.00809, + "58": 4.0062, + "59": 4.00154, + "60": 3.91632, + "61": 3.90837, + "62": 3.87697, + "63": 3.90033, + "64": 3.93709, + "65": 3.9106, + "66": 3.89789, + "67": 3.84906, + "68": 3.88993, + "69": 3.88041, + "70": 3.88311, + "71": 3.89878, + "72": 3.90263, + "73": 3.85099, + "74": 3.87548, + "75": 3.87758, + "76": 3.88438, + "77": 3.85553, + "78": 3.83581, + "79": 3.82458, + "80": 3.84224, + "81": 3.83752, + "82": 3.85044, + "83": 3.83136, + "84": 3.83183, + "85": 3.85411, + "86": 3.85577, + "87": 3.86619, + "88": 3.85567, + "89": 3.88877, + "90": 3.87397, + "91": 3.84342, + "92": 3.84892, + "93": 3.88791, + "94": 3.83278, + "95": 3.79308, + "96": 3.81636, + "97": 3.81123, + "98": 3.83559, + "99": 3.79575, + "100": 3.80415 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp1pp1ep8/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp1pp1ep8/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..56b34da573f --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp1pp1ep8/golden_values_dev_dgx_h100.json @@ -0,0 +1,751 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.01703, + "2": 11.04439, + "3": 9.39182, + "4": 9.79578, + "5": 9.28997, + "6": 9.07196, + "7": 9.20472, + "8": 8.76041, + "9": 8.62418, + "10": 8.9493, + "11": 8.36601, + "12": 8.40771, + "13": 8.27, + "14": 7.75992, + "15": 7.89363, + "16": 7.92656, + "17": 7.8838, + "18": 7.63657, + "19": 7.99249, + "20": 7.72567, + "21": 7.41775, + "22": 7.39464, + "23": 7.27391, + "24": 7.2471, + "25": 7.53304, + "26": 6.95745, + "27": 7.4728, + "28": 7.21521, + "29": 7.38443, + "30": 7.48167, + "31": 7.25807, + "32": 7.46772, + "33": 7.50393, + "34": 7.539, + "35": 7.0788, + "36": 6.94116, + "37": 7.29001, + "38": 7.06634, + "39": 7.40955, + "40": 7.42021, + "41": 7.33944, + "42": 7.1014, + "43": 7.09861, + "44": 7.2635, + "45": 7.00199, + "46": 6.77164, + "47": 7.10736, + "48": 6.93341, + "49": 7.41725, + "50": 6.82951, + "51": 6.90183, + "52": 7.23329, + "53": 7.18101, + "54": 7.064, + "55": 6.78723, + "56": 7.15074, + "57": 6.86798, + "58": 7.12296, + "59": 7.02328, + "60": 6.46198, + "61": 6.671, + "62": 7.07679, + "63": 7.12953, + "64": 6.58349, + "65": 7.10475, + "66": 7.26507, + "67": 7.2387, + "68": 6.80809, + "69": 6.79072, + "70": 6.72236, + "71": 6.68876, + "72": 6.83548, + "73": 6.89104, + "74": 6.83714, + "75": 6.80172, + "76": 6.26762, + "77": 7.15212, + "78": 6.72124, + "79": 6.61308, + "80": 6.77598, + "81": 6.56869, + "82": 7.07255, + "83": 6.76676, + "84": 6.7227, + "85": 6.90408, + "86": 6.76833, + "87": 6.86947, + "88": 6.81869, + "89": 6.62666, + "90": 6.82332, + "91": 6.42832, + "92": 6.41359, + "93": 6.50649, + "94": 6.75897, + "95": 6.89352, + "96": 7.06288, + "97": 6.90029, + "98": 6.72483, + "99": 6.8186, + "100": 6.78543 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38808124.0, + "2": 38549032.0, + "3": 41890528.0, + "4": 116357776.0, + "5": 256034256.0, + "6": 387873600.0, + "7": 683421440.0, + "8": 665104896.0, + "9": 664680128.0, + "10": 508289056.0, + "11": 687394880.0, + "12": 683805952.0, + "13": 768137472.0, + "14": 752400064.0, + "15": 705379520.0, + "16": 730943232.0, + "17": 781033472.0, + "18": 702587904.0, + "19": 759270336.0, + "20": 825648896.0, + "21": 737507264.0, + "22": 702371648.0, + "23": 718627712.0, + "24": 628258688.0, + "25": 745663232.0, + "26": 708929920.0, + "27": 589861248.0, + "28": 617271040.0, + "29": 676571200.0, + "30": 627801664.0, + "31": 620798400.0, + "32": 561002624.0, + "33": 633467328.0, + "34": 595820160.0, + "35": 552070464.0, + "36": 495394368.0, + "37": 471902656.0, + "38": 454233248.0, + "39": 346678048.0, + "40": 380820928.0, + "41": 400164512.0, + "42": 501051680.0, + "43": 460385568.0, + "44": 563695488.0, + "45": 514818240.0, + "46": 548393600.0, + "47": 312004448.0, + "48": 463235680.0, + "49": 346253280.0, + "50": 463466208.0, + "51": 439003552.0, + "52": 438371392.0, + "53": 418167072.0, + "54": 288184064.0, + "55": 389152032.0, + "56": 304091712.0, + "57": 431585664.0, + "58": 388031936.0, + "59": 253602032.0, + "60": 400297888.0, + "61": 272505792.0, + "62": 275117760.0, + "63": 353315232.0, + "64": 256399312.0, + "65": 328209344.0, + "66": 337521600.0, + "67": 285967904.0, + "68": 327801216.0, + "69": 242670688.0, + "70": 204645664.0, + "71": 306809472.0, + "72": 330444000.0, + "73": 227007008.0, + "74": 322182944.0, + "75": 299973856.0, + "76": 200681120.0, + "77": 245376064.0, + "78": 214983552.0, + "79": 227423104.0, + "80": 199750544.0, + "81": 242234976.0, + "82": 173917536.0, + "83": 180044176.0, + "84": 227175440.0, + "85": 230396048.0, + "86": 220829824.0, + "87": 230938624.0, + "88": 224980944.0, + "89": 158633632.0, + "90": 248894512.0, + "91": 236276224.0, + "92": 241455696.0, + "93": 178443888.0, + "94": 211577216.0, + "95": 227582928.0, + "96": 204159568.0, + "97": 199192048.0, + "98": 192968576.0, + "99": 167763152.0, + "100": 168119248.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5839049728.0, + "2": 5839051264.0, + "3": 5839051264.0, + "4": 5839051264.0, + "5": 5839051264.0, + "6": 5839051264.0, + "7": 5839051264.0, + "8": 5839051264.0, + "9": 5839051264.0, + "10": 5839051264.0, + "11": 5839051264.0, + "12": 5839051264.0, + "13": 5839051264.0, + "14": 5839051264.0, + "15": 5839051264.0, + "16": 5839051264.0, + "17": 5839051264.0, + "18": 5839051264.0, + "19": 5839051264.0, + "20": 5839051264.0, + "21": 5839051264.0, + "22": 5839051264.0, + "23": 5839051264.0, + "24": 5839051264.0, + "25": 5839051264.0, + "26": 5839051264.0, + "27": 5839051264.0, + "28": 5839051264.0, + "29": 5839051264.0, + "30": 5839051264.0, + "31": 5839051264.0, + "32": 5839051264.0, + "33": 5839051264.0, + "34": 5839051264.0, + "35": 5839051264.0, + "36": 5839051264.0, + "37": 5839051264.0, + "38": 5839051264.0, + "39": 5839051264.0, + "40": 5839051264.0, + "41": 5839051264.0, + "42": 5839051264.0, + "43": 5839051264.0, + "44": 5839051264.0, + "45": 5839051264.0, + "46": 5839051264.0, + "47": 5839051264.0, + "48": 5839051264.0, + "49": 5839051264.0, + "50": 5839051264.0, + "51": 5839051264.0, + "52": 5839051264.0, + "53": 5839051264.0, + "54": 5839051264.0, + "55": 5839051264.0, + "56": 5839051264.0, + "57": 5839051264.0, + "58": 5839051264.0, + "59": 5839051264.0, + "60": 5839051264.0, + "61": 5839051264.0, + "62": 5839051264.0, + "63": 5839051264.0, + "64": 5839051264.0, + "65": 5839051264.0, + "66": 5839051264.0, + "67": 5839051264.0, + "68": 5839051264.0, + "69": 5839051264.0, + "70": 5839051264.0, + "71": 5839051264.0, + "72": 5839051264.0, + "73": 5839051264.0, + "74": 5839051264.0, + "75": 5839051264.0, + "76": 5839051264.0, + "77": 5839051264.0, + "78": 5839051264.0, + "79": 5839051264.0, + "80": 5839051264.0, + "81": 5839051264.0, + "82": 5839051264.0, + "83": 5839051264.0, + "84": 5839051264.0, + "85": 5839051264.0, + "86": 5839051264.0, + "87": 5839051264.0, + "88": 5839051264.0, + "89": 5839051264.0, + "90": 5839051264.0, + "91": 5839051264.0, + "92": 5839051264.0, + "93": 5839051264.0, + "94": 5839051264.0, + "95": 5839051264.0, + "96": 5839051264.0, + "97": 5839051264.0, + "98": 5839051264.0, + "99": 5839051264.0, + "100": 5839051264.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 27967320064.0, + "2": 29549088768.0, + "3": 34374254592.0, + "4": 34374254592.0, + "5": 34374254592.0, + "6": 34374254592.0, + "7": 34374254592.0, + "8": 34374254592.0, + "9": 34374254592.0, + "10": 34374254592.0, + "11": 34374254592.0, + "12": 34374254592.0, + "13": 34374254592.0, + "14": 34374254592.0, + "15": 34374254592.0, + "16": 34374254592.0, + "17": 34374254592.0, + "18": 34374254592.0, + "19": 34374254592.0, + "20": 34374254592.0, + "21": 34374254592.0, + "22": 34374254592.0, + "23": 34374254592.0, + "24": 34374254592.0, + "25": 34374254592.0, + "26": 34374254592.0, + "27": 34374254592.0, + "28": 34374254592.0, + "29": 34374254592.0, + "30": 34374254592.0, + "31": 34374254592.0, + "32": 34374254592.0, + "33": 34374254592.0, + "34": 34374254592.0, + "35": 34374254592.0, + "36": 34374254592.0, + "37": 34374254592.0, + "38": 34374254592.0, + "39": 34374254592.0, + "40": 34374254592.0, + "41": 34374254592.0, + "42": 34374254592.0, + "43": 34374254592.0, + "44": 34374254592.0, + "45": 34374254592.0, + "46": 34374254592.0, + "47": 34374254592.0, + "48": 34374254592.0, + "49": 34374254592.0, + "50": 34374254592.0, + "51": 34374254592.0, + "52": 34374254592.0, + "53": 34374254592.0, + "54": 34374254592.0, + "55": 34374254592.0, + "56": 34374254592.0, + "57": 34374254592.0, + "58": 34374254592.0, + "59": 34374254592.0, + "60": 34374254592.0, + "61": 34374254592.0, + "62": 34374254592.0, + "63": 34374254592.0, + "64": 34374254592.0, + "65": 34374254592.0, + "66": 34374254592.0, + "67": 34374254592.0, + "68": 34374254592.0, + "69": 34374254592.0, + "70": 34374254592.0, + "71": 34374254592.0, + "72": 34374254592.0, + "73": 34374254592.0, + "74": 34374254592.0, + "75": 34374254592.0, + "76": 34374254592.0, + "77": 34374254592.0, + "78": 34374254592.0, + "79": 34374254592.0, + "80": 34374254592.0, + "81": 34374254592.0, + "82": 34374254592.0, + "83": 34374254592.0, + "84": 34374254592.0, + "85": 34374254592.0, + "86": 34374254592.0, + "87": 34374254592.0, + "88": 34374254592.0, + "89": 34374254592.0, + "90": 34374254592.0, + "91": 34374254592.0, + "92": 34374254592.0, + "93": 34374254592.0, + "94": 34374254592.0, + "95": 34374254592.0, + "96": 34374254592.0, + "97": 34374254592.0, + "98": 34374254592.0, + "99": 34374254592.0, + "100": 34374254592.0 + } + }, + "global_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.23221, + "2": 1.21833, + "3": 2.02172, + "4": 2.43369, + "5": 2.60261, + "6": 2.56556, + "7": 2.55288, + "8": 2.52408, + "9": 2.50705, + "10": 2.46688, + "11": 2.41257, + "12": 2.50416, + "13": 2.5745, + "14": 2.51508, + "15": 2.54743, + "16": 2.70309, + "17": 2.75911, + "18": 2.60608, + "19": 2.68766, + "20": 2.64369, + "21": 2.62749, + "22": 2.56216, + "23": 2.50329, + "24": 2.40894, + "25": 2.45126, + "26": 2.31637, + "27": 2.37973, + "28": 2.28472, + "29": 2.25889, + "30": 2.28365, + "31": 2.1907, + "32": 2.16625, + "33": 2.21448, + "34": 2.1989, + "35": 2.07223, + "36": 1.99949, + "37": 2.05811, + "38": 2.01918, + "39": 1.9891, + "40": 1.97091, + "41": 2.02078, + "42": 1.90698, + "43": 1.93998, + "44": 1.98793, + "45": 1.90911, + "46": 1.8337, + "47": 1.88192, + "48": 1.82595, + "49": 1.95836, + "50": 1.8042, + "51": 1.78229, + "52": 1.93637, + "53": 1.93758, + "54": 1.84485, + "55": 1.78763, + "56": 1.82531, + "57": 1.77222, + "58": 1.77883, + "59": 1.73946, + "60": 1.68867, + "61": 1.67273, + "62": 1.70716, + "63": 1.71551, + "64": 1.68262, + "65": 1.74674, + "66": 1.7761, + "67": 1.79537, + "68": 1.6955, + "69": 1.71014, + "70": 1.65104, + "71": 1.64652, + "72": 1.69455, + "73": 1.67294, + "74": 1.69744, + "75": 1.6622, + "76": 1.61694, + "77": 1.71765, + "78": 1.60985, + "79": 1.61696, + "80": 1.62036, + "81": 1.61376, + "82": 1.65171, + "83": 1.63251, + "84": 1.62222, + "85": 1.64275, + "86": 1.62568, + "87": 1.63006, + "88": 1.62006, + "89": 1.57217, + "90": 1.67753, + "91": 1.59858, + "92": 1.60317, + "93": 1.56398, + "94": 1.64063, + "95": 1.63116, + "96": 1.66586, + "97": 1.61482, + "98": 1.59708, + "99": 1.61382, + "100": 1.59051 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 24.49051, + "3": 0.60328, + "4": 0.51658, + "5": 0.46003, + "6": 0.53184, + "7": 0.52534, + "8": 0.75563, + "9": 0.39181, + "10": 0.48228, + "11": 0.40426, + "12": 0.46834, + "13": 0.52047, + "14": 0.47337, + "15": 0.39842, + "16": 0.43026, + "17": 0.39154, + "18": 0.37626, + "19": 0.37353, + "20": 0.37468, + "21": 0.3832, + "22": 0.40365, + "23": 0.39088, + "24": 0.42688, + "25": 0.40006, + "26": 0.41278, + "27": 0.41075, + "28": 0.37339, + "29": 0.3821, + "30": 0.37561, + "31": 0.36958, + "32": 0.38534, + "33": 0.39248, + "34": 0.45248, + "35": 0.37998, + "36": 0.38085, + "37": 0.38685, + "38": 0.38019, + "39": 0.37529, + "40": 0.37238, + "41": 0.37826, + "42": 0.37035, + "43": 0.3723, + "44": 0.37646, + "45": 0.36957, + "46": 0.36018, + "47": 0.36161, + "48": 0.3559, + "49": 0.36302, + "50": 0.35136, + "51": 0.35769, + "52": 0.3658, + "53": 0.37425, + "54": 0.35837, + "55": 0.35509, + "56": 0.35071, + "57": 0.34645, + "58": 0.34946, + "59": 0.34264, + "60": 0.34035, + "61": 0.33638, + "62": 0.34462, + "63": 0.33953, + "64": 0.34127, + "65": 0.3511, + "66": 0.34382, + "67": 0.34216, + "68": 0.34256, + "69": 0.34126, + "70": 0.33989, + "71": 0.34282, + "72": 0.34048, + "73": 0.33817, + "74": 0.34408, + "75": 0.33993, + "76": 0.33447, + "77": 0.34721, + "78": 0.33616, + "79": 0.33381, + "80": 0.3358, + "81": 0.33153, + "82": 0.33918, + "83": 0.334, + "84": 0.34074, + "85": 0.33777, + "86": 0.33815, + "87": 0.34001, + "88": 0.33439, + "89": 0.33354, + "90": 0.34861, + "91": 0.33222, + "92": 0.34094, + "93": 0.33184, + "94": 0.3508, + "95": 0.34722, + "96": 0.34498, + "97": 0.33449, + "98": 0.33677, + "99": 0.33501, + "100": 0.3394 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp2pp1ep8_muon/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp2pp1ep8_muon/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..4379e3ab633 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp2pp1ep8_muon/golden_values_dev_dgx_h100.json @@ -0,0 +1,751 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.03229, + "2": 11.06283, + "3": 10.61465, + "4": 10.35058, + "5": 10.02796, + "6": 9.6407, + "7": 9.67063, + "8": 9.12835, + "9": 8.90557, + "10": 9.22954, + "11": 8.64606, + "12": 8.61967, + "13": 8.49667, + "14": 7.98042, + "15": 8.05515, + "16": 8.1316, + "17": 8.06076, + "18": 7.75826, + "19": 8.05887, + "20": 7.76047, + "21": 7.48279, + "22": 7.42605, + "23": 7.35389, + "24": 7.24747, + "25": 7.52972, + "26": 6.94352, + "27": 7.39465, + "28": 7.1481, + "29": 7.3092, + "30": 7.33059, + "31": 7.15804, + "32": 7.28647, + "33": 7.39054, + "34": 7.36597, + "35": 6.97988, + "36": 6.80231, + "37": 7.1605, + "38": 6.91909, + "39": 7.26314, + "40": 7.26441, + "41": 7.14477, + "42": 6.96892, + "43": 6.96586, + "44": 7.07861, + "45": 6.76674, + "46": 6.56314, + "47": 6.92743, + "48": 6.71879, + "49": 7.20878, + "50": 6.66097, + "51": 6.70764, + "52": 7.01128, + "53": 6.92433, + "54": 6.89366, + "55": 6.60015, + "56": 6.86226, + "57": 6.66366, + "58": 6.91481, + "59": 6.77251, + "60": 6.30308, + "61": 6.40311, + "62": 6.81115, + "63": 6.85135, + "64": 6.38067, + "65": 6.84315, + "66": 7.00928, + "67": 7.05311, + "68": 6.61688, + "69": 6.57927, + "70": 6.53479, + "71": 6.39603, + "72": 6.6264, + "73": 6.66771, + "74": 6.5325, + "75": 6.61858, + "76": 6.01296, + "77": 6.91237, + "78": 6.47706, + "79": 6.43394, + "80": 6.52704, + "81": 6.3841, + "82": 6.8633, + "83": 6.56361, + "84": 6.51861, + "85": 6.63523, + "86": 6.54406, + "87": 6.6225, + "88": 6.5918, + "89": 6.36046, + "90": 6.63235, + "91": 6.20831, + "92": 6.12779, + "93": 6.23273, + "94": 6.52537, + "95": 6.65411, + "96": 6.79886, + "97": 6.65789, + "98": 6.46253, + "99": 6.58186, + "100": 6.55271 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38807864.0, + "2": 38549096.0, + "3": 38742532.0, + "4": 37691416.0, + "5": 38945144.0, + "6": 38614696.0, + "7": 38495744.0, + "8": 39035328.0, + "9": 38654512.0, + "10": 39556120.0, + "11": 42486216.0, + "12": 38877840.0, + "13": 69756528.0, + "14": 66617608.0, + "15": 82519424.0, + "16": 104948384.0, + "17": 95267120.0, + "18": 73439376.0, + "19": 98682208.0, + "20": 98985944.0, + "21": 80011184.0, + "22": 66912632.0, + "23": 161833040.0, + "24": 128053976.0, + "25": 210928800.0, + "26": 199353888.0, + "27": 146331456.0, + "28": 208355264.0, + "29": 176420608.0, + "30": 118214272.0, + "31": 202404272.0, + "32": 82862344.0, + "33": 199403472.0, + "34": 79910328.0, + "35": 202913040.0, + "36": 206007424.0, + "37": 182525184.0, + "38": 117654376.0, + "39": 161088848.0, + "40": 154326592.0, + "41": 201973488.0, + "42": 142438912.0, + "43": 189866208.0, + "44": 98133768.0, + "45": 156209616.0, + "46": 133186696.0, + "47": 186177920.0, + "48": 142394480.0, + "49": 122892776.0, + "50": 148918880.0, + "51": 143324928.0, + "52": 148982896.0, + "53": 109892528.0, + "54": 134012464.0, + "55": 109193832.0, + "56": 121621568.0, + "57": 142198960.0, + "58": 130088152.0, + "59": 127775320.0, + "60": 129786496.0, + "61": 130950272.0, + "62": 124104840.0, + "63": 114237008.0, + "64": 130563920.0, + "65": 123769296.0, + "66": 101592440.0, + "67": 119254416.0, + "68": 123327240.0, + "69": 72779760.0, + "70": 91415536.0, + "71": 92920872.0, + "72": 107120704.0, + "73": 91764360.0, + "74": 101981632.0, + "75": 104961616.0, + "76": 109452032.0, + "77": 110120936.0, + "78": 95444648.0, + "79": 117342896.0, + "80": 108533840.0, + "81": 119555248.0, + "82": 104730176.0, + "83": 101393232.0, + "84": 98212752.0, + "85": 82548360.0, + "86": 101298960.0, + "87": 123967712.0, + "88": 92881840.0, + "89": 124031552.0, + "90": 116779128.0, + "91": 113603824.0, + "92": 81027928.0, + "93": 96665040.0, + "94": 98345816.0, + "95": 104923216.0, + "96": 106642704.0, + "97": 104828656.0, + "98": 123774640.0, + "99": 92280208.0, + "100": 61159024.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5164616704.0, + "2": 5164464128.0, + "3": 5164846592.0, + "4": 5164012544.0, + "5": 5163921408.0, + "6": 5164862464.0, + "7": 5164427264.0, + "8": 5164945920.0, + "9": 5164630016.0, + "10": 5165398528.0, + "11": 5166358016.0, + "12": 5166326784.0, + "13": 5167418368.0, + "14": 5167156224.0, + "15": 5167451648.0, + "16": 5167816192.0, + "17": 5167300608.0, + "18": 5167399936.0, + "19": 5167505920.0, + "20": 5167549952.0, + "21": 5167638016.0, + "22": 5168112128.0, + "23": 5167609344.0, + "24": 5167376384.0, + "25": 5167323648.0, + "26": 5167346176.0, + "27": 5167627776.0, + "28": 5167067136.0, + "29": 5167097856.0, + "30": 5167289344.0, + "31": 5167456256.0, + "32": 5167542272.0, + "33": 5167672320.0, + "34": 5167719936.0, + "35": 5168186368.0, + "36": 5168129536.0, + "37": 5168017920.0, + "38": 5168032768.0, + "39": 5167564800.0, + "40": 5167377920.0, + "41": 5167475200.0, + "42": 5167211520.0, + "43": 5167788032.0, + "44": 5167472640.0, + "45": 5166945280.0, + "46": 5167180800.0, + "47": 5167170560.0, + "48": 5167196672.0, + "49": 5166870016.0, + "50": 5167164928.0, + "51": 5166551552.0, + "52": 5166326784.0, + "53": 5166607360.0, + "54": 5166118912.0, + "55": 5166547968.0, + "56": 5166593024.0, + "57": 5166532608.0, + "58": 5166304256.0, + "59": 5166757888.0, + "60": 5166023680.0, + "61": 5166556160.0, + "62": 5166401024.0, + "63": 5166593536.0, + "64": 5165926912.0, + "65": 5166305792.0, + "66": 5166389760.0, + "67": 5166265856.0, + "68": 5165746176.0, + "69": 5166175232.0, + "70": 5166344192.0, + "71": 5166054400.0, + "72": 5166317056.0, + "73": 5166567936.0, + "74": 5166349312.0, + "75": 5166853120.0, + "76": 5166771712.0, + "77": 5166111744.0, + "78": 5166678528.0, + "79": 5166682624.0, + "80": 5166760448.0, + "81": 5166550016.0, + "82": 5166609920.0, + "83": 5166603264.0, + "84": 5166662656.0, + "85": 5166124544.0, + "86": 5166369792.0, + "87": 5166479360.0, + "88": 5166396416.0, + "89": 5166067200.0, + "90": 5166131200.0, + "91": 5166662144.0, + "92": 5166668288.0, + "93": 5166449152.0, + "94": 5166358528.0, + "95": 5166448640.0, + "96": 5166174208.0, + "97": 5166289920.0, + "98": 5166466048.0, + "99": 5166997504.0, + "100": 5167059968.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 15786969088.0, + "2": 17799479296.0, + "3": 17799479296.0, + "4": 17799479296.0, + "5": 17799479296.0, + "6": 17799479296.0, + "7": 17799479296.0, + "8": 17799479296.0, + "9": 18026358784.0, + "10": 18026358784.0, + "11": 18773534720.0, + "12": 18925803520.0, + "13": 19718295552.0, + "14": 19718295552.0, + "15": 19718295552.0, + "16": 20017494016.0, + "17": 20039491584.0, + "18": 20039491584.0, + "19": 20039491584.0, + "20": 20092014592.0, + "21": 20092014592.0, + "22": 20271822848.0, + "23": 20271822848.0, + "24": 20271822848.0, + "25": 20271822848.0, + "26": 20271822848.0, + "27": 20271822848.0, + "28": 20271822848.0, + "29": 20271822848.0, + "30": 20271822848.0, + "31": 20271822848.0, + "32": 20271822848.0, + "33": 20271822848.0, + "34": 20271822848.0, + "35": 20356554752.0, + "36": 20356554752.0, + "37": 20356554752.0, + "38": 20356554752.0, + "39": 20356554752.0, + "40": 20356554752.0, + "41": 20356554752.0, + "42": 20356554752.0, + "43": 20356554752.0, + "44": 20356554752.0, + "45": 20356554752.0, + "46": 20356554752.0, + "47": 20356554752.0, + "48": 20356554752.0, + "49": 20356554752.0, + "50": 20356554752.0, + "51": 20356554752.0, + "52": 20356554752.0, + "53": 20356554752.0, + "54": 20356554752.0, + "55": 20356554752.0, + "56": 20356554752.0, + "57": 20356554752.0, + "58": 20356554752.0, + "59": 20356554752.0, + "60": 20356554752.0, + "61": 20356554752.0, + "62": 20356554752.0, + "63": 20356554752.0, + "64": 20356554752.0, + "65": 20356554752.0, + "66": 20356554752.0, + "67": 20356554752.0, + "68": 20356554752.0, + "69": 20356554752.0, + "70": 20356554752.0, + "71": 20356554752.0, + "72": 20356554752.0, + "73": 20356554752.0, + "74": 20356554752.0, + "75": 20356554752.0, + "76": 20356554752.0, + "77": 20356554752.0, + "78": 20356554752.0, + "79": 20356554752.0, + "80": 20356554752.0, + "81": 20356554752.0, + "82": 20356554752.0, + "83": 20356554752.0, + "84": 20356554752.0, + "85": 20356554752.0, + "86": 20356554752.0, + "87": 20356554752.0, + "88": 20356554752.0, + "89": 20356554752.0, + "90": 20356554752.0, + "91": 20356554752.0, + "92": 20356554752.0, + "93": 20356554752.0, + "94": 20356554752.0, + "95": 20356554752.0, + "96": 20356554752.0, + "97": 20356554752.0, + "98": 20356554752.0, + "99": 20356554752.0, + "100": 20356554752.0 + } + }, + "global_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.29383, + "2": 1.23727, + "3": 1.34885, + "4": 1.34974, + "5": 1.43339, + "6": 1.50301, + "7": 1.58863, + "8": 1.71828, + "9": 1.74586, + "10": 1.73649, + "11": 1.83227, + "12": 1.92535, + "13": 2.02851, + "14": 1.98736, + "15": 2.04125, + "16": 2.10075, + "17": 2.10469, + "18": 2.1035, + "19": 2.15461, + "20": 2.06595, + "21": 2.11022, + "22": 2.10878, + "23": 2.12894, + "24": 2.07103, + "25": 2.10206, + "26": 2.07125, + "27": 2.02041, + "28": 2.07235, + "29": 2.06028, + "30": 1.97928, + "31": 2.00988, + "32": 1.95824, + "33": 2.05732, + "34": 1.96999, + "35": 1.98177, + "36": 1.92454, + "37": 1.96509, + "38": 1.93897, + "39": 1.92825, + "40": 1.91927, + "41": 1.91601, + "42": 1.87599, + "43": 1.9061, + "44": 1.85799, + "45": 1.79277, + "46": 1.79497, + "47": 1.83411, + "48": 1.80874, + "49": 1.87289, + "50": 1.82218, + "51": 1.80711, + "52": 1.86665, + "53": 1.84554, + "54": 1.80715, + "55": 1.79256, + "56": 1.77444, + "57": 1.79768, + "58": 1.8485, + "59": 1.85653, + "60": 1.82666, + "61": 1.83345, + "62": 1.82621, + "63": 1.82356, + "64": 1.84148, + "65": 1.88432, + "66": 1.8594, + "67": 1.89422, + "68": 1.86367, + "69": 1.87048, + "70": 1.82569, + "71": 1.79379, + "72": 1.8637, + "73": 1.83795, + "74": 1.8236, + "75": 1.83368, + "76": 1.79774, + "77": 1.85594, + "78": 1.81214, + "79": 1.80976, + "80": 1.80911, + "81": 1.79744, + "82": 1.82821, + "83": 1.81447, + "84": 1.8023, + "85": 1.80103, + "86": 1.8226, + "87": 1.82098, + "88": 1.79714, + "89": 1.78771, + "90": 1.83842, + "91": 1.79982, + "92": 1.78345, + "93": 1.72786, + "94": 1.78795, + "95": 1.7831, + "96": 1.80042, + "97": 1.79266, + "98": 1.80145, + "99": 1.7983, + "100": 1.7744 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 18.82948, + "3": 0.88454, + "4": 0.83792, + "5": 0.79224, + "6": 0.79523, + "7": 0.77715, + "8": 0.77485, + "9": 0.7773, + "10": 0.77808, + "11": 0.77456, + "12": 0.7663, + "13": 0.76712, + "14": 0.74243, + "15": 0.73631, + "16": 0.74735, + "17": 0.74134, + "18": 0.73863, + "19": 0.74082, + "20": 0.75044, + "21": 0.74055, + "22": 0.7457, + "23": 1.60741, + "24": 0.73754, + "25": 0.73379, + "26": 0.72834, + "27": 0.73434, + "28": 0.74395, + "29": 0.81175, + "30": 0.74173, + "31": 1.75124, + "32": 0.74549, + "33": 0.75109, + "34": 0.7494, + "35": 1.57526, + "36": 0.74733, + "37": 0.74901, + "38": 0.75057, + "39": 0.74498, + "40": 0.75196, + "41": 0.75121, + "42": 0.74605, + "43": 0.75233, + "44": 0.74708, + "45": 0.74818, + "46": 0.75043, + "47": 0.78099, + "48": 0.7604, + "49": 0.74654, + "50": 0.74129, + "51": 0.74531, + "52": 0.74307, + "53": 0.74272, + "54": 0.74453, + "55": 0.74161, + "56": 0.75837, + "57": 0.75129, + "58": 0.74905, + "59": 0.76391, + "60": 0.75203, + "61": 0.74294, + "62": 0.7483, + "63": 0.74823, + "64": 0.75303, + "65": 0.75264, + "66": 0.75207, + "67": 0.74505, + "68": 0.72392, + "69": 0.72863, + "70": 0.72567, + "71": 0.73153, + "72": 0.73359, + "73": 0.72391, + "74": 0.7228, + "75": 0.72724, + "76": 0.72369, + "77": 0.73478, + "78": 0.73666, + "79": 0.72791, + "80": 0.72578, + "81": 0.72374, + "82": 0.72116, + "83": 0.72222, + "84": 0.7255, + "85": 0.72474, + "86": 0.72255, + "87": 0.72602, + "88": 0.72631, + "89": 0.72515, + "90": 0.71978, + "91": 0.72636, + "92": 0.7222, + "93": 0.72206, + "94": 0.72373, + "95": 0.72553, + "96": 0.72616, + "97": 0.72461, + "98": 0.72144, + "99": 0.72119, + "100": 0.72135 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp2pp2ep4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp2pp2ep4/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..2765bfe10a5 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp2pp2ep4/golden_values_dev_dgx_h100.json @@ -0,0 +1,751 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 10.98969, + "2": 10.94269, + "3": 9.44133, + "4": 9.57178, + "5": 10.12784, + "6": 9.13045, + "7": 9.29422, + "8": 8.97693, + "9": 8.78194, + "10": 8.97218, + "11": 8.46014, + "12": 8.50026, + "13": 8.37284, + "14": 7.88799, + "15": 7.98037, + "16": 7.98804, + "17": 7.97945, + "18": 7.66161, + "19": 8.01302, + "20": 7.73591, + "21": 7.43185, + "22": 7.43672, + "23": 7.27938, + "24": 7.27298, + "25": 7.54592, + "26": 6.94757, + "27": 7.46341, + "28": 7.21482, + "29": 7.37474, + "30": 7.47644, + "31": 7.25958, + "32": 7.45259, + "33": 7.50292, + "34": 7.53704, + "35": 7.08282, + "36": 6.94489, + "37": 7.30197, + "38": 7.08429, + "39": 7.41717, + "40": 7.43819, + "41": 7.35677, + "42": 7.13377, + "43": 7.11154, + "44": 7.27338, + "45": 7.02137, + "46": 6.79717, + "47": 7.11416, + "48": 6.94613, + "49": 7.43798, + "50": 6.85213, + "51": 6.93255, + "52": 7.27021, + "53": 7.23162, + "54": 7.12688, + "55": 6.82526, + "56": 7.20027, + "57": 6.90069, + "58": 7.17655, + "59": 7.08175, + "60": 6.51156, + "61": 6.72127, + "62": 7.14205, + "63": 7.20045, + "64": 6.63384, + "65": 7.15774, + "66": 7.3498, + "67": 7.29959, + "68": 6.88081, + "69": 6.85124, + "70": 6.78012, + "71": 6.74865, + "72": 6.89667, + "73": 6.92164, + "74": 6.89531, + "75": 7.08132, + "76": 6.33078, + "77": 7.22606, + "78": 6.78546, + "79": 6.69139, + "80": 6.85076, + "81": 6.64662, + "82": 7.14356, + "83": 6.83037, + "84": 6.78413, + "85": 6.99803, + "86": 6.84845, + "87": 6.95273, + "88": 6.86133, + "89": 6.71087, + "90": 6.8915, + "91": 6.51576, + "92": 6.48661, + "93": 6.59179, + "94": 6.84333, + "95": 6.96859, + "96": 7.16109, + "97": 6.99177, + "98": 6.80047, + "99": 6.88787, + "100": 6.85856 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38807996.0, + "2": 38548956.0, + "3": 38743192.0, + "4": 72314416.0, + "5": 230903200.0, + "6": 453912608.0, + "7": 617362368.0, + "8": 863241536.0, + "9": 724465536.0, + "10": 747396096.0, + "11": 788098304.0, + "12": 721510720.0, + "13": 661215936.0, + "14": 642278976.0, + "15": 620454528.0, + "16": 680608896.0, + "17": 708652352.0, + "18": 680564928.0, + "19": 677483648.0, + "20": 734426176.0, + "21": 665113216.0, + "22": 639460480.0, + "23": 627401536.0, + "24": 710011840.0, + "25": 692181120.0, + "26": 633419648.0, + "27": 621328256.0, + "28": 560678400.0, + "29": 638795264.0, + "30": 634098688.0, + "31": 652243776.0, + "32": 611337216.0, + "33": 617741504.0, + "34": 586369792.0, + "35": 574091520.0, + "36": 507983424.0, + "37": 490786208.0, + "38": 520283936.0, + "39": 557440384.0, + "40": 516094368.0, + "41": 488221888.0, + "42": 428712160.0, + "43": 677436800.0, + "44": 447334272.0, + "45": 388982880.0, + "46": 457177920.0, + "47": 481867904.0, + "48": 419218112.0, + "49": 358829312.0, + "50": 535818016.0, + "51": 338336448.0, + "52": 554760896.0, + "53": 377248256.0, + "54": 423433536.0, + "55": 398606464.0, + "56": 335533472.0, + "57": 519664160.0, + "58": 303094976.0, + "59": 313367072.0, + "60": 510399232.0, + "61": 344847200.0, + "62": 287684416.0, + "63": 450825888.0, + "64": 423116256.0, + "65": 413146976.0, + "66": 406713728.0, + "67": 251372640.0, + "68": 274330048.0, + "69": 264674160.0, + "70": 261273664.0, + "71": 218730784.0, + "72": 390213216.0, + "73": 211289440.0, + "74": 218397184.0, + "75": 148978400.0, + "76": 175512480.0, + "77": 173040928.0, + "78": 331370304.0, + "79": 365834688.0, + "80": 350744864.0, + "81": 333470752.0, + "82": 343783200.0, + "83": 173743504.0, + "84": 173699264.0, + "85": 195813312.0, + "86": 148479104.0, + "87": 205779712.0, + "88": 303626784.0, + "89": 152330768.0, + "90": 327528096.0, + "91": 151356896.0, + "92": 194280720.0, + "93": 253955312.0, + "94": 186443536.0, + "95": 299933184.0, + "96": 304810208.0, + "97": 183472368.0, + "98": 167803104.0, + "99": 290446144.0, + "100": 284504544.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 4852344832.0, + "2": 4854810112.0, + "3": 4846593536.0, + "4": 4842550272.0, + "5": 4845446144.0, + "6": 4852108800.0, + "7": 4853625344.0, + "8": 4850128896.0, + "9": 4849647616.0, + "10": 4848303104.0, + "11": 4859386880.0, + "12": 4860119552.0, + "13": 4859424256.0, + "14": 4860503552.0, + "15": 4858298880.0, + "16": 4859518464.0, + "17": 4858603520.0, + "18": 4855634944.0, + "19": 4855728128.0, + "20": 4855499264.0, + "21": 4855516672.0, + "22": 4856185344.0, + "23": 4854414336.0, + "24": 4858136064.0, + "25": 4861625856.0, + "26": 4855058944.0, + "27": 4855199744.0, + "28": 4854846976.0, + "29": 4855630848.0, + "30": 4854733824.0, + "31": 4856314880.0, + "32": 4858267136.0, + "33": 4857998336.0, + "34": 4857282560.0, + "35": 4856235008.0, + "36": 4856565248.0, + "37": 4855668736.0, + "38": 4855953408.0, + "39": 4855832064.0, + "40": 4856911872.0, + "41": 4854017024.0, + "42": 4854135296.0, + "43": 4853044224.0, + "44": 4852092416.0, + "45": 4852727808.0, + "46": 4853379584.0, + "47": 4854127616.0, + "48": 4853984768.0, + "49": 4854060032.0, + "50": 4854696448.0, + "51": 4855526400.0, + "52": 4855796224.0, + "53": 4856049152.0, + "54": 4854604288.0, + "55": 4853985792.0, + "56": 4853196288.0, + "57": 4852560384.0, + "58": 4852393984.0, + "59": 4851395072.0, + "60": 4851994624.0, + "61": 4852775936.0, + "62": 4852408832.0, + "63": 4852465664.0, + "64": 4852731392.0, + "65": 4853173248.0, + "66": 4852724224.0, + "67": 4853165056.0, + "68": 4853029888.0, + "69": 4852538880.0, + "70": 4854194688.0, + "71": 4853723648.0, + "72": 4853012992.0, + "73": 4852541440.0, + "74": 4852973056.0, + "75": 4853424128.0, + "76": 4852229120.0, + "77": 4852233216.0, + "78": 4852518400.0, + "79": 4854306816.0, + "80": 4854050304.0, + "81": 4853029888.0, + "82": 4853646848.0, + "83": 4852160000.0, + "84": 4852318720.0, + "85": 4852516352.0, + "86": 4851066880.0, + "87": 4852100096.0, + "88": 4851771392.0, + "89": 4852435456.0, + "90": 4851024896.0, + "91": 4852230656.0, + "92": 4852848128.0, + "93": 4853720064.0, + "94": 4851213312.0, + "95": 4852602368.0, + "96": 4850690048.0, + "97": 4851885568.0, + "98": 4853395968.0, + "99": 4852758528.0, + "100": 4851643904.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 13116608512.0, + "2": 15313514496.0, + "3": 15313514496.0, + "4": 15313514496.0, + "5": 15313514496.0, + "6": 15384104960.0, + "7": 15384104960.0, + "8": 15384104960.0, + "9": 15384104960.0, + "10": 15384104960.0, + "11": 15384104960.0, + "12": 15384104960.0, + "13": 15384104960.0, + "14": 15384104960.0, + "15": 15384104960.0, + "16": 15384104960.0, + "17": 15384104960.0, + "18": 15384104960.0, + "19": 15384104960.0, + "20": 15384104960.0, + "21": 15455662080.0, + "22": 15539045376.0, + "23": 15539045376.0, + "24": 15656714240.0, + "25": 15686926336.0, + "26": 15686926336.0, + "27": 15686926336.0, + "28": 15686926336.0, + "29": 15686926336.0, + "30": 15686926336.0, + "31": 15686926336.0, + "32": 15686926336.0, + "33": 15686926336.0, + "34": 15686926336.0, + "35": 15686926336.0, + "36": 15686926336.0, + "37": 15686926336.0, + "38": 15686926336.0, + "39": 15686926336.0, + "40": 15686926336.0, + "41": 15686926336.0, + "42": 15686926336.0, + "43": 15686926336.0, + "44": 15686926336.0, + "45": 15686926336.0, + "46": 15686926336.0, + "47": 15686926336.0, + "48": 15686926336.0, + "49": 15686926336.0, + "50": 15686926336.0, + "51": 15686926336.0, + "52": 15686926336.0, + "53": 15686926336.0, + "54": 15686926336.0, + "55": 15686926336.0, + "56": 15686926336.0, + "57": 15686926336.0, + "58": 15686926336.0, + "59": 15686926336.0, + "60": 15686926336.0, + "61": 15686926336.0, + "62": 15686926336.0, + "63": 15686926336.0, + "64": 15686926336.0, + "65": 15686926336.0, + "66": 15686926336.0, + "67": 15686926336.0, + "68": 15686926336.0, + "69": 15686926336.0, + "70": 15686926336.0, + "71": 15686926336.0, + "72": 15686926336.0, + "73": 15686926336.0, + "74": 15686926336.0, + "75": 15686926336.0, + "76": 15686926336.0, + "77": 15686926336.0, + "78": 15686926336.0, + "79": 15686926336.0, + "80": 15686926336.0, + "81": 15686926336.0, + "82": 15686926336.0, + "83": 15686926336.0, + "84": 15686926336.0, + "85": 15686926336.0, + "86": 15686926336.0, + "87": 15686926336.0, + "88": 15686926336.0, + "89": 15686926336.0, + "90": 15686926336.0, + "91": 15686926336.0, + "92": 15686926336.0, + "93": 15686926336.0, + "94": 15686926336.0, + "95": 15686926336.0, + "96": 15686926336.0, + "97": 15686926336.0, + "98": 15686926336.0, + "99": 15686926336.0, + "100": 15686926336.0 + } + }, + "global_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.30659, + "2": 1.23825, + "3": 1.92264, + "4": 2.26281, + "5": 2.20701, + "6": 2.27286, + "7": 2.35471, + "8": 2.34917, + "9": 2.28131, + "10": 2.22271, + "11": 2.16957, + "12": 2.19339, + "13": 2.32807, + "14": 2.2083, + "15": 2.19834, + "16": 2.2169, + "17": 2.16342, + "18": 2.20933, + "19": 2.37549, + "20": 2.23906, + "21": 2.1217, + "22": 2.16764, + "23": 2.20853, + "24": 2.23188, + "25": 2.27194, + "26": 2.09907, + "27": 2.14213, + "28": 2.09686, + "29": 2.07056, + "30": 2.10211, + "31": 2.04305, + "32": 1.95949, + "33": 1.99475, + "34": 1.97923, + "35": 1.88299, + "36": 1.84523, + "37": 1.9701, + "38": 1.89809, + "39": 1.92065, + "40": 1.88646, + "41": 1.87479, + "42": 1.79108, + "43": 1.89979, + "44": 1.93567, + "45": 1.81407, + "46": 1.79741, + "47": 1.82696, + "48": 1.82666, + "49": 1.87444, + "50": 1.74835, + "51": 1.73183, + "52": 1.83296, + "53": 1.84816, + "54": 1.81394, + "55": 1.75792, + "56": 1.79542, + "57": 1.69751, + "58": 1.7674, + "59": 1.78141, + "60": 1.65251, + "61": 1.68415, + "62": 1.71256, + "63": 1.71284, + "64": 1.68877, + "65": 1.7441, + "66": 1.75705, + "67": 1.75852, + "68": 1.66505, + "69": 1.68301, + "70": 1.62912, + "71": 1.63486, + "72": 1.70783, + "73": 1.67381, + "74": 1.65754, + "75": 1.59372, + "76": 1.59904, + "77": 1.67915, + "78": 1.62981, + "79": 1.63071, + "80": 1.66897, + "81": 1.64097, + "82": 1.6727, + "83": 1.63509, + "84": 1.62912, + "85": 1.6456, + "86": 1.66178, + "87": 1.66347, + "88": 1.68663, + "89": 1.62841, + "90": 1.68005, + "91": 1.62227, + "92": 1.64996, + "93": 1.61288, + "94": 1.65477, + "95": 1.65198, + "96": 1.66817, + "97": 1.66652, + "98": 1.64205, + "99": 1.64645, + "100": 1.65243 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 28.05964, + "3": 0.73239, + "4": 0.71372, + "5": 0.60185, + "6": 0.6044, + "7": 19.24934, + "8": 0.88688, + "9": 0.57275, + "10": 0.58585, + "11": 0.68705, + "12": 0.57317, + "13": 0.5788, + "14": 0.57326, + "15": 0.57145, + "16": 0.60211, + "17": 0.57496, + "18": 0.57422, + "19": 0.58286, + "20": 0.58775, + "21": 0.58188, + "22": 0.58286, + "23": 0.6801, + "24": 0.57652, + "25": 0.57641, + "26": 0.57536, + "27": 0.59959, + "28": 0.57587, + "29": 0.57226, + "30": 0.59282, + "31": 0.57196, + "32": 0.57349, + "33": 0.57849, + "34": 0.59212, + "35": 0.59374, + "36": 0.57161, + "37": 0.58128, + "38": 0.59094, + "39": 0.61982, + "40": 0.56764, + "41": 0.56721, + "42": 0.56878, + "43": 0.5723, + "44": 0.56957, + "45": 0.57284, + "46": 0.57033, + "47": 0.56876, + "48": 0.56977, + "49": 0.57062, + "50": 0.56956, + "51": 0.56854, + "52": 0.57398, + "53": 0.5778, + "54": 0.5756, + "55": 0.56782, + "56": 0.57246, + "57": 0.5672, + "58": 0.57056, + "59": 0.56724, + "60": 0.57057, + "61": 0.56656, + "62": 0.56714, + "63": 0.56537, + "64": 0.56489, + "65": 0.56882, + "66": 0.56477, + "67": 0.56628, + "68": 0.56225, + "69": 0.56344, + "70": 0.56201, + "71": 0.5629, + "72": 0.56199, + "73": 0.56055, + "74": 0.56043, + "75": 0.56397, + "76": 0.55974, + "77": 0.5589, + "78": 0.55934, + "79": 0.55999, + "80": 0.56177, + "81": 0.55965, + "82": 0.55884, + "83": 0.55891, + "84": 0.55584, + "85": 0.55777, + "86": 0.55919, + "87": 0.55988, + "88": 0.56165, + "89": 0.55698, + "90": 0.57482, + "91": 0.56254, + "92": 0.55836, + "93": 0.5577, + "94": 0.55486, + "95": 0.55772, + "96": 0.5595, + "97": 0.5595, + "98": 0.56279, + "99": 0.55811, + "100": 0.56017 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp2pp2ep4_cp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp2pp2ep4_cp2/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..bfe032e1282 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3_tp2pp2ep4_cp2/golden_values_dev_dgx_h100.json @@ -0,0 +1,751 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.03702, + "2": 10.96921, + "3": 9.88284, + "4": 9.52814, + "5": 9.52692, + "6": 9.26542, + "7": 9.30752, + "8": 8.81748, + "9": 8.74011, + "10": 9.01767, + "11": 8.48464, + "12": 8.5019, + "13": 8.37453, + "14": 7.85779, + "15": 7.9947, + "16": 8.04378, + "17": 8.03637, + "18": 7.71921, + "19": 8.05882, + "20": 7.85385, + "21": 7.51644, + "22": 7.47508, + "23": 7.35378, + "24": 7.33721, + "25": 7.57724, + "26": 7.00761, + "27": 7.51857, + "28": 7.23755, + "29": 7.3992, + "30": 7.52023, + "31": 7.27913, + "32": 7.47126, + "33": 7.52391, + "34": 7.56408, + "35": 7.10932, + "36": 6.96096, + "37": 7.31643, + "38": 7.09039, + "39": 7.43573, + "40": 7.45722, + "41": 7.37377, + "42": 7.15269, + "43": 7.13713, + "44": 7.29701, + "45": 7.04688, + "46": 6.81205, + "47": 7.14682, + "48": 6.97444, + "49": 7.4633, + "50": 6.87356, + "51": 6.96184, + "52": 7.30159, + "53": 7.2457, + "54": 7.15012, + "55": 6.85302, + "56": 7.23636, + "57": 6.93077, + "58": 7.21098, + "59": 7.11821, + "60": 6.53048, + "61": 6.74687, + "62": 7.18185, + "63": 7.24548, + "64": 6.65799, + "65": 7.19185, + "66": 7.3785, + "67": 7.32088, + "68": 6.90537, + "69": 6.87422, + "70": 6.80272, + "71": 6.78977, + "72": 6.91801, + "73": 6.97544, + "74": 6.94213, + "75": 6.89732, + "76": 6.35276, + "77": 7.24434, + "78": 6.81078, + "79": 6.71668, + "80": 6.86923, + "81": 6.66659, + "82": 7.16919, + "83": 6.84387, + "84": 6.80675, + "85": 7.00873, + "86": 6.85457, + "87": 6.96838, + "88": 6.89851, + "89": 6.68854, + "90": 6.89592, + "91": 6.52062, + "92": 6.50983, + "93": 6.61302, + "94": 6.85549, + "95": 6.98556, + "96": 7.1719, + "97": 7.003, + "98": 6.81352, + "99": 6.89774, + "100": 6.87292 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38807948.0, + "2": 38548840.0, + "3": 38745016.0, + "4": 81743032.0, + "5": 237158928.0, + "6": 365802016.0, + "7": 595377280.0, + "8": 794047616.0, + "9": 683558656.0, + "10": 630999552.0, + "11": 690560512.0, + "12": 589413824.0, + "13": 680059328.0, + "14": 658018176.0, + "15": 683348224.0, + "16": 658580416.0, + "17": 759000704.0, + "18": 743476800.0, + "19": 649203072.0, + "20": 677803328.0, + "21": 677698304.0, + "22": 680347136.0, + "23": 740635072.0, + "24": 703721472.0, + "25": 726781952.0, + "26": 683747072.0, + "27": 580467840.0, + "28": 686460032.0, + "29": 635658368.0, + "30": 615239296.0, + "31": 598780416.0, + "32": 579897856.0, + "33": 551726464.0, + "34": 488852928.0, + "35": 614979200.0, + "36": 548871488.0, + "37": 531666176.0, + "38": 539160384.0, + "39": 717852672.0, + "40": 453161056.0, + "41": 444177952.0, + "42": 422384544.0, + "43": 447803488.0, + "44": 629739712.0, + "45": 426750784.0, + "46": 400574368.0, + "47": 324592896.0, + "48": 331122816.0, + "49": 245569184.0, + "50": 252710288.0, + "51": 275424512.0, + "52": 321987456.0, + "53": 251434896.0, + "54": 426554080.0, + "55": 319952928.0, + "56": 307228928.0, + "57": 220823408.0, + "58": 227602912.0, + "59": 244147328.0, + "60": 400305984.0, + "61": 250462928.0, + "62": 353749536.0, + "63": 196032160.0, + "64": 231230864.0, + "65": 211820928.0, + "66": 287183008.0, + "67": 320558400.0, + "68": 186231744.0, + "69": 167155056.0, + "70": 176336384.0, + "71": 171557392.0, + "72": 261253376.0, + "73": 173534624.0, + "74": 171186608.0, + "75": 199323376.0, + "76": 241560512.0, + "77": 217064608.0, + "78": 214971584.0, + "79": 142496336.0, + "80": 199752128.0, + "81": 132144936.0, + "82": 199072720.0, + "83": 145432592.0, + "84": 179987440.0, + "85": 117152080.0, + "86": 94998264.0, + "87": 168012880.0, + "88": 180941536.0, + "89": 164934528.0, + "90": 119915248.0, + "91": 110451872.0, + "92": 99884056.0, + "93": 169001664.0, + "94": 114064608.0, + "95": 89186104.0, + "96": 97201344.0, + "97": 164577472.0, + "98": 101734720.0, + "99": 92270848.0, + "100": 98906016.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5902307328.0, + "2": 5902308864.0, + "3": 5902308864.0, + "4": 5902308864.0, + "5": 5902308864.0, + "6": 5902308864.0, + "7": 5902308864.0, + "8": 5902308864.0, + "9": 5902308864.0, + "10": 5902308864.0, + "11": 5902308864.0, + "12": 5902308864.0, + "13": 5902308864.0, + "14": 5902308864.0, + "15": 5902308864.0, + "16": 5902308864.0, + "17": 5902308864.0, + "18": 5902308864.0, + "19": 5902308864.0, + "20": 5902308864.0, + "21": 5902308864.0, + "22": 5902308864.0, + "23": 5902308864.0, + "24": 5902308864.0, + "25": 5902308864.0, + "26": 5902308864.0, + "27": 5902308864.0, + "28": 5902308864.0, + "29": 5902308864.0, + "30": 5902308864.0, + "31": 5902308864.0, + "32": 5902308864.0, + "33": 5902308864.0, + "34": 5902308864.0, + "35": 5902308864.0, + "36": 5902308864.0, + "37": 5902308864.0, + "38": 5902308864.0, + "39": 5902308864.0, + "40": 5902308864.0, + "41": 5902308864.0, + "42": 5902308864.0, + "43": 5902308864.0, + "44": 5902308864.0, + "45": 5902308864.0, + "46": 5902308864.0, + "47": 5902308864.0, + "48": 5902308864.0, + "49": 5902308864.0, + "50": 5902308864.0, + "51": 5902308864.0, + "52": 5902308864.0, + "53": 5902308864.0, + "54": 5902308864.0, + "55": 5902308864.0, + "56": 5902308864.0, + "57": 5902308864.0, + "58": 5902308864.0, + "59": 5902308864.0, + "60": 5902308864.0, + "61": 5902308864.0, + "62": 5902308864.0, + "63": 5902308864.0, + "64": 5902308864.0, + "65": 5902308864.0, + "66": 5902308864.0, + "67": 5902308864.0, + "68": 5902308864.0, + "69": 5902308864.0, + "70": 5902308864.0, + "71": 5902308864.0, + "72": 5902308864.0, + "73": 5902308864.0, + "74": 5902308864.0, + "75": 5902308864.0, + "76": 5902308864.0, + "77": 5902308864.0, + "78": 5902308864.0, + "79": 5902308864.0, + "80": 5902308864.0, + "81": 5902308864.0, + "82": 5902308864.0, + "83": 5902308864.0, + "84": 5902308864.0, + "85": 5902308864.0, + "86": 5902308864.0, + "87": 5902308864.0, + "88": 5902308864.0, + "89": 5902308864.0, + "90": 5902308864.0, + "91": 5902308864.0, + "92": 5902308864.0, + "93": 5902308864.0, + "94": 5902308864.0, + "95": 5902308864.0, + "96": 5902308864.0, + "97": 5902308864.0, + "98": 5902308864.0, + "99": 5902308864.0, + "100": 5902308864.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 7491686912.0, + "2": 9361367040.0, + "3": 9400341504.0, + "4": 9502356480.0, + "5": 9537902592.0, + "6": 9537902592.0, + "7": 9537902592.0, + "8": 9537902592.0, + "9": 9537902592.0, + "10": 9575347200.0, + "11": 9575347200.0, + "12": 9575347200.0, + "13": 9584957440.0, + "14": 9584957440.0, + "15": 9584957440.0, + "16": 9584957440.0, + "17": 9584957440.0, + "18": 9584957440.0, + "19": 9584957440.0, + "20": 9584957440.0, + "21": 9584957440.0, + "22": 9584957440.0, + "23": 9584957440.0, + "24": 9584957440.0, + "25": 9584957440.0, + "26": 9584957440.0, + "27": 9584957440.0, + "28": 9584957440.0, + "29": 9584957440.0, + "30": 9584957440.0, + "31": 9584957440.0, + "32": 9584957440.0, + "33": 9584957440.0, + "34": 9584957440.0, + "35": 9584957440.0, + "36": 9584957440.0, + "37": 9584957440.0, + "38": 9584957440.0, + "39": 9584957440.0, + "40": 9584957440.0, + "41": 9584957440.0, + "42": 9584957440.0, + "43": 9584957440.0, + "44": 9584957440.0, + "45": 9584957440.0, + "46": 9584957440.0, + "47": 9584957440.0, + "48": 9584957440.0, + "49": 9584957440.0, + "50": 9584957440.0, + "51": 9584957440.0, + "52": 9584957440.0, + "53": 9584957440.0, + "54": 9584957440.0, + "55": 9584957440.0, + "56": 9584957440.0, + "57": 9584957440.0, + "58": 9584957440.0, + "59": 9584957440.0, + "60": 9584957440.0, + "61": 9584957440.0, + "62": 9584957440.0, + "63": 9584957440.0, + "64": 9584957440.0, + "65": 9584957440.0, + "66": 9584957440.0, + "67": 9584957440.0, + "68": 9584957440.0, + "69": 9584957440.0, + "70": 9584957440.0, + "71": 9584957440.0, + "72": 9584957440.0, + "73": 9584957440.0, + "74": 9584957440.0, + "75": 9584957440.0, + "76": 9584957440.0, + "77": 9584957440.0, + "78": 9584957440.0, + "79": 9584957440.0, + "80": 9584957440.0, + "81": 9584957440.0, + "82": 9584957440.0, + "83": 9584957440.0, + "84": 9584957440.0, + "85": 9584957440.0, + "86": 9584957440.0, + "87": 9584957440.0, + "88": 9584957440.0, + "89": 9584957440.0, + "90": 9584957440.0, + "91": 9584957440.0, + "92": 9584957440.0, + "93": 9584957440.0, + "94": 9584957440.0, + "95": 9584957440.0, + "96": 9584957440.0, + "97": 9584957440.0, + "98": 9584957440.0, + "99": 9584957440.0, + "100": 9584957440.0 + } + }, + "global_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.33933, + "2": 1.28315, + "3": 1.66633, + "4": 2.20623, + "5": 2.22342, + "6": 2.26912, + "7": 2.24371, + "8": 2.19417, + "9": 2.15082, + "10": 2.12323, + "11": 2.06823, + "12": 2.09226, + "13": 2.07499, + "14": 1.90401, + "15": 1.95399, + "16": 2.01662, + "17": 2.03368, + "18": 1.96948, + "19": 1.94959, + "20": 1.82595, + "21": 1.77419, + "22": 1.81186, + "23": 1.82437, + "24": 1.79265, + "25": 1.78529, + "26": 1.67949, + "27": 1.71971, + "28": 1.69955, + "29": 1.68275, + "30": 1.73698, + "31": 1.66989, + "32": 1.65877, + "33": 1.68852, + "34": 1.66328, + "35": 1.56241, + "36": 1.5541, + "37": 1.61772, + "38": 1.56065, + "39": 1.59173, + "40": 1.65318, + "41": 1.62418, + "42": 1.56223, + "43": 1.63282, + "44": 1.69323, + "45": 1.59235, + "46": 1.6158, + "47": 1.63331, + "48": 1.62366, + "49": 1.63072, + "50": 1.54415, + "51": 1.59053, + "52": 1.68037, + "53": 1.69712, + "54": 1.65104, + "55": 1.56732, + "56": 1.62024, + "57": 1.50847, + "58": 1.564, + "59": 1.56449, + "60": 1.49448, + "61": 1.49025, + "62": 1.5606, + "63": 1.54864, + "64": 1.49554, + "65": 1.61253, + "66": 1.57385, + "67": 1.56341, + "68": 1.49336, + "69": 1.48985, + "70": 1.45992, + "71": 1.45517, + "72": 1.58575, + "73": 1.51713, + "74": 1.50901, + "75": 1.48383, + "76": 1.4516, + "77": 1.55437, + "78": 1.45807, + "79": 1.42952, + "80": 1.43377, + "81": 1.44847, + "82": 1.48423, + "83": 1.46642, + "84": 1.43211, + "85": 1.46825, + "86": 1.47937, + "87": 1.49561, + "88": 1.4904, + "89": 1.41924, + "90": 1.50936, + "91": 1.42872, + "92": 1.44825, + "93": 1.43118, + "94": 1.46495, + "95": 1.44664, + "96": 1.4799, + "97": 1.44695, + "98": 1.44209, + "99": 1.43742, + "100": 1.42256 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 31.15623, + "3": 1.32195, + "4": 1.17834, + "5": 1.12804, + "6": 1.22009, + "7": 1.019, + "8": 1.2231, + "9": 1.02821, + "10": 1.03953, + "11": 1.03994, + "12": 1.04819, + "13": 1.05657, + "14": 1.06704, + "15": 1.10064, + "16": 1.06051, + "17": 1.03238, + "18": 1.0827, + "19": 1.03747, + "20": 1.08899, + "21": 1.18885, + "22": 1.06863, + "23": 1.13137, + "24": 1.05994, + "25": 1.15646, + "26": 1.12705, + "27": 1.07479, + "28": 1.08212, + "29": 1.08351, + "30": 1.06003, + "31": 1.10867, + "32": 1.1605, + "33": 1.10077, + "34": 1.08635, + "35": 1.12451, + "36": 1.1213, + "37": 1.11666, + "38": 1.12379, + "39": 1.13942, + "40": 1.1224, + "41": 1.13938, + "42": 1.13057, + "43": 1.11392, + "44": 1.13889, + "45": 1.13957, + "46": 1.15603, + "47": 1.15413, + "48": 1.1606, + "49": 1.15964, + "50": 1.14743, + "51": 1.17528, + "52": 1.1954, + "53": 1.15101, + "54": 1.14607, + "55": 1.15512, + "56": 1.14354, + "57": 1.16399, + "58": 1.17534, + "59": 1.16788, + "60": 1.15365, + "61": 1.17556, + "62": 1.18078, + "63": 1.18416, + "64": 1.17802, + "65": 1.14607, + "66": 1.14925, + "67": 1.16534, + "68": 1.17426, + "69": 1.18552, + "70": 1.1706, + "71": 1.1506, + "72": 1.16502, + "73": 1.16604, + "74": 1.16655, + "75": 1.15549, + "76": 1.1845, + "77": 1.16399, + "78": 1.18803, + "79": 1.1795, + "80": 1.17202, + "81": 1.17726, + "82": 1.16908, + "83": 1.17807, + "84": 1.17608, + "85": 1.18097, + "86": 1.18051, + "87": 1.16893, + "88": 1.17008, + "89": 1.17655, + "90": 1.17507, + "91": 1.18205, + "92": 1.28879, + "93": 1.24552, + "94": 1.17158, + "95": 1.17698, + "96": 1.17432, + "97": 1.17808, + "98": 1.16814, + "99": 1.1832, + "100": 1.17515 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3next_tp2pp2ep4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3next_tp2pp2ep4/golden_values_dev_dgx_h100.json new file mode 100644 index 00000000000..8ee449af121 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/golden_values/qwen3next_tp2pp2ep4/golden_values_dev_dgx_h100.json @@ -0,0 +1,751 @@ +{ + "lm loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 11.05971, + "2": 11.05136, + "3": 9.82635, + "4": 12.89077, + "5": 10.03021, + "6": 9.20007, + "7": 9.37221, + "8": 8.67507, + "9": 8.616, + "10": 9.04569, + "11": 8.37936, + "12": 8.50093, + "13": 8.39421, + "14": 7.8265, + "15": 7.96869, + "16": 8.02392, + "17": 7.95664, + "18": 7.67913, + "19": 8.05375, + "20": 7.74955, + "21": 7.41689, + "22": 7.399, + "23": 7.26883, + "24": 7.20338, + "25": 7.51816, + "26": 6.94572, + "27": 7.43293, + "28": 7.1586, + "29": 7.31556, + "30": 7.3972, + "31": 7.16633, + "32": 7.32936, + "33": 7.37894, + "34": 7.40557, + "35": 6.98138, + "36": 6.79667, + "37": 7.15918, + "38": 6.91347, + "39": 7.26098, + "40": 7.28195, + "41": 7.18769, + "42": 6.94007, + "43": 6.93942, + "44": 7.09561, + "45": 6.79918, + "46": 6.58414, + "47": 6.90688, + "48": 6.71235, + "49": 7.17895, + "50": 6.63372, + "51": 6.69136, + "52": 6.96741, + "53": 6.94951, + "54": 6.80965, + "55": 6.5594, + "56": 6.92884, + "57": 6.6754, + "58": 6.89474, + "59": 6.75212, + "60": 6.24772, + "61": 6.41684, + "62": 6.83546, + "63": 6.82944, + "64": 6.38268, + "65": 6.85, + "66": 7.02794, + "67": 6.9989, + "68": 6.59131, + "69": 6.53096, + "70": 6.48614, + "71": 6.44652, + "72": 6.5952, + "73": 6.65919, + "74": 6.52627, + "75": 6.56237, + "76": 5.97601, + "77": 6.89805, + "78": 6.44372, + "79": 6.37328, + "80": 6.48996, + "81": 6.33326, + "82": 6.84471, + "83": 6.53754, + "84": 6.44651, + "85": 6.62936, + "86": 6.49687, + "87": 6.60402, + "88": 6.54784, + "89": 6.36069, + "90": 6.59789, + "91": 6.16293, + "92": 6.11182, + "93": 6.22731, + "94": 6.49885, + "95": 6.57447, + "96": 6.76677, + "97": 6.61519, + "98": 6.42103, + "99": 6.529, + "100": 6.50087 + } + }, + "num-zeros": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 38808276.0, + "2": 38549024.0, + "3": 38742456.0, + "4": 37689892.0, + "5": 38944200.0, + "6": 38634624.0, + "7": 214685888.0, + "8": 155450848.0, + "9": 362692544.0, + "10": 152852112.0, + "11": 243848384.0, + "12": 435264992.0, + "13": 205054480.0, + "14": 154738464.0, + "15": 283878816.0, + "16": 271681184.0, + "17": 302874432.0, + "18": 432035712.0, + "19": 318859616.0, + "20": 227965504.0, + "21": 174404848.0, + "22": 183301456.0, + "23": 215310000.0, + "24": 162643744.0, + "25": 129105016.0, + "26": 148993728.0, + "27": 127427256.0, + "28": 173724432.0, + "29": 116602880.0, + "30": 140203744.0, + "31": 208669184.0, + "32": 136321552.0, + "33": 108124976.0, + "34": 101918584.0, + "35": 95928616.0, + "36": 139921584.0, + "37": 116413520.0, + "38": 86185552.0, + "39": 104445480.0, + "40": 125989696.0, + "41": 79276072.0, + "42": 88936864.0, + "43": 82896520.0, + "44": 66654400.0, + "45": 64952532.0, + "46": 54516156.0, + "47": 79204080.0, + "48": 57427576.0, + "49": 66247896.0, + "50": 73395704.0, + "51": 42632044.0, + "52": 38866768.0, + "53": 43799836.0, + "54": 52201580.0, + "55": 61986768.0, + "56": 64989088.0, + "57": 60387228.0, + "58": 41991792.0, + "59": 49109156.0, + "60": 54267732.0, + "61": 42824812.0, + "62": 48601668.0, + "63": 54462064.0, + "64": 51904840.0, + "65": 51380968.0, + "66": 57536128.0, + "67": 53170336.0, + "68": 41521096.0, + "69": 41317636.0, + "70": 41061440.0, + "71": 55146852.0, + "72": 50476164.0, + "73": 53992524.0, + "74": 42200568.0, + "75": 45167232.0, + "76": 40233088.0, + "77": 44047644.0, + "78": 38808364.0, + "79": 44972736.0, + "80": 45609544.0, + "81": 40905048.0, + "82": 38640636.0, + "83": 41613316.0, + "84": 41575328.0, + "85": 41650100.0, + "86": 38375312.0, + "87": 39025256.0, + "88": 39382364.0, + "89": 39085552.0, + "90": 38117500.0, + "91": 38090828.0, + "92": 40115160.0, + "93": 40027168.0, + "94": 38556408.0, + "95": 38837976.0, + "96": 37425664.0, + "97": 38745964.0, + "98": 38820724.0, + "99": 38784884.0, + "100": 39132676.0 + } + }, + "mem-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 5218820096.0, + "2": 5218397696.0, + "3": 5218744832.0, + "4": 5217916416.0, + "5": 5221508608.0, + "6": 5220727808.0, + "7": 5220129280.0, + "8": 5219253248.0, + "9": 5222144000.0, + "10": 5223271424.0, + "11": 5222099456.0, + "12": 5217415680.0, + "13": 5220348416.0, + "14": 5218833920.0, + "15": 5218626560.0, + "16": 5217932800.0, + "17": 5218185728.0, + "18": 5218004480.0, + "19": 5219763200.0, + "20": 5220395008.0, + "21": 5220411904.0, + "22": 5220636160.0, + "23": 5221642240.0, + "24": 5221083136.0, + "25": 5220417024.0, + "26": 5217579008.0, + "27": 5214744576.0, + "28": 5214345728.0, + "29": 5216320512.0, + "30": 5218736640.0, + "31": 5216657920.0, + "32": 5215752704.0, + "33": 5217199104.0, + "34": 5217231360.0, + "35": 5217511936.0, + "36": 5217805824.0, + "37": 5217778176.0, + "38": 5216730624.0, + "39": 5213962240.0, + "40": 5214565888.0, + "41": 5215327744.0, + "42": 5215710208.0, + "43": 5214577152.0, + "44": 5215266816.0, + "45": 5216308736.0, + "46": 5216429568.0, + "47": 5216673792.0, + "48": 5216537600.0, + "49": 5217883136.0, + "50": 5217187328.0, + "51": 5216250880.0, + "52": 5217424384.0, + "53": 5218176000.0, + "54": 5216177664.0, + "55": 5216419840.0, + "56": 5216124416.0, + "57": 5217261568.0, + "58": 5217806336.0, + "59": 5217235456.0, + "60": 5216873984.0, + "61": 5218426368.0, + "62": 5217275904.0, + "63": 5218569728.0, + "64": 5215545856.0, + "65": 5217874432.0, + "66": 5216065536.0, + "67": 5216753664.0, + "68": 5217648128.0, + "69": 5216547840.0, + "70": 5216409088.0, + "71": 5216130048.0, + "72": 5215314944.0, + "73": 5215986688.0, + "74": 5216513024.0, + "75": 5217041408.0, + "76": 5217597952.0, + "77": 5217403392.0, + "78": 5217127424.0, + "79": 5214411264.0, + "80": 5217670144.0, + "81": 5215154176.0, + "82": 5216296448.0, + "83": 5216160256.0, + "84": 5217668608.0, + "85": 5215599616.0, + "86": 5217665024.0, + "87": 5216344064.0, + "88": 5217366016.0, + "89": 5216684544.0, + "90": 5217457664.0, + "91": 5216530944.0, + "92": 5214828032.0, + "93": 5215619584.0, + "94": 5217387008.0, + "95": 5215918592.0, + "96": 5217335296.0, + "97": 5216422912.0, + "98": 5216081920.0, + "99": 5216164864.0, + "100": 5216628736.0 + } + }, + "mem-max-allocated-bytes": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 50219483136.0, + "2": 52098605056.0, + "3": 52098605056.0, + "4": 52098605056.0, + "5": 52215910400.0, + "6": 52215910400.0, + "7": 52215910400.0, + "8": 52279160832.0, + "9": 52279160832.0, + "10": 52279160832.0, + "11": 52279160832.0, + "12": 52279160832.0, + "13": 52279160832.0, + "14": 52279160832.0, + "15": 52279160832.0, + "16": 52279160832.0, + "17": 52279160832.0, + "18": 52279160832.0, + "19": 52279160832.0, + "20": 52279160832.0, + "21": 52279160832.0, + "22": 52279160832.0, + "23": 52279160832.0, + "24": 52279160832.0, + "25": 52279160832.0, + "26": 52279160832.0, + "27": 52279160832.0, + "28": 52279160832.0, + "29": 52279160832.0, + "30": 52279160832.0, + "31": 52279160832.0, + "32": 52279160832.0, + "33": 52279160832.0, + "34": 52279160832.0, + "35": 52279160832.0, + "36": 52279160832.0, + "37": 52279160832.0, + "38": 52279160832.0, + "39": 52279160832.0, + "40": 52279160832.0, + "41": 52279160832.0, + "42": 52279160832.0, + "43": 52279160832.0, + "44": 52279160832.0, + "45": 52279160832.0, + "46": 52279160832.0, + "47": 52279160832.0, + "48": 52279160832.0, + "49": 52279160832.0, + "50": 52279160832.0, + "51": 52279160832.0, + "52": 52279160832.0, + "53": 52279160832.0, + "54": 52279160832.0, + "55": 52279160832.0, + "56": 52279160832.0, + "57": 52279160832.0, + "58": 52279160832.0, + "59": 52279160832.0, + "60": 52279160832.0, + "61": 52279160832.0, + "62": 52279160832.0, + "63": 52279160832.0, + "64": 52279160832.0, + "65": 52279160832.0, + "66": 52279160832.0, + "67": 52279160832.0, + "68": 52279160832.0, + "69": 52279160832.0, + "70": 52279160832.0, + "71": 52279160832.0, + "72": 52279160832.0, + "73": 52279160832.0, + "74": 52279160832.0, + "75": 52279160832.0, + "76": 52279160832.0, + "77": 52279160832.0, + "78": 52279160832.0, + "79": 52279160832.0, + "80": 52279160832.0, + "81": 52279160832.0, + "82": 52279160832.0, + "83": 52279160832.0, + "84": 52279160832.0, + "85": 52279160832.0, + "86": 52279160832.0, + "87": 52279160832.0, + "88": 52279160832.0, + "89": 52279160832.0, + "90": 52279160832.0, + "91": 52279160832.0, + "92": 52279160832.0, + "93": 52279160832.0, + "94": 52279160832.0, + "95": 52279160832.0, + "96": 52279160832.0, + "97": 52279160832.0, + "98": 52279160832.0, + "99": 52279160832.0, + "100": 52279160832.0 + } + }, + "global_load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 1.00738, + "2": 1.0124, + "3": 1.0781, + "4": 1.48895, + "5": 1.25016, + "6": 1.64773, + "7": 1.67788, + "8": 1.71031, + "9": 1.70603, + "10": 1.6844, + "11": 1.65953, + "12": 1.66856, + "13": 1.64663, + "14": 1.55928, + "15": 1.576, + "16": 1.55036, + "17": 1.53073, + "18": 1.51217, + "19": 1.57021, + "20": 1.47992, + "21": 1.4351, + "22": 1.38801, + "23": 1.36441, + "24": 1.34654, + "25": 1.37466, + "26": 1.41363, + "27": 1.36275, + "28": 1.33694, + "29": 1.31754, + "30": 1.36878, + "31": 1.3735, + "32": 1.33928, + "33": 1.37018, + "34": 1.34282, + "35": 1.32226, + "36": 1.3, + "37": 1.35037, + "38": 1.29347, + "39": 1.31439, + "40": 1.37268, + "41": 1.33668, + "42": 1.30454, + "43": 1.32654, + "44": 1.36268, + "45": 1.29182, + "46": 1.29421, + "47": 1.29093, + "48": 1.28921, + "49": 1.3133, + "50": 1.26989, + "51": 1.27744, + "52": 1.27536, + "53": 1.30231, + "54": 1.2497, + "55": 1.24424, + "56": 1.2927, + "57": 1.25298, + "58": 1.26664, + "59": 1.25779, + "60": 1.24883, + "61": 1.21935, + "62": 1.25551, + "63": 1.25266, + "64": 1.22899, + "65": 1.26754, + "66": 1.28345, + "67": 1.26669, + "68": 1.24236, + "69": 1.24451, + "70": 1.2189, + "71": 1.22839, + "72": 1.26814, + "73": 1.24226, + "74": 1.2436, + "75": 1.23715, + "76": 1.20107, + "77": 1.26681, + "78": 1.21522, + "79": 1.21505, + "80": 1.21775, + "81": 1.2297, + "82": 1.25364, + "83": 1.21371, + "84": 1.21646, + "85": 1.23171, + "86": 1.24462, + "87": 1.24366, + "88": 1.25207, + "89": 1.20085, + "90": 1.27876, + "91": 1.21596, + "92": 1.19493, + "93": 1.18377, + "94": 1.21884, + "95": 1.19044, + "96": 1.23795, + "97": 1.21355, + "98": 1.20399, + "99": 1.21003, + "100": 1.22582 + } + }, + "load_balancing_loss": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": 0.0, + "2": 0.0, + "3": 0.0, + "4": 0.0, + "5": 0.0, + "6": 0.0, + "7": 0.0, + "8": 0.0, + "9": 0.0, + "10": 0.0, + "11": 0.0, + "12": 0.0, + "13": 0.0, + "14": 0.0, + "15": 0.0, + "16": 0.0, + "17": 0.0, + "18": 0.0, + "19": 0.0, + "20": 0.0, + "21": 0.0, + "22": 0.0, + "23": 0.0, + "24": 0.0, + "25": 0.0, + "26": 0.0, + "27": 0.0, + "28": 0.0, + "29": 0.0, + "30": 0.0, + "31": 0.0, + "32": 0.0, + "33": 0.0, + "34": 0.0, + "35": 0.0, + "36": 0.0, + "37": 0.0, + "38": 0.0, + "39": 0.0, + "40": 0.0, + "41": 0.0, + "42": 0.0, + "43": 0.0, + "44": 0.0, + "45": 0.0, + "46": 0.0, + "47": 0.0, + "48": 0.0, + "49": 0.0, + "50": 0.0, + "51": 0.0, + "52": 0.0, + "53": 0.0, + "54": 0.0, + "55": 0.0, + "56": 0.0, + "57": 0.0, + "58": 0.0, + "59": 0.0, + "60": 0.0, + "61": 0.0, + "62": 0.0, + "63": 0.0, + "64": 0.0, + "65": 0.0, + "66": 0.0, + "67": 0.0, + "68": 0.0, + "69": 0.0, + "70": 0.0, + "71": 0.0, + "72": 0.0, + "73": 0.0, + "74": 0.0, + "75": 0.0, + "76": 0.0, + "77": 0.0, + "78": 0.0, + "79": 0.0, + "80": 0.0, + "81": 0.0, + "82": 0.0, + "83": 0.0, + "84": 0.0, + "85": 0.0, + "86": 0.0, + "87": 0.0, + "88": 0.0, + "89": 0.0, + "90": 0.0, + "91": 0.0, + "92": 0.0, + "93": 0.0, + "94": 0.0, + "95": 0.0, + "96": 0.0, + "97": 0.0, + "98": 0.0, + "99": 0.0, + "100": 0.0 + } + }, + "iteration-time": { + "start_step": 1, + "end_step": 100, + "step_interval": 1, + "values": { + "1": "nan", + "2": 38.96466, + "3": 5.51464, + "4": 5.52927, + "5": 5.52978, + "6": 5.36095, + "7": 5.2221, + "8": 5.26768, + "9": 5.27447, + "10": 5.21976, + "11": 5.16015, + "12": 8.31263, + "13": 5.17729, + "14": 5.20674, + "15": 5.14853, + "16": 5.17665, + "17": 5.21178, + "18": 5.15991, + "19": 5.21923, + "20": 5.16207, + "21": 5.19921, + "22": 5.12475, + "23": 5.17995, + "24": 5.17182, + "25": 5.14194, + "26": 5.14932, + "27": 5.1458, + "28": 5.12625, + "29": 5.1383, + "30": 5.14837, + "31": 5.13193, + "32": 5.13382, + "33": 5.12483, + "34": 5.13111, + "35": 5.12616, + "36": 5.13918, + "37": 5.14101, + "38": 5.13675, + "39": 5.13476, + "40": 5.12481, + "41": 5.09526, + "42": 5.09454, + "43": 5.1065, + "44": 5.09338, + "45": 5.1464, + "46": 5.09689, + "47": 5.09257, + "48": 5.09728, + "49": 5.11757, + "50": 5.13761, + "51": 5.10062, + "52": 5.13383, + "53": 5.13507, + "54": 5.09528, + "55": 5.09131, + "56": 5.09965, + "57": 5.09769, + "58": 5.09983, + "59": 5.09428, + "60": 5.09775, + "61": 5.09249, + "62": 5.09351, + "63": 5.09893, + "64": 5.09227, + "65": 5.09562, + "66": 5.09035, + "67": 5.08991, + "68": 5.09481, + "69": 5.10783, + "70": 5.09168, + "71": 5.09469, + "72": 5.10611, + "73": 5.09294, + "74": 5.11001, + "75": 5.10243, + "76": 5.09169, + "77": 5.08925, + "78": 5.09105, + "79": 5.08949, + "80": 5.08692, + "81": 5.08883, + "82": 5.09439, + "83": 5.09138, + "84": 5.08561, + "85": 5.08659, + "86": 5.09142, + "87": 5.0843, + "88": 5.09149, + "89": 5.08179, + "90": 5.0895, + "91": 5.09166, + "92": 5.08691, + "93": 5.09117, + "94": 5.09387, + "95": 5.08588, + "96": 5.09482, + "97": 5.08307, + "98": 5.08269, + "99": 5.08345, + "100": 5.0857 + } + } +} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml b/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml index 70924aed0cc..941f1f21ee1 100644 --- a/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml +++ b/tests/functional_tests/test_cases/moe2.0/model_configs/dsv3_proxy.yaml @@ -35,7 +35,7 @@ MODEL_ARGS: --ffn-hidden-size: 4096 # Add MoE args --num-experts: 32 - --moe-layer-freq: ([0]*1+[1]*15) + --moe-layer-freq: ([0]*2+[1]*14) --moe-ffn-hidden-size: 1024 --moe-shared-expert-intermediate-size: 1024 --moe-router-load-balancing-type: seq_aux_loss @@ -72,13 +72,13 @@ MODEL_ARGS: --init-method-std: 0.02 # Training args --global-batch-size: 32 - --train-iters: 50 - --exit-duration-in-mins: 230 + --train-iters: 100 + --save-interval: 50 --no-check-for-nan-in-loss-and-grad: true + --deterministic-mode: true METRICS: - "lm loss" - - "num-zeros" - "mem-allocated-bytes" - "mem-max-allocated-bytes" - "mtp_1 loss" diff --git a/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml b/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml index 46e298ec971..a8d9258213d 100644 --- a/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml +++ b/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3_proxy.yaml @@ -16,6 +16,7 @@ MODEL_ARGS: --make-vocab-size-divisible-by: 3232 --untie-embeddings-and-output-weights: true # Add attention related args + --num-attention-heads: 16 --group-query-attention: true --num-query-groups: 4 --kv-channels: 128 @@ -28,20 +29,12 @@ MODEL_ARGS: --ffn-hidden-size: 4096 # Add MoE args --num-experts: 32 - --moe-layer-freq: ([0]*1+[1]*15) --moe-ffn-hidden-size: 1024 --moe-shared-expert-intermediate-size: 1024 - --moe-router-load-balancing-type: aux_loss + --moe-router-load-balancing-type: global_aux_loss --moe-router-topk: 4 - --moe-router-pre-softmax: true - --moe-grouped-gemm: true --moe-aux-loss-coeff: 1e-4 - --moe-router-group-topk: 2 - --moe-router-num-groups: 4 - --moe-router-topk-scaling-factor: 2.0 - --moe-router-score-function: sigmoid - --moe-router-enable-expert-bias: true - --moe-router-bias-update-rate: 1e-3 + --moe-router-score-function: softmax --moe-router-dtype: fp32 # Add regularization args --attention-dropout: 0.0 @@ -62,13 +55,13 @@ MODEL_ARGS: --init-method-std: 0.02 # Training args --global-batch-size: 32 - --train-iters: 50 - --exit-duration-in-mins: 230 + --train-iters: 100 + --save-interval: 50 --no-check-for-nan-in-loss-and-grad: true + --deterministic-mode: true METRICS: - "lm loss" - - "num-zeros" - "mem-allocated-bytes" - "mem-max-allocated-bytes" - "load_balancing_loss" diff --git a/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3next_proxy.yaml b/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3next_proxy.yaml new file mode 100644 index 00000000000..6dacc83dea5 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/model_configs/qwen3next_proxy.yaml @@ -0,0 +1,76 @@ +MODEL_ARGS: + # Data args + --seq-length: 4096 + --data-cache-path: ${DATA_CACHE_PATH} + --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document + --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json + --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt + --split: 949,50,1 + # Add transformer base args + --num-layers: 16 + --hidden-size: 1024 + --normalization: RMSNorm + --norm-epsilon: 1e-6 + --disable-bias-linear: true + --max-position-embeddings: 4096 + --make-vocab-size-divisible-by: 3232 + --untie-embeddings-and-output-weights: true + # Add attention related args (GQA) + --num-attention-heads: 16 + --group-query-attention: true + --num-query-groups: 4 + --kv-channels: 128 + --qk-layernorm: true + --position-embedding-type: rope + --rotary-percent: 1.0 + --rotary-base: 1000000 + # Gated Delta Net (Linear Attention) args + --experimental-attention-variant: gated_delta_net + --linear-attention-freq: 3 + --linear-conv-kernel-dim: 4 + --linear-key-head-dim: 128 + --linear-value-head-dim: 128 + --linear-num-key-heads: 16 + --linear-num-value-heads: 32 + # Add MLP related args + --swiglu: true + --ffn-hidden-size: 4096 + # Add MoE args + --num-experts: 32 + --moe-ffn-hidden-size: 1024 + --moe-shared-expert-intermediate-size: 1024 + --moe-router-load-balancing-type: global_aux_loss + --moe-router-topk: 4 + --moe-aux-loss-coeff: 1e-4 + --moe-router-score-function: softmax + --moe-router-dtype: fp32 + # Add regularization args + --attention-dropout: 0.0 + --hidden-dropout: 0.0 + --clip-grad: 1.0 + --weight-decay: 0.1 + # Add learning rate args + --lr-warmup-fraction: .01 + --lr: 0.00015 + --min-lr: 1.0e-5 + --lr-decay-style: cosine + --adam-beta1: 0.9 + --adam-beta2: 0.95 + # Add validation args + --eval-iters: 32 + --eval-interval: 200 + # Add initialization args + --init-method-std: 0.02 + # Training args + --global-batch-size: 32 + --train-iters: 100 + --save-interval: 50 + --no-check-for-nan-in-loss-and-grad: true + --deterministic-mode: true + +METRICS: + - "lm loss" + - "mem-allocated-bytes" + - "mem-max-allocated-bytes" + - "load_balancing_loss" + diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml index 305e2847305..a4dacb0b919 100644 --- a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp1pp1ep8.yaml @@ -5,6 +5,8 @@ ENV_VARS: NCCL_NVLS_ENABLE: 0 PYTHONWARNINGS: ignore NCCL_DEBUG: VERSION + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: ":4096:8" MODEL_ARGS: # Transformer Engine args @@ -19,22 +21,14 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN - --attention-backend: unfused # TODO: switch back to fused attention after fix --use-mcore-models: true --sequence-parallel: true --micro-batch-size: 4 # MoE training related args + --moe-grouped-gemm: true --moe-token-dispatcher-type: alltoall - --moe-permute-fusion: true - --save-interval: 25 # Add mixed precision args --bf16: true - --exit-interval: 50 - # kernel fusion related args - --no-rope-fusion: true - --cross-entropy-loss-fusion: true - --cross-entropy-fusion-impl: native # MISC --manual-gc: true --manual-gc-interval: 100 diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp1ep8_muon.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp1ep8_muon.yaml new file mode 100644 index 00000000000..2c0f8a150c8 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp1ep8_muon.yaml @@ -0,0 +1,41 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: ":4096:8" + +MODEL_ARGS: + # Transformer Engine args + --transformer-impl: transformer_engine + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 1 + --expert-model-parallel-size: 8 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + # NOTE: Muon optimizer does not support distributed optimizer + # --use-distributed-optimizer: true + --use-mcore-models: true + --sequence-parallel: true + --micro-batch-size: 4 + # MoE training related args + --moe-grouped-gemm: true + --moe-token-dispatcher-type: allgather + # Muon optimizer args + --optimizer: muon + --muon-momentum: 0.9 + --muon-extra-scale-factor: 0.2 + --muon-scale-mode: spectral + --use-checkpoint-opt_param-scheduler: true + # Add mixed precision args + --bf16: true + # MISC + --manual-gc: true + --manual-gc-interval: 100 +TEST_TYPE: regular + diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml index b93862aff8c..4d92fcf21e2 100644 --- a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4.yaml @@ -5,6 +5,8 @@ ENV_VARS: NCCL_NVLS_ENABLE: 0 PYTHONWARNINGS: ignore NCCL_DEBUG: VERSION + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: ":4096:8" MODEL_ARGS: # Transformer Engine args @@ -20,35 +22,18 @@ MODEL_ARGS: --use-distributed-optimizer: true --overlap-grad-reduce: true --overlap-param-gather: true - # Use unfused attention since MLA with fused attention and deterministic mode leads to NaN - --attention-backend: unfused # TODO: switch back to fused attention after fix --use-mcore-models: true --sequence-parallel: true --micro-batch-size: 4 # MoE training related args - --moe-token-dispatcher-type: alltoall - --moe-permute-fusion: true - # Add checkpointing args - --save: ${CHECKPOINT_SAVE_PATH} - --load: ${CHECKPOINT_LOAD_PATH} - --save-interval: 25 - # Add logging args - --log-timers-to-tensorboard: true - --log-memory-to-tensorboard: true - --log-num-zeros-in-grad: true - --log-params-norm: true - --log-validation-ppl-to-tensorboard: true - --log-throughput: true - --log-interval: 1 - --logging-level: 40 - --tensorboard-dir: ${TENSORBOARD_PATH} + --moe-grouped-gemm: true + --moe-token-dispatcher-type: flex + --moe-flex-dispatcher-backend: deepep # Add mixed precision args --bf16: true - --exit-interval: 50 # kernel fusion related args - --no-rope-fusion: true - --cross-entropy-loss-fusion: true - --cross-entropy-fusion-impl: native + --moe-permute-fusion: true + --moe-router-fusion: true # MISC --manual-gc: true --manual-gc-interval: 100 diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_a2aOverlap.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_a2aOverlap.yaml new file mode 100644 index 00000000000..ff15343a474 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_a2aOverlap.yaml @@ -0,0 +1,44 @@ +ENV_VARS: + # NOTE: CUDA_DEVICE_MAX_CONNECTIONS should be set to a larger value for A2A overlap + CUDA_DEVICE_MAX_CONNECTIONS: 32 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: ":4096:8" + +MODEL_ARGS: + # Transformer Engine args + --transformer-impl: transformer_engine + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --num-virtual-stages-per-pipeline-rank: 4 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + --overlap-grad-reduce: true + --overlap-param-gather: true + --use-mcore-models: true + --sequence-parallel: true + --micro-batch-size: 4 + # MoE training related args + --moe-grouped-gemm: true + --moe-token-dispatcher-type: alltoall + # A2A communication overlap args + --overlap-moe-expert-parallel-comm: true + --delay-wgrad-compute: true + # Add mixed precision args + --bf16: true + # kernel fusion related args + --moe-permute-fusion: true + --moe-router-fusion: true + # MISC + --manual-gc: true + --manual-gc-interval: 100 +TEST_TYPE: resume-ckpt + diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_cp2.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_cp2.yaml new file mode 100644 index 00000000000..9eacbacbd9e --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_cp2.yaml @@ -0,0 +1,39 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: ":4096:8" + +MODEL_ARGS: + # Transformer Engine args + --transformer-impl: transformer_engine + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --expert-model-parallel-size: 2 + --context-parallel-size: 2 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + --overlap-grad-reduce: true + --overlap-param-gather: true + --use-mcore-models: true + --sequence-parallel: true + --micro-batch-size: 4 + # MoE training related args + --moe-grouped-gemm: true + --moe-token-dispatcher-type: alltoall + # Add mixed precision args + --bf16: true + # kernel fusion related args + --moe-permute-fusion: true + --moe-router-fusion: true + # MISC + --manual-gc: true + --manual-gc-interval: 100 +TEST_TYPE: resume-ckpt + diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_cudagraph.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_cudagraph.yaml new file mode 100644 index 00000000000..e1d1cb185d7 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_cudagraph.yaml @@ -0,0 +1,52 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + # NOTE: expandable_segments must be commented out for cuda graph to work + # PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: ":4096:8" + +MODEL_ARGS: + # Transformer Engine args + --transformer-impl: transformer_engine + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --num-virtual-stages-per-pipeline-rank: 4 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + --overlap-grad-reduce: true + --overlap-param-gather: true + --use-mcore-models: true + --sequence-parallel: true + --micro-batch-size: 4 + # MoE training related args + --moe-grouped-gemm: true + --moe-token-dispatcher-type: alltoall + # CUDA Graph args + --cuda-graph-impl: transformer_engine + --cuda-graph-scope: "[attn mlp moe_router moe_preprocess]" + --cuda-graph-warmup-steps: 1 + --te-rng-tracker: true + # Add mixed precision args + --bf16: true + --fp8-format: hybrid + --fp8-recipe: blockwise + --first-last-layers-bf16: true + # kernel fusion related args + --moe-permute-fusion: true + --moe-router-fusion: true + # Selective recompute for moe_act + --recompute-granularity: selective + --recompute-modules: "[moe_act]" + # MISC + --manual-gc: true + --manual-gc-interval: 100 +TEST_TYPE: resume-ckpt + diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_fp8.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_fp8.yaml new file mode 100644 index 00000000000..9e9211ec075 --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_fp8.yaml @@ -0,0 +1,41 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: ":4096:8" + +MODEL_ARGS: + # Transformer Engine args + --transformer-impl: transformer_engine + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --num-virtual-stages-per-pipeline-rank: 4 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + --overlap-grad-reduce: true + --overlap-param-gather: true + --use-mcore-models: true + --sequence-parallel: true + --micro-batch-size: 4 + # MoE training related args + --moe-grouped-gemm: true + --moe-token-dispatcher-type: alltoall + # FP8 args + --bf16: true + --fp8-format: e4m3 + --fp8-recipe: blockwise + --first-last-layers-bf16: true + # kernel fusion related args + --moe-permute-fusion: true + # MISC + --manual-gc: true + --manual-gc-interval: 100 +TEST_TYPE: resume-ckpt diff --git a/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_offloading.yaml b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_offloading.yaml new file mode 100644 index 00000000000..6ef121f511d --- /dev/null +++ b/tests/functional_tests/test_cases/moe2.0/runtime_configs/tp2pp2ep4_offloading.yaml @@ -0,0 +1,45 @@ +ENV_VARS: + CUDA_DEVICE_MAX_CONNECTIONS: 1 + NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0 + NVTE_CPU_OFFLOAD_V1: 1 + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True + NCCL_NVLS_ENABLE: 0 + PYTHONWARNINGS: ignore + NCCL_DEBUG: VERSION + NCCL_ALGO: Ring + CUBLAS_WORKSPACE_CONFIG: ":4096:8" + +MODEL_ARGS: + # Transformer Engine args + --transformer-impl: transformer_engine + # Distributed args + --distributed-timeout-minutes: 60 + --tensor-model-parallel-size: 2 + --pipeline-model-parallel-size: 2 + --expert-model-parallel-size: 4 + --context-parallel-size: 1 + --expert-tensor-parallel-size: 1 + --use-distributed-optimizer: true + # NOTE: uncomment if TE >= 2.9.0 + # --overlap-grad-reduce: true + # --overlap-param-gather: true + --use-mcore-models: true + --sequence-parallel: true + --micro-batch-size: 1 + # MoE training related args + --moe-grouped-gemm: true + --moe-token-dispatcher-type: alltoall + # Fine-grained activation offloading args + --recompute-granularity: selective + --recompute-modules: "[layernorm mla_up_proj mlp moe_act]" + --fine-grained-activation-offloading: true + --offload-modules: "[expert_fc1 moe_act attn_norm mlp_norm qkv_linear core_attn attn_proj]" + # Add mixed precision args + --bf16: true + # kernel fusion related args + --moe-permute-fusion: true + --moe-router-fusion: true + # MISC + --manual-gc: true + --manual-gc-interval: 100 +TEST_TYPE: resume-ckpt diff --git a/tests/test_utils/recipes/moe2.0.yaml b/tests/test_utils/recipes/h100/moe2.0.yaml similarity index 53% rename from tests/test_utils/recipes/moe2.0.yaml rename to tests/test_utils/recipes/h100/moe2.0.yaml index 39fccd08c40..22b1067b451 100644 --- a/tests/test_utils/recipes/moe2.0.yaml +++ b/tests/test_utils/recipes/h100/moe2.0.yaml @@ -55,7 +55,7 @@ spec: "CHECKPOINT_LOAD_PATH=/mnt/artifacts" "TRAINING_SCRIPT_PATH=pretrain_gpt.py" "TRAINING_PARAMS_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/model_config.yaml" - "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/{test_case}/golden_values_{environment}_{platforms}.json" + "GOLDEN_VALUES_PATH=./tests/functional_tests/test_cases/{model}/golden_values/{test_case}/golden_values_{environment}_{platforms}.json" "N_REPEAT={n_repeat}" "ENABLE_LIGHTWEIGHT_MODE=${{ENABLE_LIGHTWEIGHT_MODE}}" "RECORD_CHECKPOINTS=${{RECORD_CHECKPOINTS}}" @@ -65,36 +65,112 @@ spec: products: ########################### - # Merge train tests # + # DSv3 model tests # ########################### - test_case: [dsv3_tp1pp1ep8] products: - - model_config: dsv3_proxy - runtime_config: tp1pp1ep8 + - model_config: [dsv3_proxy] + runtime_config: [tp1pp1ep8] environment: [dev] - scope: [nightly-broken] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [dsv3_tp2pp2ep4] products: - - model_config: dsv3_proxy - runtime_config: tp2pp2ep4 + - model_config: [dsv3_proxy] + runtime_config: [tp2pp2ep4] environment: [dev] - scope: [nightly-broken] + scope: [mr, mr-github] platforms: [dgx_h100] - - test_case: [qwen3_tp1pp1ep1] + # FP8 training test + - test_case: [dsv3_tp2pp2ep4_fp8] products: - - model_config: qwen3_proxy - runtime_config: tp1pp1ep1 + - model_config: [dsv3_proxy] + runtime_config: [tp2pp2ep4_fp8] environment: [dev] - scope: [nightly-broken] + scope: [mr, mr-github] + platforms: [dgx_h100] + # Scoped CUDA graphs test + - test_case: [dsv3_tp2pp2ep4_cudagraph] + products: + - model_config: [dsv3_proxy] + runtime_config: [tp2pp2ep4_cudagraph] + environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] + # Fine-grained activation offloading test + - test_case: [dsv3_tp2pp2ep4_offloading] + products: + - model_config: [dsv3_proxy] + runtime_config: [tp2pp2ep4_offloading] + environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] + # A2A communication overlap test + - test_case: [dsv3_tp2pp2ep4_a2aOverlap] + products: + - model_config: [dsv3_proxy] + runtime_config: [tp2pp2ep4_a2aOverlap] + environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] + # Context parallelism test + - test_case: [dsv3_tp2pp2ep4_cp2] + products: + - model_config: [dsv3_proxy] + runtime_config: [tp2pp2ep4_cp2] + environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] + + ########################### + # Qwen3 model tests # + ########################### + - test_case: [qwen3_tp1pp1ep8] + products: + - model_config: [qwen3_proxy] + runtime_config: [tp1pp1ep8] + environment: [dev] + scope: [mr, mr-github] platforms: [dgx_h100] - test_case: [qwen3_tp2pp2ep4] products: - - model_config: qwen3_proxy - runtime_config: tp2pp2ep4 + - model_config: [qwen3_proxy] + runtime_config: [tp2pp2ep4] environment: [dev] - scope: [nightly-broken] + scope: [mr, mr-github] + platforms: [dgx_h100] + # Muon optimizer with allgather dispatcher test + - test_case: [qwen3_tp2pp1ep8_muon] + products: + - model_config: [qwen3_proxy] + runtime_config: [tp2pp1ep8_muon] + environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] + # Context parallelism test + - test_case: [qwen3_tp2pp2ep4_cp2] + products: + - model_config: [qwen3_proxy] + runtime_config: [tp2pp2ep4_cp2] + environment: [dev] + scope: [mr, mr-github] platforms: [dgx_h100] + + ########################### + # Qwen3-next model tests # + ########################### + # Gated Delta Net (Linear Attention) test + - test_case: [qwen3next_tp2pp2ep4] + products: + - model_config: [qwen3next_proxy] + runtime_config: [tp2pp2ep4] + environment: [dev] + scope: [mr, mr-github] + platforms: [dgx_h100] + + ########################### + # BERT model tests # + ########################### - test_case: [bert_mcore_tp1_pp2] products: - environment: [dev]