From 083394c9a240b0801f0d04589d5b2e26113b86e3 Mon Sep 17 00:00:00 2001 From: geyuqiang Date: Tue, 23 Dec 2025 17:41:34 +0800 Subject: [PATCH 1/4] Update Paddle Submodule to release/3.3 --- Paddle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Paddle b/Paddle index 935f0a6601f..fd66eaf3f0e 160000 --- a/Paddle +++ b/Paddle @@ -1 +1 @@ -Subproject commit 935f0a6601f90f4eb1d091682b31b243da50851b +Subproject commit fd66eaf3f0e415f50f38bbff24fa84b9cc9f796a From 75d0ddc7c40764850f2d1775e84140cd59ca47b4 Mon Sep 17 00:00:00 2001 From: geyuqiang Date: Tue, 23 Dec 2025 20:25:35 +0800 Subject: [PATCH 2/4] add layer_norm_cuda_kernel --- .../cuda_kernels/layer_norm_cuda_kernel.cu | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu new file mode 100644 index 00000000000..cef5052be91 --- /dev/null +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu @@ -0,0 +1,26 @@ +// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/legacy/gpu/layer_norm_cuda_kernel.cu" //NOLINT + +PD_CUSTOM_KERNEL_REGISTER( + fused_rms_norm_ext, GPU, ALL_LAYOUT, phi::RMSLnFwd, float, phi::bfloat16) {} + +PD_CUSTOM_KERNEL_REGISTER(fused_rms_norm_ext_grad, + GPU, + ALL_LAYOUT, + phi::RMSLnBwd, + float, + phi::bfloat16) {} From 72f2d811d6949cd2c65d0a6803cbcde6c41f91cd Mon Sep 17 00:00:00 2001 From: geyuqiang Date: Tue, 23 Dec 2025 21:02:25 +0800 Subject: [PATCH 3/4] fix kernel --- .../kernels/cuda_kernels/layer_norm_cuda_kernel.cu | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu index cef5052be91..9734be136bb 100644 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu +++ b/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu @@ -15,11 +15,15 @@ #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/legacy/gpu/layer_norm_cuda_kernel.cu" //NOLINT -PD_CUSTOM_KERNEL_REGISTER( - fused_rms_norm_ext, GPU, ALL_LAYOUT, phi::RMSLnFwd, float, phi::bfloat16) {} +PD_CUSTOM_KERNEL_REGISTER(fused_rms_norm_ext, + iluvatar_gpu, + ALL_LAYOUT, + phi::RMSLnFwd, + float, + phi::bfloat16) {} PD_CUSTOM_KERNEL_REGISTER(fused_rms_norm_ext_grad, - GPU, + iluvatar_gpu, ALL_LAYOUT, phi::RMSLnBwd, float, From 69ff2290097eedc8aa30ae2f8aab084a2922c679 Mon Sep 17 00:00:00 2001 From: geyuqiang Date: Wed, 24 Dec 2025 11:22:00 +0800 Subject: [PATCH 4/4] remove rms_norm --- .../cuda_kernels/layer_norm_cuda_kernel.cu | 30 ------------------- .../ernie_core/layer_norm_cuda_kernel.cu | 16 +++++++--- backends/iluvatar_gpu/tests/disabled_test.txt | 1 + 3 files changed, 13 insertions(+), 34 deletions(-) delete mode 100644 backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu diff --git a/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu b/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu deleted file mode 100644 index 9734be136bb..00000000000 --- a/backends/iluvatar_gpu/kernels/cuda_kernels/layer_norm_cuda_kernel.cu +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/legacy/gpu/layer_norm_cuda_kernel.cu" //NOLINT - -PD_CUSTOM_KERNEL_REGISTER(fused_rms_norm_ext, - iluvatar_gpu, - ALL_LAYOUT, - phi::RMSLnFwd, - float, - phi::bfloat16) {} - -PD_CUSTOM_KERNEL_REGISTER(fused_rms_norm_ext_grad, - iluvatar_gpu, - ALL_LAYOUT, - phi::RMSLnBwd, - float, - phi::bfloat16) {} diff --git a/backends/iluvatar_gpu/kernels/ernie_core/layer_norm_cuda_kernel.cu b/backends/iluvatar_gpu/kernels/ernie_core/layer_norm_cuda_kernel.cu index 5f4288d97af..75417d058e6 100644 --- a/backends/iluvatar_gpu/kernels/ernie_core/layer_norm_cuda_kernel.cu +++ b/backends/iluvatar_gpu/kernels/ernie_core/layer_norm_cuda_kernel.cu @@ -81,8 +81,16 @@ void RMSLnBwd(const Context &ctx, } // namespace phi -PD_REGISTER_PLUGIN_KERNEL( - fused_rms_norm_ext, iluvatar_gpu, ALL_LAYOUT, phi::RMSLnFwd, float) {} +PD_REGISTER_PLUGIN_KERNEL(fused_rms_norm_ext, + iluvatar_gpu, + ALL_LAYOUT, + phi::RMSLnFwd, + float, + phi::bfloat16) {} -PD_REGISTER_PLUGIN_KERNEL( - fused_rms_norm_ext_grad, iluvatar_gpu, ALL_LAYOUT, phi::RMSLnBwd, float) {} +PD_REGISTER_PLUGIN_KERNEL(fused_rms_norm_ext_grad, + iluvatar_gpu, + ALL_LAYOUT, + phi::RMSLnBwd, + float, + phi::bfloat16) {} diff --git a/backends/iluvatar_gpu/tests/disabled_test.txt b/backends/iluvatar_gpu/tests/disabled_test.txt index 277765189ed..57a868125e8 100644 --- a/backends/iluvatar_gpu/tests/disabled_test.txt +++ b/backends/iluvatar_gpu/tests/disabled_test.txt @@ -556,3 +556,4 @@ test_rms_norm_op.py test_batched_gemm.py test_match_matrix_tensor_op.py test_tensor.py +test_rms_norm.py