diff --git a/.github/workflows/intel_test.yml b/.github/workflows/intel_test.yml index 09bcd640ab..e32c8128e3 100644 --- a/.github/workflows/intel_test.yml +++ b/.github/workflows/intel_test.yml @@ -43,16 +43,22 @@ jobs: gpu: BMG intel_graphics: ROLLING sycl_target: intel_gpu_bmg_g21 + igc_version_major: 2 + igc_version_minor: 18 runner: bmg108629-01 - compiler: RELEASE gpu: PVC intel_graphics: ROLLING sycl_target: intel_gpu_pvc + igc_version_major: 2 + igc_version_minor: 11 runner: pvc146162-01 - compiler: NIGHTLY gpu: PVC intel_graphics: ROLLING sycl_target: intel_gpu_pvc + igc_version_major: 2 + igc_version_minor: 11 runner: pvc146162-01 name: Run Intel ${{ matrix.compiler }} tests on ${{ matrix.gpu }} with intel-graphics ${{ matrix.intel_graphics }} @@ -103,6 +109,8 @@ jobs: cmake -G Ninja \ -DCUTLASS_ENABLE_SYCL=ON \ -DDPCPP_SYCL_TARGET=${{ matrix.sycl_target }} \ + -DIGC_VERSION_MAJOR=${{ matrix.igc_version_major }} \ + -DIGC_VERSION_MINOR=${{ matrix.igc_version_minor }} \ -DCMAKE_CXX_FLAGS="-Werror" \ -DCUTLASS_SYCL_RUNNING_CI=ON cmake --build . 
diff --git a/.github/workflows/intel_test_gpp_host.yml b/.github/workflows/intel_test_gpp_host.yml index c75c9ad508..87d588af0f 100644 --- a/.github/workflows/intel_test_gpp_host.yml +++ b/.github/workflows/intel_test_gpp_host.yml @@ -28,11 +28,15 @@ jobs: gpu: BMG intel_graphics: ROLLING sycl_target: intel_gpu_bmg_g21 + igc_version_major: 2 + igc_version_minor: 18 runner: bmg108629-01 - compiler: RELEASE gpu: PVC intel_graphics: ROLLING sycl_target: intel_gpu_pvc + igc_version_major: 2 + igc_version_minor: 11 runner: pvc146162-01 @@ -83,6 +87,8 @@ jobs: cmake -G Ninja \ -DCUTLASS_ENABLE_SYCL=ON \ -DDPCPP_SYCL_TARGET=${{ matrix.sycl_target }} \ + -DIGC_VERSION_MAJOR=${{ matrix.igc_version_major }} \ + -DIGC_VERSION_MINOR=${{ matrix.igc_version_minor }} \ -DCUTLASS_SYCL_RUNNING_CI=ON \ -DCMAKE_CXX_FLAGS="-Werror" \ -DDPCPP_HOST_COMPILER=g++-13 diff --git a/test/unit/cute/intel_xe/CMakeLists.txt b/test/unit/cute/intel_xe/CMakeLists.txt index 89a58e2015..cdd23be94c 100755 --- a/test/unit/cute/intel_xe/CMakeLists.txt +++ b/test/unit/cute/intel_xe/CMakeLists.txt @@ -26,6 +26,18 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+if(NOT DEFINED IGC_VERSION_MAJOR) #[[ fall back to IGC 2.18 when -DIGC_VERSION_MAJOR / -DIGC_VERSION_MINOR are not given ]] + set(IGC_VERSION_MAJOR 2) +endif() +if(NOT DEFINED IGC_VERSION_MINOR) + set(IGC_VERSION_MINOR 18) +endif() + +add_compile_definitions( + IGC_VERSION_MAJOR=${IGC_VERSION_MAJOR} + IGC_VERSION_MINOR=${IGC_VERSION_MINOR} +) + if(SYCL_INTEL_TARGET) cutlass_test_unit_add_executable( cutlass_test_unit_cute_intel_xe diff --git a/test/unit/cute/intel_xe/mma.cpp b/test/unit/cute/intel_xe/mma.cpp index d30c8ae8d7..7dd3d3113f 100755 --- a/test/unit/cute/intel_xe/mma.cpp +++ b/test/unit/cute/intel_xe/mma.cpp @@ -311,3 +311,107 @@ TEST(PVC_CuTe_Xe, MMA_XE_8x16x8_F32TF32TF32F32_TT) { MMA_Test(512, 512, 256); } + +#if (IGC_VERSION_MAJOR > 2 || (IGC_VERSION_MAJOR == 2 && IGC_VERSION_MINOR >= 18)) /* ordered (major, minor) comparison: true for 2.18 and every later version, including a later major with a small minor */ + +TEST(PVC_CuTe_Xe, MMA_DPAS_S8_8x16) { + MMA_Test, 64, 64, 8, 16, 32, int8_t, int8_t, + int32_t>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_S8_4x16) { + MMA_Test, 32, 64, 4, 16, 32, int8_t, int8_t, + int32_t>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_S8_2x16) { + MMA_Test, 16, 64, 2, 16, 32, int8_t, int8_t, + int32_t>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_S8_1x16) { + MMA_Test, 8, 64, 1, 16, 32, int8_t, int8_t, + int32_t>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_U8_8x16) { + MMA_Test, 64, 64, 8, 16, 32, uint8_t, uint8_t, + int32_t>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_U8_4x16) { + MMA_Test, 32, 64, 4, 16, 32, uint8_t, uint8_t, + int32_t>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_U8_2x16) { + MMA_Test, 16, 64, 2, 16, 32, uint8_t, uint8_t, + int32_t>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_U8_1x16) { + MMA_Test, 8, 64, 1, 16, 32, uint8_t, uint8_t, + int32_t>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_BF16_8x16) { + MMA_Test, 256, 256, 32, 64, 32, bfloat16_t, + bfloat16_t, float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_BF16_4x16) { + MMA_Test, 32, 64, 4, 16, 16, bfloat16_t, + bfloat16_t, float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_BF16_2x16) { + MMA_Test, 16, 64, 2, 16, 16, bfloat16_t, + 
bfloat16_t, float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_BF16_1x16) { + MMA_Test, 8, 64, 1, 16, 16, bfloat16_t, + bfloat16_t, float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_F16_8x16) { + MMA_Test, 64, 64, 8, 16, 16, half_t, half_t, + float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_F16_4x16) { + MMA_Test, 32, 64, 4, 16, 16, half_t, half_t, + float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_F16_2x16) { + MMA_Test, 16, 64, 2, 16, 16, half_t, half_t, + float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_F16_1x16) { + MMA_Test, 8, 64, 1, 16, 16, half_t, half_t, + float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_TF32_8x16) { + MMA_Test, 64, 64, 8, 16, 32, tfloat32_t, + tfloat32_t, float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_TF32_4x16) { + MMA_Test, 64, 64, 8, 16, 16, tfloat32_t, + tfloat32_t, float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_TF32_2x16) { + MMA_Test, 64, 64, 8, 16, 16, tfloat32_t, + tfloat32_t, float>(512, 512, 256); +} + +TEST(PVC_CuTe_Xe, MMA_DPAS_TF32_1x16) { + MMA_Test, 64, 64, 8, 16, 16, tfloat32_t, + tfloat32_t, float>(512, 512, 256); +} + +#endif /* IGC version at least 2.18 */ diff --git a/test/unit/cute/intel_xe/tiled_mma.cpp b/test/unit/cute/intel_xe/tiled_mma.cpp index 1625d0ad35..c8a0d4f16b 100644 --- a/test/unit/cute/intel_xe/tiled_mma.cpp +++ b/test/unit/cute/intel_xe/tiled_mma.cpp @@ -1,5 +1,6 @@ /*************************************************************************************************** * Copyright (c) 2025 - 2025 Codeplay Software Ltd. All rights reserved. + * Copyright (C) 2025 Intel Corporation, All rights reserved. 
* SPDX-License-Identifier: BSD-3-Clause * * Redistribution and use in source and binary forms, with or without @@ -69,3 +70,45 @@ TEST(PVC_CuTe_Xe, tiled_mma_2) { check_tiled_mma, TileShape, SubgroupLayout, ExpectedTiledMMA>(); } + +TEST(PVC_CuTe_Xe, tiled_mma_dpas_3) { /* Hands a 256x256x32 TileShape, the SubgroupLayout below and the expected TiledMMA type to check_tiled_mma; presumably that helper compares the type it derives against ExpectedTiledMMA - confirm in its definition. */ + + using TileShape = Shape<_256, _256, _32>; + using SubgroupLayout = Layout, Stride<_4, _1, _0>>; + using ExpectedTiledMMA = TiledMMA< + MMA_Atom>, + Layout, Stride<_4, _1, _0>>, + const Tile, Stride<_1, _32, _8>>, + Layout, Stride<_1, _64, _16>>, + decltype(coalesce(Layout, Stride<_1>>{}))>>; + check_tiled_mma>, TileShape, + SubgroupLayout, ExpectedTiledMMA>(); +} + +TEST(PVC_CuTe_Xe, tiled_mma_dpas_4) { /* Same kind of check with a 128x64x32 TileShape and SubgroupLayout strides (_2, _1, _0). */ + + using TileShape = Shape<_128, _64, _32>; + using SubgroupLayout = Layout, Stride<_2, _1, _0>>; + using ExpectedTiledMMA = TiledMMA< + MMA_Atom>, + Layout, Stride<_2, _1, _0>>, + const Tile, Stride<_1, _32, _8>>, + Layout, Stride<_1, _32, _16>>, + decltype(coalesce(Layout, Stride<_1>>{}))>>; + check_tiled_mma>, TileShape, + SubgroupLayout, ExpectedTiledMMA>(); +} + +TEST(PVC_CuTe_Xe, tiled_mma_dpas_5) { /* Same TileShape as tiled_mma_dpas_4; the third SubgroupLayout stride is _8 instead of _0. */ + + using TileShape = Shape<_128, _64, _32>; + using SubgroupLayout = Layout, Stride<_2, _1, _8>>; + using ExpectedTiledMMA = TiledMMA< + MMA_Atom>, + Layout, Stride<_2, _1, _8>>, + const Tile, Stride<_1, _32, _8>>, + Layout, Stride<_1, _32, _16>>, + decltype(coalesce(Layout, Stride<_1>>{}))>>; + check_tiled_mma>, TileShape, + SubgroupLayout, ExpectedTiledMMA>(); +}