Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/intel_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,22 @@ jobs:
gpu: BMG
intel_graphics: ROLLING
sycl_target: intel_gpu_bmg_g21
igc_version_major: 2

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not ideal that this is hard coded. Could you see whether there is some way to detect this so we don't need to update this manual when the driver changes? Can be in a follow up PR.

igc_version_minor: 18
runner: bmg108629-01
- compiler: RELEASE
gpu: PVC
intel_graphics: ROLLING
sycl_target: intel_gpu_pvc
igc_version_major: 2
igc_version_minor: 11
runner: pvc146162-01
- compiler: NIGHTLY
gpu: PVC
intel_graphics: ROLLING
sycl_target: intel_gpu_pvc
igc_version_major: 2
igc_version_minor: 11
runner: pvc146162-01

name: Run Intel ${{ matrix.compiler }} tests on ${{ matrix.gpu }} with intel-graphics ${{ matrix.intel_graphics }}
Expand Down Expand Up @@ -103,6 +109,8 @@ jobs:
cmake -G Ninja \
-DCUTLASS_ENABLE_SYCL=ON \
-DDPCPP_SYCL_TARGET=${{ matrix.sycl_target }} \
-DIGC_VERSION_MAJOR=${{ matrix.igc_version_major }} \
-DIGC_VERSION_MINOR=${{ matrix.igc_version_minor }} \
-DCMAKE_CXX_FLAGS="-Werror" \
-DCUTLASS_SYCL_RUNNING_CI=ON
cmake --build .
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/intel_test_gpp_host.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,15 @@ jobs:
gpu: BMG
intel_graphics: ROLLING
sycl_target: intel_gpu_bmg_g21
igc_version_major: 2
igc_version_minor: 18
runner: bmg108629-01
- compiler: RELEASE
gpu: PVC
intel_graphics: ROLLING
sycl_target: intel_gpu_pvc
igc_version_major: 2
igc_version_minor: 11
runner: pvc146162-01


Expand Down Expand Up @@ -83,6 +87,8 @@ jobs:
cmake -G Ninja \
-DCUTLASS_ENABLE_SYCL=ON \
-DDPCPP_SYCL_TARGET=${{ matrix.sycl_target }} \
-DIGC_VERSION_MAJOR=${{ matrix.igc_version_major }} \
-DIGC_VERSION_MINOR=${{ matrix.igc_version_minor }} \
-DCUTLASS_SYCL_RUNNING_CI=ON \
-DCMAKE_CXX_FLAGS="-Werror" \
-DDPCPP_HOST_COMPILER=g++-13
Expand Down
12 changes: 12 additions & 0 deletions test/unit/cute/intel_xe/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

if(NOT DEFINED IGC_VERSION_MAJOR)
set(IGC_VERSION_MAJOR 2)
endif()
if(NOT DEFINED IGC_VERSION_MINOR)
set(IGC_VERSION_MINOR 18)
endif()

add_compile_definitions(
IGC_VERSION_MAJOR=${IGC_VERSION_MAJOR}
IGC_VERSION_MINOR=${IGC_VERSION_MINOR}
)

if(SYCL_INTEL_TARGET)
cutlass_test_unit_add_executable(
cutlass_test_unit_cute_intel_xe
Expand Down
104 changes: 104 additions & 0 deletions test/unit/cute/intel_xe/mma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,3 +311,107 @@ TEST(PVC_CuTe_Xe, MMA_XE_8x16x8_F32TF32TF32F32_TT) {
MMA_Test<XE_8x16x8_F32TF32TF32F32_TT, 64, 64, 8, 16, 32, tfloat32_t,
tfloat32_t, float>(512, 512, 256);
}

#if (IGC_VERSION_MAJOR >= 2 && IGC_VERSION_MINOR >= 18)

TEST(PVC_CuTe_Xe, MMA_DPAS_S8_8x16) {
MMA_Test<XE_DPAS_TT<8, int32_t, int8_t>, 64, 64, 8, 16, 32, int8_t, int8_t,
int32_t>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_S8_4x16) {
MMA_Test<XE_DPAS_TT<4, int32_t, int8_t>, 32, 64, 4, 16, 32, int8_t, int8_t,
int32_t>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_S8_2x16) {
MMA_Test<XE_DPAS_TT<2, int32_t, int8_t>, 16, 64, 2, 16, 32, int8_t, int8_t,
int32_t>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_S8_1x16) {
MMA_Test<XE_DPAS_TT<1, int32_t, int8_t>, 8, 64, 1, 16, 32, int8_t, int8_t,
int32_t>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_U8_8x16) {
MMA_Test<XE_DPAS_TT<8, int32_t, uint8_t>, 64, 64, 8, 16, 32, uint8_t, uint8_t,
int32_t>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_U8_4x16) {
MMA_Test<XE_DPAS_TT<4, int32_t, uint8_t>, 32, 64, 4, 16, 32, uint8_t, uint8_t,
int32_t>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_U8_2x16) {
MMA_Test<XE_DPAS_TT<2, int32_t, uint8_t>, 16, 64, 2, 16, 32, uint8_t, uint8_t,
int32_t>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_U8_1x16) {
MMA_Test<XE_DPAS_TT<1, int32_t, uint8_t>, 8, 64, 1, 16, 32, uint8_t, uint8_t,
int32_t>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_BF16_8x16) {
MMA_Test<XE_DPAS_TT<8, float, bfloat16_t>, 256, 256, 32, 64, 32, bfloat16_t,
bfloat16_t, float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_BF16_4x16) {
MMA_Test<XE_DPAS_TT<4, float, bfloat16_t>, 32, 64, 4, 16, 16, bfloat16_t,
bfloat16_t, float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_BF16_2x16) {
MMA_Test<XE_DPAS_TT<2, float, bfloat16_t>, 16, 64, 2, 16, 16, bfloat16_t,
bfloat16_t, float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_BF16_1x16) {
MMA_Test<XE_DPAS_TT<1, float, bfloat16_t>, 8, 64, 1, 16, 16, bfloat16_t,
bfloat16_t, float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_F16_8x16) {
MMA_Test<XE_DPAS_TT<8, float, half_t>, 64, 64, 8, 16, 16, half_t, half_t,
float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_F16_4x16) {
MMA_Test<XE_DPAS_TT<4, float, half_t>, 32, 64, 4, 16, 16, half_t, half_t,
float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_F16_2x16) {
MMA_Test<XE_DPAS_TT<2, float, half_t>, 16, 64, 2, 16, 16, half_t, half_t,
float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_F16_1x16) {
MMA_Test<XE_DPAS_TT<1, float, half_t>, 8, 64, 1, 16, 16, half_t, half_t,
float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_TF32_8x16) {
MMA_Test<XE_DPAS_TT<8, float, tfloat32_t>, 64, 64, 8, 16, 32, tfloat32_t,
tfloat32_t, float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_TF32_4x16) {
MMA_Test<XE_DPAS_TT<4, float, tfloat32_t>, 64, 64, 8, 16, 16, tfloat32_t,
tfloat32_t, float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_TF32_2x16) {
MMA_Test<XE_DPAS_TT<2, float, tfloat32_t>, 64, 64, 8, 16, 16, tfloat32_t,
tfloat32_t, float>(512, 512, 256);
}

TEST(PVC_CuTe_Xe, MMA_DPAS_TF32_1x16) {
MMA_Test<XE_DPAS_TT<1, float, tfloat32_t>, 64, 64, 8, 16, 16, tfloat32_t,
tfloat32_t, float>(512, 512, 256);
}

#endif
43 changes: 43 additions & 0 deletions test/unit/cute/intel_xe/tiled_mma.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/***************************************************************************************************
* Copyright (c) 2025 - 2025 Codeplay Software Ltd. All rights reserved.
* Copyright (C) 2025 Intel Corporation, All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -69,3 +70,45 @@ TEST(PVC_CuTe_Xe, tiled_mma_2) {
check_tiled_mma<MMA_Atom<XE_8x16x16_F32BF16BF16F32_TT>, TileShape,
SubgroupLayout, ExpectedTiledMMA>();
}

TEST(PVC_CuTe_Xe, tiled_mma_dpas_3) {

using TileShape = Shape<_256, _256, _32>;
using SubgroupLayout = Layout<Shape<_8, _4, _1>, Stride<_4, _1, _0>>;
using ExpectedTiledMMA = TiledMMA<
MMA_Atom<XE_DPAS_TT<8, float, cute::bfloat16_t>>,
Layout<Shape<_8, _4, _1>, Stride<_4, _1, _0>>,
const Tile<Layout<Shape<_8, _8, _4>, Stride<_1, _32, _8>>,
Layout<Shape<_16, _4, _4>, Stride<_1, _64, _16>>,
decltype(coalesce(Layout<Shape<_32>, Stride<_1>>{}))>>;
check_tiled_mma<MMA_Atom<XE_DPAS_TT<8, float, cute::bfloat16_t>>, TileShape,
SubgroupLayout, ExpectedTiledMMA>();
}

TEST(PVC_CuTe_Xe, tiled_mma_dpas_4) {

using TileShape = Shape<_128, _64, _32>;
using SubgroupLayout = Layout<Shape<_4, _2, _1>, Stride<_2, _1, _0>>;
using ExpectedTiledMMA = TiledMMA<
MMA_Atom<XE_DPAS_TT<8, float, cute::bfloat16_t>>,
Layout<Shape<_4, _2, _1>, Stride<_2, _1, _0>>,
const Tile<Layout<Shape<_8, _4, _4>, Stride<_1, _32, _8>>,
Layout<Shape<_16, _2, _2>, Stride<_1, _32, _16>>,
decltype(coalesce(Layout<Shape<_32>, Stride<_1>>{}))>>;
check_tiled_mma<MMA_Atom<XE_DPAS_TT<8, float, cute::bfloat16_t>>, TileShape,
SubgroupLayout, ExpectedTiledMMA>();
}

TEST(PVC_CuTe_Xe, tiled_mma_dpas_5) {

using TileShape = Shape<_128, _64, _32>;
using SubgroupLayout = Layout<Shape<_4, _2, _2>, Stride<_2, _1, _8>>;
using ExpectedTiledMMA = TiledMMA<
MMA_Atom<XE_DPAS_TT<8, float, cute::bfloat16_t>>,
Layout<Shape<_4, _2, _2>, Stride<_2, _1, _8>>,
const Tile<Layout<Shape<_8, _4, _4>, Stride<_1, _32, _8>>,
Layout<Shape<_16, _2, _2>, Stride<_1, _32, _16>>,
decltype(coalesce(Layout<Shape<_32>, Stride<_1>>{}))>>;
check_tiled_mma<MMA_Atom<XE_DPAS_TT<8, float, cute::bfloat16_t>>, TileShape,
SubgroupLayout, ExpectedTiledMMA>();
}