Skip to content

Commit 4ca27f8

Browse files
committed
[SYCL][ESIMD][E2E] Add DG2 unified memory block_load tests
Signed-off-by: Sarnie, Nick <[email protected]>
1 parent d616546 commit 4ca27f8

14 files changed

+319
-155
lines changed

sycl/test-e2e/ESIMD/unified_memory_api/Inputs/block_load.hpp

+89-76
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ bool testUSM(queue Q, uint32_t Groups, uint32_t Threads,
144144
return Passed;
145145
}
146146

147-
template <typename T, bool TestPVCFeatures> bool testUSM(queue Q) {
147+
template <typename T, TestFeatures Features> bool testUSM(queue Q) {
148148
constexpr bool CheckMerge = true;
149149
constexpr bool CheckMask = true;
150150
constexpr bool CheckProperties = true;
@@ -154,7 +154,7 @@ template <typename T, bool TestPVCFeatures> bool testUSM(queue Q) {
154154

155155
bool Passed = true;
156156

157-
// Test block_load() that is available on Gen12 and PVC.
157+
// Test block_load() that is available on Gen12, DG2 and PVC.
158158
Passed &= testUSM<T, 1, !CheckMask, !CheckMerge, CheckProperties>(
159159
Q, 2, 4, AlignElemProps);
160160
Passed &= testUSM<T, 2, !CheckMask, !CheckMerge, CheckProperties>(
@@ -196,53 +196,56 @@ template <typename T, bool TestPVCFeatures> bool testUSM(queue Q) {
196196
Passed &= testUSM<T, 32, !CheckMask, !CheckMerge, !CheckProperties>(
197197
Q, 2, 4, Align16Props);
198198

199-
if constexpr (TestPVCFeatures) {
200-
// Using mask or cache hints adds the requirement to run tests on PVC.
201-
// Also, PVC variant currently requires a) power-or-two elements,
199+
if constexpr (Features == TestFeatures::PVC ||
200+
Features == TestFeatures::DG2) {
201+
// Using mask or cache hints adds the requirement to run tests on DG2/PVC.
202+
// Also, DG2/DG2/PVC variant currently requires a) power-or-two elements,
202203
// b) the number of bytes loaded per call must not exceed 512,
203204
// c) the alignment of USM ptr + offset to be 4 or 8-bytes(for 8-byte
204205
// element vectors).
205206

206207
constexpr size_t RequiredAlignment = sizeof(T) <= 4 ? 4 : 8;
207-
properties PVCProps{cache_hint_L1<cache_hint::streaming>,
208-
cache_hint_L2<cache_hint::cached>,
209-
alignment<RequiredAlignment>};
208+
properties DG2OrPVCProps{cache_hint_L1<cache_hint::streaming>,
209+
cache_hint_L2<cache_hint::cached>,
210+
alignment<RequiredAlignment>};
210211

211212
// Only d/q-words are supported now.
212213
// Thus we use this I32Factor for testing purposes and convenience.
213214
constexpr int I32Factor =
214215
std::max(static_cast<int>(sizeof(int) / sizeof(T)), 1);
215216
Passed &=
216217
testUSM<T, 1 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
217-
Q, 2, 4, PVCProps);
218+
Q, 2, 4, DG2OrPVCProps);
218219
Passed &=
219220
testUSM<T, 2 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
220-
Q, 5, 5, PVCProps);
221+
Q, 5, 5, DG2OrPVCProps);
221222
Passed &=
222223
testUSM<T, 4 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
223-
Q, 5, 5, PVCProps);
224+
Q, 5, 5, DG2OrPVCProps);
224225
Passed &=
225226
testUSM<T, 8 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
226-
Q, 5, 5, PVCProps);
227+
Q, 5, 5, DG2OrPVCProps);
227228
Passed &=
228229
testUSM<T, 16 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
229-
Q, 5, 5, PVCProps);
230+
Q, 5, 5, DG2OrPVCProps);
230231
Passed &=
231232
testUSM<T, 32 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
232-
Q, 2, 4, PVCProps);
233+
Q, 2, 4, DG2OrPVCProps);
233234

234235
// This call (potentially) and the next call (guaranteed) load the biggest
235236
// load-able chunk, which requires loading with 8-byte elements, which
236237
// requires the alignment to be 8-bytes or more.
237238
properties PVCAlign8Props{cache_hint_L1<cache_hint::streaming>,
238239
cache_hint_L2<cache_hint::cached>, alignment<8>};
239-
Passed &=
240-
testUSM<T, 64 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
241-
Q, 7, 1, PVCAlign8Props);
242-
if constexpr (sizeof(T) <= 4)
240+
if constexpr (Features == TestFeatures::PVC) {
243241
Passed &=
244-
testUSM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
245-
Q, 1, 4, PVCAlign8Props);
242+
testUSM<T, 64 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
243+
Q, 7, 1, PVCAlign8Props);
244+
if constexpr (sizeof(T) <= 4)
245+
Passed &=
246+
testUSM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
247+
Q, 1, 4, PVCAlign8Props);
248+
}
246249
} // TestPVCFeatures
247250

248251
return Passed;
@@ -350,7 +353,7 @@ bool testACC(queue Q, uint32_t Groups, uint32_t Threads,
350353
return Passed;
351354
}
352355

353-
template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
356+
template <typename T, TestFeatures Features> bool testACC(queue Q) {
354357
constexpr bool CheckMerge = true;
355358
constexpr bool CheckMask = true;
356359
constexpr bool CheckProperties = true;
@@ -361,7 +364,7 @@ template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
361364

362365
bool Passed = true;
363366

364-
// Test block_load() that is available on Gen12 and PVC:
367+
// Test block_load() that is available on Gen12, DG2 and PVC:
365368
// 1, 2, 4 or 8 16-byte loads.
366369
constexpr int NElemsInOword = 16 / sizeof(T);
367370
Passed &= testACC<T, NElemsInOword, !CheckMask, !CheckMerge, CheckProperties>(
@@ -381,18 +384,19 @@ template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
381384
testACC<T, NElemsInOword, !CheckMask, !CheckMerge, !CheckProperties>(
382385
Q, 2, 4, Align16Props);
383386

384-
if constexpr (TestPVCFeatures) {
385-
// Using mask or cache hints adds the requirement to run tests on PVC.
386-
// Also, PVC variant currently requires power-or-two elements and
387+
if constexpr (Features == TestFeatures::PVC ||
388+
Features == TestFeatures::DG2) {
389+
// Using mask or cache hints adds the requirement to run tests on DG2/PVC.
390+
// Also, DG2/PVC variant currently requires power-or-two elements and
387391
// the number of bytes loaded per call must not exceed 512.
388392

389393
constexpr int I32Factor =
390394
std::max(static_cast<int>(sizeof(int) / sizeof(T)), 1);
391-
properties PVCProps{cache_hint_L1<cache_hint::streaming>,
392-
cache_hint_L2<cache_hint::cached>,
393-
alignment<RequiredAlignment>};
395+
properties DG2OrPVCProps{cache_hint_L1<cache_hint::streaming>,
396+
cache_hint_L2<cache_hint::cached>,
397+
alignment<RequiredAlignment>};
394398

395-
// Test block_load() that is available on PVC:
399+
// Test block_load() that is available on DG2/PVC:
396400
// 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).
397401
Passed &=
398402
testACC<T, 1 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
@@ -404,29 +408,31 @@ template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
404408
testACC<T, 3 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
405409
Q, 2, 8, MinReqAlignProps);
406410
Passed &= testACC<T, 4 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
407-
Q, 2, 4, PVCProps);
411+
Q, 2, 4, DG2OrPVCProps);
408412
Passed &= testACC<T, 8 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
409413
Q, 2, 4, MinReqAlignProps);
410414
Passed &=
411415
testACC<T, 16 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
412416
Q, 2, 4, MinReqAlignProps);
413417
Passed &=
414418
testACC<T, 32 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
415-
Q, 2, 4, PVCProps);
419+
Q, 2, 4, DG2OrPVCProps);
416420

417421
// This call (potentially) and the next call (guaranteed) load the biggest
418422
// load-able chunk, which requires loading with 8-byte elements, which
419423
// requires the alignment to be 8-bytes or more.
420424
properties PVCAlign8Props{cache_hint_L1<cache_hint::streaming>,
421425
cache_hint_L2<cache_hint::cached>, alignment<8>};
422-
Passed &=
423-
testACC<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
424-
Q, 2, 4, PVCAlign8Props);
425-
426-
if constexpr (sizeof(T) <= 4)
426+
if constexpr (Features == TestFeatures::PVC) {
427427
Passed &=
428-
testACC<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
428+
testACC<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
429429
Q, 2, 4, PVCAlign8Props);
430+
431+
if constexpr (sizeof(T) <= 4)
432+
Passed &=
433+
testACC<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
434+
Q, 2, 4, PVCAlign8Props);
435+
}
430436
} // TestPVCFeatures
431437

432438
return Passed;
@@ -540,7 +546,7 @@ bool testSLMAcc(queue Q, uint32_t Groups, uint32_t GroupSize,
540546
return Passed;
541547
}
542548

543-
template <typename T, bool TestPVCFeatures> bool testSLMAcc(queue Q) {
549+
template <typename T, TestFeatures Features> bool testSLMAcc(queue Q) {
544550
constexpr bool CheckMerge = true;
545551
constexpr bool CheckMask = true;
546552
constexpr bool CheckProperties = true;
@@ -592,46 +598,50 @@ template <typename T, bool TestPVCFeatures> bool testSLMAcc(queue Q) {
592598
Q, 2, 4, AlignElemProps);
593599
}
594600

595-
if constexpr (TestPVCFeatures) {
596-
// Using the mask adds the requirement to run tests on PVC.
597-
// Also, PVC variant currently requires power-or-two elements and
601+
if constexpr (Features == TestFeatures::PVC ||
602+
Features == TestFeatures::DG2) {
603+
604+
// Using the mask adds the requirement to run tests on DG2/PVC.
605+
// Also, DG2/PVC variant currently requires power-or-two elements and
598606
// the number of bytes loaded per call must not exceed 512.
599607

600608
constexpr int I32Factor =
601609
std::max(static_cast<int>(sizeof(int) / sizeof(T)), 1);
602610
constexpr size_t ReqiredAlignment = sizeof(T) <= 4 ? 4 : 8;
603-
properties PVCProps{alignment<ReqiredAlignment>};
611+
properties DG2OrPVCProps{alignment<ReqiredAlignment>};
604612

605-
// Test block_load() that is available on PVC:
613+
// Test block_load() that is available on DG2/PVC:
606614
// 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).
607615
Passed &=
608616
testSLMAcc<T, 1 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
609-
Q, 2, 4, PVCProps);
617+
Q, 2, 4, DG2OrPVCProps);
610618
Passed &=
611619
testSLMAcc<T, 2 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
612-
Q, 1, 4, PVCProps);
620+
Q, 1, 4, DG2OrPVCProps);
613621
Passed &=
614622
testSLMAcc<T, 3 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
615-
Q, 2, 8, PVCProps);
623+
Q, 2, 8, DG2OrPVCProps);
616624
Passed &=
617625
testSLMAcc<T, 4 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
618-
Q, 2, 4, PVCProps);
626+
Q, 2, 4, DG2OrPVCProps);
619627
Passed &=
620628
testSLMAcc<T, 8 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
621-
Q, 2, 4, PVCProps);
629+
Q, 2, 4, DG2OrPVCProps);
622630
Passed &=
623631
testSLMAcc<T, 16 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
624-
Q, 2, 4, PVCProps);
632+
Q, 2, 4, DG2OrPVCProps);
625633
Passed &=
626634
testSLMAcc<T, 32 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
627-
Q, 2, 4, PVCProps);
628-
Passed &=
629-
testSLMAcc<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
630-
Q, 2, 4, PVCProps);
635+
Q, 2, 4, DG2OrPVCProps);
636+
if constexpr (Features == TestFeatures::PVC) {
637+
Passed &=
638+
testSLMAcc<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
639+
Q, 2, 4, DG2OrPVCProps);
631640

632-
if constexpr (sizeof(T) <= 4)
633-
Passed &= testSLMAcc<T, 128 * I32Factor, CheckMask, CheckMerge,
634-
CheckProperties>(Q, 2, 4, Align16Props);
641+
if constexpr (sizeof(T) <= 4)
642+
Passed &= testSLMAcc<T, 128 * I32Factor, CheckMask, CheckMerge,
643+
CheckProperties>(Q, 2, 4, Align16Props);
644+
}
635645
} // TestPVCFeatures
636646

637647
return Passed;
@@ -735,7 +745,7 @@ bool testSLM(queue Q, uint32_t Groups, LoadPropertiesT LoadProperties) {
735745
return Passed;
736746
}
737747

738-
template <typename T, bool TestPVCFeatures> bool testSLM(queue Q) {
748+
template <typename T, TestFeatures Features> bool testSLM(queue Q) {
739749
constexpr bool CheckMerge = true;
740750
constexpr bool CheckMask = true;
741751
constexpr bool CheckProperties = true;
@@ -786,45 +796,48 @@ template <typename T, bool TestPVCFeatures> bool testSLM(queue Q) {
786796
Q, 2, AlignElemProps);
787797
}
788798

789-
if constexpr (TestPVCFeatures) {
790-
// Using the mask adds the requirement to run tests on PVC.
791-
// Also, PVC variant currently requires power-or-two elements and
799+
if constexpr (Features == TestFeatures::PVC ||
800+
Features == TestFeatures::DG2) {
801+
// Using the mask adds the requirement to run tests on DG2/PVC.
802+
// Also, DG2/PVC variant currently requires power-or-two elements and
792803
// the number of bytes loaded per call must not exceed 512.
793804

794805
constexpr int I32Factor =
795806
std::max(static_cast<int>(sizeof(int) / sizeof(T)), 1);
796807
constexpr size_t RequiredAlignment = sizeof(T) <= 4 ? 4 : 8;
797-
properties PVCProps{alignment<RequiredAlignment>};
808+
properties DG2OrPVCProps{alignment<RequiredAlignment>};
798809

799-
// Test block_load() that is available on PVC:
810+
// Test block_load() that is available on DG2/PVC:
800811
// 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).
801812
Passed &=
802813
testSLM<T, 1 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
803-
Q, 2, PVCProps);
814+
Q, 2, DG2OrPVCProps);
804815
Passed &= testSLM<T, 2 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
805-
Q, 1, PVCProps);
816+
Q, 1, DG2OrPVCProps);
806817
Passed &=
807818
testSLM<T, 3 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
808-
Q, 2, PVCProps);
819+
Q, 2, DG2OrPVCProps);
809820
Passed &= testSLM<T, 4 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
810-
Q, 2, PVCProps);
821+
Q, 2, DG2OrPVCProps);
811822
Passed &=
812823
testSLM<T, 8 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
813-
Q, 2, PVCProps);
824+
Q, 2, DG2OrPVCProps);
814825
Passed &=
815826
testSLM<T, 16 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
816-
Q, 2, PVCProps);
827+
Q, 2, DG2OrPVCProps);
817828
Passed &=
818829
testSLM<T, 32 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
819-
Q, 2, PVCProps);
820-
Passed &=
821-
testSLM<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
822-
Q, 2, PVCProps);
823-
824-
if constexpr (sizeof(T) <= 4)
830+
Q, 2, DG2OrPVCProps);
831+
if constexpr (Features == TestFeatures::PVC) {
825832
Passed &=
826-
testSLM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
827-
Q, 2, Align16Props);
833+
testSLM<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
834+
Q, 2, DG2OrPVCProps);
835+
836+
if constexpr (sizeof(T) <= 4)
837+
Passed &=
838+
testSLM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
839+
Q, 2, Align16Props);
840+
}
828841
} // TestPVCFeatures
829842

830843
return Passed;

sycl/test-e2e/ESIMD/unified_memory_api/Inputs/common.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,5 @@ constexpr size_t getAlignment(PropertiesT Props) {
5252
static_assert(RequestedAlignment >= RequiredAlignment, "Too small alignment");
5353
return RequestedAlignment;
5454
}
55+
56+
enum class TestFeatures { Generic, DG2, PVC };

sycl/test-e2e/ESIMD/unified_memory_api/block_load_acc.cpp

+9-10
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,19 @@ int main() {
1919
auto Q = queue{gpu_selector_v};
2020
esimd_test::printTestLabel(Q);
2121

22-
constexpr bool TestPVCFeatures = true;
22+
constexpr auto TestFeatures = TestFeatures::Generic;
2323
bool Passed = true;
2424

25-
Passed &= testACC<int8_t, !TestPVCFeatures>(Q);
26-
Passed &= testACC<int16_t, !TestPVCFeatures>(Q);
25+
Passed &= testACC<int8_t, TestFeatures>(Q);
26+
Passed &= testACC<int16_t, TestFeatures>(Q);
2727
if (Q.get_device().has(sycl::aspect::fp16))
28-
Passed &= testACC<sycl::half, !TestPVCFeatures>(Q);
29-
Passed &= testACC<uint32_t, !TestPVCFeatures>(Q);
30-
Passed &= testACC<float, !TestPVCFeatures>(Q);
31-
Passed &=
32-
testACC<ext::intel::experimental::esimd::tfloat32, !TestPVCFeatures>(Q);
33-
Passed &= testACC<int64_t, !TestPVCFeatures>(Q);
28+
Passed &= testACC<sycl::half, TestFeatures>(Q);
29+
Passed &= testACC<uint32_t, TestFeatures>(Q);
30+
Passed &= testACC<float, TestFeatures>(Q);
31+
Passed &= testACC<ext::intel::experimental::esimd::tfloat32, TestFeatures>(Q);
32+
Passed &= testACC<int64_t, TestFeatures>(Q);
3433
if (Q.get_device().has(sycl::aspect::fp64))
35-
Passed &= testACC<double, !TestPVCFeatures>(Q);
34+
Passed &= testACC<double, TestFeatures>(Q);
3635

3736
std::cout << (Passed ? "Passed\n" : "FAILED\n");
3837
return Passed ? 0 : 1;

0 commit comments

Comments
 (0)