@@ -144,7 +144,7 @@ bool testUSM(queue Q, uint32_t Groups, uint32_t Threads,
144
144
return Passed;
145
145
}
146
146
147
- template <typename T, bool TestPVCFeatures > bool testUSM (queue Q) {
147
+ template <typename T, TestFeatures Features > bool testUSM (queue Q) {
148
148
constexpr bool CheckMerge = true ;
149
149
constexpr bool CheckMask = true ;
150
150
constexpr bool CheckProperties = true ;
@@ -154,7 +154,7 @@ template <typename T, bool TestPVCFeatures> bool testUSM(queue Q) {
154
154
155
155
bool Passed = true ;
156
156
157
- // Test block_load() that is available on Gen12 and PVC.
157
+ // Test block_load() that is available on Gen12, DG2 and PVC.
158
158
Passed &= testUSM<T, 1 , !CheckMask, !CheckMerge, CheckProperties>(
159
159
Q, 2 , 4 , AlignElemProps);
160
160
Passed &= testUSM<T, 2 , !CheckMask, !CheckMerge, CheckProperties>(
@@ -196,53 +196,56 @@ template <typename T, bool TestPVCFeatures> bool testUSM(queue Q) {
196
196
Passed &= testUSM<T, 32 , !CheckMask, !CheckMerge, !CheckProperties>(
197
197
Q, 2 , 4 , Align16Props);
198
198
199
- if constexpr (TestPVCFeatures) {
200
- // Using mask or cache hints adds the requirement to run tests on PVC.
201
- // Also, PVC variant currently requires a) power-or-two elements,
199
+ if constexpr (Features == TestFeatures::PVC ||
200
+ Features == TestFeatures::DG2) {
201
+ // Using mask or cache hints adds the requirement to run tests on DG2/PVC.
202
+ // Also, DG2/PVC variant currently requires a) power-of-two elements,
202
203
// b) the number of bytes loaded per call must not exceed 512,
203
204
// c) the alignment of USM ptr + offset to be 4 or 8-bytes(for 8-byte
204
205
// element vectors).
205
206
206
207
constexpr size_t RequiredAlignment = sizeof (T) <= 4 ? 4 : 8 ;
207
- properties PVCProps {cache_hint_L1<cache_hint::streaming>,
208
- cache_hint_L2<cache_hint::cached>,
209
- alignment<RequiredAlignment>};
208
+ properties DG2OrPVCProps {cache_hint_L1<cache_hint::streaming>,
209
+ cache_hint_L2<cache_hint::cached>,
210
+ alignment<RequiredAlignment>};
210
211
211
212
// Only d/q-words are supported now.
212
213
// Thus we use this I32Factor for testing purposes and convenience.
213
214
constexpr int I32Factor =
214
215
std::max (static_cast <int >(sizeof (int ) / sizeof (T)), 1 );
215
216
Passed &=
216
217
testUSM<T, 1 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
217
- Q, 2 , 4 , PVCProps );
218
+ Q, 2 , 4 , DG2OrPVCProps );
218
219
Passed &=
219
220
testUSM<T, 2 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
220
- Q, 5 , 5 , PVCProps );
221
+ Q, 5 , 5 , DG2OrPVCProps );
221
222
Passed &=
222
223
testUSM<T, 4 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
223
- Q, 5 , 5 , PVCProps );
224
+ Q, 5 , 5 , DG2OrPVCProps );
224
225
Passed &=
225
226
testUSM<T, 8 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
226
- Q, 5 , 5 , PVCProps );
227
+ Q, 5 , 5 , DG2OrPVCProps );
227
228
Passed &=
228
229
testUSM<T, 16 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
229
- Q, 5 , 5 , PVCProps );
230
+ Q, 5 , 5 , DG2OrPVCProps );
230
231
Passed &=
231
232
testUSM<T, 32 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
232
- Q, 2 , 4 , PVCProps );
233
+ Q, 2 , 4 , DG2OrPVCProps );
233
234
234
235
// This call (potentially) and the next call (guaranteed) load the biggest
235
236
// load-able chunk, which requires loading with 8-byte elements, which
236
237
// requires the alignment to be 8-bytes or more.
237
238
properties PVCAlign8Props{cache_hint_L1<cache_hint::streaming>,
238
239
cache_hint_L2<cache_hint::cached>, alignment<8 >};
239
- Passed &=
240
- testUSM<T, 64 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
241
- Q, 7 , 1 , PVCAlign8Props);
242
- if constexpr (sizeof (T) <= 4 )
240
+ if constexpr (Features == TestFeatures::PVC) {
243
241
Passed &=
244
- testUSM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
245
- Q, 1 , 4 , PVCAlign8Props);
242
+ testUSM<T, 64 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
243
+ Q, 7 , 1 , PVCAlign8Props);
244
+ if constexpr (sizeof (T) <= 4 )
245
+ Passed &=
246
+ testUSM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
247
+ Q, 1 , 4 , PVCAlign8Props);
248
+ }
246
249
} // Features == PVC || Features == DG2
247
250
248
251
return Passed;
@@ -350,7 +353,7 @@ bool testACC(queue Q, uint32_t Groups, uint32_t Threads,
350
353
return Passed;
351
354
}
352
355
353
- template <typename T, bool TestPVCFeatures > bool testACC (queue Q) {
356
+ template <typename T, TestFeatures Features > bool testACC (queue Q) {
354
357
constexpr bool CheckMerge = true ;
355
358
constexpr bool CheckMask = true ;
356
359
constexpr bool CheckProperties = true ;
@@ -361,7 +364,7 @@ template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
361
364
362
365
bool Passed = true ;
363
366
364
- // Test block_load() that is available on Gen12 and PVC:
367
+ // Test block_load() that is available on Gen12, DG2 and PVC:
365
368
// 1, 2, 4 or 8 16-byte loads.
366
369
constexpr int NElemsInOword = 16 / sizeof (T);
367
370
Passed &= testACC<T, NElemsInOword, !CheckMask, !CheckMerge, CheckProperties>(
@@ -381,18 +384,19 @@ template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
381
384
testACC<T, NElemsInOword, !CheckMask, !CheckMerge, !CheckProperties>(
382
385
Q, 2 , 4 , Align16Props);
383
386
384
- if constexpr (TestPVCFeatures) {
385
- // Using mask or cache hints adds the requirement to run tests on PVC.
386
- // Also, PVC variant currently requires power-or-two elements and
387
+ if constexpr (Features == TestFeatures::PVC ||
388
+ Features == TestFeatures::DG2) {
389
+ // Using mask or cache hints adds the requirement to run tests on DG2/PVC.
390
+ // Also, DG2/PVC variant currently requires power-of-two elements and
387
391
// the number of bytes loaded per call must not exceed 512.
388
392
389
393
constexpr int I32Factor =
390
394
std::max (static_cast <int >(sizeof (int ) / sizeof (T)), 1 );
391
- properties PVCProps {cache_hint_L1<cache_hint::streaming>,
392
- cache_hint_L2<cache_hint::cached>,
393
- alignment<RequiredAlignment>};
395
+ properties DG2OrPVCProps {cache_hint_L1<cache_hint::streaming>,
396
+ cache_hint_L2<cache_hint::cached>,
397
+ alignment<RequiredAlignment>};
394
398
395
- // Test block_load() that is available on PVC:
399
+ // Test block_load() that is available on DG2/PVC:
396
400
// 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).
397
401
Passed &=
398
402
testACC<T, 1 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
@@ -404,29 +408,31 @@ template <typename T, bool TestPVCFeatures> bool testACC(queue Q) {
404
408
testACC<T, 3 * I32Factor, !CheckMask, !CheckMerge, CheckProperties>(
405
409
Q, 2 , 8 , MinReqAlignProps);
406
410
Passed &= testACC<T, 4 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
407
- Q, 2 , 4 , PVCProps );
411
+ Q, 2 , 4 , DG2OrPVCProps );
408
412
Passed &= testACC<T, 8 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
409
413
Q, 2 , 4 , MinReqAlignProps);
410
414
Passed &=
411
415
testACC<T, 16 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
412
416
Q, 2 , 4 , MinReqAlignProps);
413
417
Passed &=
414
418
testACC<T, 32 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
415
- Q, 2 , 4 , PVCProps );
419
+ Q, 2 , 4 , DG2OrPVCProps );
416
420
417
421
// This call (potentially) and the next call (guaranteed) load the biggest
418
422
// load-able chunk, which requires loading with 8-byte elements, which
419
423
// requires the alignment to be 8-bytes or more.
420
424
properties PVCAlign8Props{cache_hint_L1<cache_hint::streaming>,
421
425
cache_hint_L2<cache_hint::cached>, alignment<8 >};
422
- Passed &=
423
- testACC<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
424
- Q, 2 , 4 , PVCAlign8Props);
425
-
426
- if constexpr (sizeof (T) <= 4 )
426
+ if constexpr (Features == TestFeatures::PVC) {
427
427
Passed &=
428
- testACC<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
428
+ testACC<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
429
429
Q, 2 , 4 , PVCAlign8Props);
430
+
431
+ if constexpr (sizeof (T) <= 4 )
432
+ Passed &=
433
+ testACC<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
434
+ Q, 2 , 4 , PVCAlign8Props);
435
+ }
430
436
} // Features == PVC || Features == DG2
431
437
432
438
return Passed;
@@ -540,7 +546,7 @@ bool testSLMAcc(queue Q, uint32_t Groups, uint32_t GroupSize,
540
546
return Passed;
541
547
}
542
548
543
- template <typename T, bool TestPVCFeatures > bool testSLMAcc (queue Q) {
549
+ template <typename T, TestFeatures Features > bool testSLMAcc (queue Q) {
544
550
constexpr bool CheckMerge = true ;
545
551
constexpr bool CheckMask = true ;
546
552
constexpr bool CheckProperties = true ;
@@ -592,46 +598,50 @@ template <typename T, bool TestPVCFeatures> bool testSLMAcc(queue Q) {
592
598
Q, 2 , 4 , AlignElemProps);
593
599
}
594
600
595
- if constexpr (TestPVCFeatures) {
596
- // Using the mask adds the requirement to run tests on PVC.
597
- // Also, PVC variant currently requires power-or-two elements and
601
+ if constexpr (Features == TestFeatures::PVC ||
602
+ Features == TestFeatures::DG2) {
603
+
604
+ // Using the mask adds the requirement to run tests on DG2/PVC.
605
+ // Also, DG2/PVC variant currently requires power-of-two elements and
598
606
// the number of bytes loaded per call must not exceed 512.
599
607
600
608
constexpr int I32Factor =
601
609
std::max (static_cast <int >(sizeof (int ) / sizeof (T)), 1 );
602
610
constexpr size_t ReqiredAlignment = sizeof (T) <= 4 ? 4 : 8 ;
603
- properties PVCProps {alignment<ReqiredAlignment>};
611
+ properties DG2OrPVCProps {alignment<ReqiredAlignment>};
604
612
605
- // Test block_load() that is available on PVC:
613
+ // Test block_load() that is available on DG2/PVC:
606
614
// 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).
607
615
Passed &=
608
616
testSLMAcc<T, 1 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
609
- Q, 2 , 4 , PVCProps );
617
+ Q, 2 , 4 , DG2OrPVCProps );
610
618
Passed &=
611
619
testSLMAcc<T, 2 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
612
- Q, 1 , 4 , PVCProps );
620
+ Q, 1 , 4 , DG2OrPVCProps );
613
621
Passed &=
614
622
testSLMAcc<T, 3 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
615
- Q, 2 , 8 , PVCProps );
623
+ Q, 2 , 8 , DG2OrPVCProps );
616
624
Passed &=
617
625
testSLMAcc<T, 4 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
618
- Q, 2 , 4 , PVCProps );
626
+ Q, 2 , 4 , DG2OrPVCProps );
619
627
Passed &=
620
628
testSLMAcc<T, 8 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
621
- Q, 2 , 4 , PVCProps );
629
+ Q, 2 , 4 , DG2OrPVCProps );
622
630
Passed &=
623
631
testSLMAcc<T, 16 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
624
- Q, 2 , 4 , PVCProps );
632
+ Q, 2 , 4 , DG2OrPVCProps );
625
633
Passed &=
626
634
testSLMAcc<T, 32 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
627
- Q, 2 , 4 , PVCProps);
628
- Passed &=
629
- testSLMAcc<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
630
- Q, 2 , 4 , PVCProps);
635
+ Q, 2 , 4 , DG2OrPVCProps);
636
+ if constexpr (Features == TestFeatures::PVC) {
637
+ Passed &=
638
+ testSLMAcc<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
639
+ Q, 2 , 4 , DG2OrPVCProps);
631
640
632
- if constexpr (sizeof (T) <= 4 )
633
- Passed &= testSLMAcc<T, 128 * I32Factor, CheckMask, CheckMerge,
634
- CheckProperties>(Q, 2 , 4 , Align16Props);
641
+ if constexpr (sizeof (T) <= 4 )
642
+ Passed &= testSLMAcc<T, 128 * I32Factor, CheckMask, CheckMerge,
643
+ CheckProperties>(Q, 2 , 4 , Align16Props);
644
+ }
635
645
} // Features == PVC || Features == DG2
636
646
637
647
return Passed;
@@ -735,7 +745,7 @@ bool testSLM(queue Q, uint32_t Groups, LoadPropertiesT LoadProperties) {
735
745
return Passed;
736
746
}
737
747
738
- template <typename T, bool TestPVCFeatures > bool testSLM (queue Q) {
748
+ template <typename T, TestFeatures Features > bool testSLM (queue Q) {
739
749
constexpr bool CheckMerge = true ;
740
750
constexpr bool CheckMask = true ;
741
751
constexpr bool CheckProperties = true ;
@@ -786,45 +796,48 @@ template <typename T, bool TestPVCFeatures> bool testSLM(queue Q) {
786
796
Q, 2 , AlignElemProps);
787
797
}
788
798
789
- if constexpr (TestPVCFeatures) {
790
- // Using the mask adds the requirement to run tests on PVC.
791
- // Also, PVC variant currently requires power-or-two elements and
799
+ if constexpr (Features == TestFeatures::PVC ||
800
+ Features == TestFeatures::DG2) {
801
+ // Using the mask adds the requirement to run tests on DG2/PVC.
802
+ // Also, DG2/PVC variant currently requires power-of-two elements and
792
803
// the number of bytes loaded per call must not exceed 512.
793
804
794
805
constexpr int I32Factor =
795
806
std::max (static_cast <int >(sizeof (int ) / sizeof (T)), 1 );
796
807
constexpr size_t RequiredAlignment = sizeof (T) <= 4 ? 4 : 8 ;
797
- properties PVCProps {alignment<RequiredAlignment>};
808
+ properties DG2OrPVCProps {alignment<RequiredAlignment>};
798
809
799
- // Test block_load() that is available on PVC:
810
+ // Test block_load() that is available on DG2/PVC:
800
811
// 1, 2, 3, 4, 8, ... N elements (up to 512-bytes).
801
812
Passed &=
802
813
testSLM<T, 1 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
803
- Q, 2 , PVCProps );
814
+ Q, 2 , DG2OrPVCProps );
804
815
Passed &= testSLM<T, 2 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
805
- Q, 1 , PVCProps );
816
+ Q, 1 , DG2OrPVCProps );
806
817
Passed &=
807
818
testSLM<T, 3 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
808
- Q, 2 , PVCProps );
819
+ Q, 2 , DG2OrPVCProps );
809
820
Passed &= testSLM<T, 4 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
810
- Q, 2 , PVCProps );
821
+ Q, 2 , DG2OrPVCProps );
811
822
Passed &=
812
823
testSLM<T, 8 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
813
- Q, 2 , PVCProps );
824
+ Q, 2 , DG2OrPVCProps );
814
825
Passed &=
815
826
testSLM<T, 16 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
816
- Q, 2 , PVCProps );
827
+ Q, 2 , DG2OrPVCProps );
817
828
Passed &=
818
829
testSLM<T, 32 * I32Factor, CheckMask, !CheckMerge, CheckProperties>(
819
- Q, 2 , PVCProps);
820
- Passed &=
821
- testSLM<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
822
- Q, 2 , PVCProps);
823
-
824
- if constexpr (sizeof (T) <= 4 )
830
+ Q, 2 , DG2OrPVCProps);
831
+ if constexpr (Features == TestFeatures::PVC) {
825
832
Passed &=
826
- testSLM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
827
- Q, 2 , Align16Props);
833
+ testSLM<T, 64 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
834
+ Q, 2 , DG2OrPVCProps);
835
+
836
+ if constexpr (sizeof (T) <= 4 )
837
+ Passed &=
838
+ testSLM<T, 128 * I32Factor, CheckMask, CheckMerge, CheckProperties>(
839
+ Q, 2 , Align16Props);
840
+ }
828
841
} // Features == PVC || Features == DG2
829
842
830
843
return Passed;
0 commit comments