Skip to content

Commit 298f1c2

Browse files
authored
Revert "Add missing intrinsics to cuda headers" (#144755)
Reverts #143664 as it breaks CUDA compilation.
1 parent 77bc254 commit 298f1c2

File tree

1 file changed

+0
-284
lines changed

1 file changed

+0
-284
lines changed

clang/lib/Headers/__clang_cuda_intrinsics.h

Lines changed: 0 additions & 284 deletions
Original file line numberDiff line numberDiff line change
@@ -479,290 +479,6 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
479479
return ret;
480480
}
481481

482-
#pragma push_macro("__INTRINSIC_LOAD")
483-
#define __INTRINSIC_LOAD(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \
484-
__Clobber) \
485-
inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \
486-
__TmpType __ret; \
487-
asm(__AsmOp " %0, [%1];" : __AsmType(__ret) : "l"(__ptr)__Clobber); \
488-
return (__DeclType)__ret; \
489-
}
490-
491-
#pragma push_macro("__INTRINSIC_LOAD2")
492-
#define __INTRINSIC_LOAD2(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \
493-
__Clobber) \
494-
inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \
495-
__DeclType __ret; \
496-
__TmpType __tmp; \
497-
asm(__AsmOp " {%0,%1}, [%2];" \
498-
: __AsmType(__tmp.x), __AsmType(__tmp.y) \
499-
: "l"(__ptr)__Clobber); \
500-
using __ElementType = decltype(__ret.x); \
501-
__ret.x = (__ElementType)(__tmp.x); \
502-
__ret.y = (__ElementType)__tmp.y; \
503-
return __ret; \
504-
}
505-
506-
#pragma push_macro("__INTRINSIC_LOAD4")
507-
#define __INTRINSIC_LOAD4(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \
508-
__Clobber) \
509-
inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \
510-
__DeclType __ret; \
511-
__TmpType __tmp; \
512-
asm(__AsmOp " {%0,%1,%2,%3}, [%4];" \
513-
: __AsmType(__tmp.x), __AsmType(__tmp.y), __AsmType(__tmp.z), \
514-
__AsmType(__tmp.w) \
515-
: "l"(__ptr)__Clobber); \
516-
using __ElementType = decltype(__ret.x); \
517-
__ret.x = (__ElementType)__tmp.x; \
518-
__ret.y = (__ElementType)__tmp.y; \
519-
__ret.z = (__ElementType)__tmp.z; \
520-
__ret.w = (__ElementType)__tmp.w; \
521-
return __ret; \
522-
}
523-
524-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", char, unsigned int, "=r", );
525-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", signed char, unsigned int, "=r", );
526-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s16", short, unsigned short, "=h", );
527-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s32", int, unsigned int, "=r", );
528-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s64", long long, unsigned long long,
529-
"=l", );
530-
531-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s8", char2, int2, "=r", );
532-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s8", char4, int4, "=r", );
533-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s16", short2, short2, "=h", );
534-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s16", short4, short4, "=h", );
535-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s32", int2, int2, "=r", );
536-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s32", int4, int4, "=r", );
537-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s64 ", longlong2, longlong2, "=l", );
538-
539-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u8", unsigned char, unsigned int,
540-
"=r", );
541-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u16", unsigned short, unsigned short,
542-
"=h", );
543-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u32", unsigned int, unsigned int,
544-
"=r", );
545-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u64", unsigned long long,
546-
unsigned long long, "=l", );
547-
548-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u8", uchar2, int2, "=r", );
549-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u8", uchar4, int4, "=r", );
550-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u16", ushort2, ushort2, "=h", );
551-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u16", ushort4, ushort4, "=h", );
552-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u32", uint2, uint2, "=r", );
553-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u32", uint4, uint4, "=r", );
554-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u64", ulonglong2, ulonglong2,
555-
"=l", );
556-
557-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f32", float, float, "=f", );
558-
__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f64", double, double, "=d", );
559-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f32", float2, float2, "=f", );
560-
__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.f32", float4, float4, "=f", );
561-
__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f64", double2, double2, "=d", );
562-
563-
inline __device__ long __ldcg(const long *__ptr) {
564-
unsigned long __ret;
565-
if (sizeof(long) == 8) {
566-
asm("ld.global.cg.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
567-
} else {
568-
asm("ld.global.cg.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
569-
}
570-
return (long)__ret;
571-
}
572-
573-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u8", unsigned char, unsigned int,
574-
"=r", : "memory");
575-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u16", unsigned short, unsigned short,
576-
"=h", : "memory");
577-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u32", unsigned int, unsigned int,
578-
"=r", : "memory");
579-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u64", unsigned long long,
580-
unsigned long long, "=l", : "memory");
581-
582-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", char, unsigned int,
583-
"=r", : "memory");
584-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", signed char, unsigned int,
585-
"=r", : "memory");
586-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s16", short, unsigned short,
587-
"=h", : "memory");
588-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s32", int, unsigned int,
589-
"=r", : "memory");
590-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s64", long long, unsigned long long,
591-
"=l", : "memory");
592-
593-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u8", uchar2, uint2,
594-
"=r", : "memory");
595-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u8", uchar4, uint4,
596-
"=r", : "memory");
597-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u16", ushort2, ushort2,
598-
"=h", : "memory");
599-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u16", ushort4, ushort4,
600-
"=h", : "memory");
601-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u32", uint2, uint2,
602-
"=r", : "memory");
603-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u32", uint4, uint4,
604-
"=r", : "memory");
605-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u64", ulonglong2, ulonglong2,
606-
"=l", : "memory");
607-
608-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s8", char2, int2, "=r", : "memory");
609-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s8", char4, int4, "=r", : "memory");
610-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s16", short2, short2,
611-
"=h", : "memory");
612-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s16", short4, short4,
613-
"=h", : "memory");
614-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s32", int2, int2, "=r", : "memory");
615-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s32", int4, int4, "=r", : "memory");
616-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s64", longlong2, longlong2,
617-
"=l", : "memory");
618-
619-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f32", float, float, "=f", : "memory");
620-
__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f64", double, double, "=d", : "memory");
621-
622-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f32", float2, float2,
623-
"=f", : "memory");
624-
__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.f32", float4, float4,
625-
"=f", : "memory");
626-
__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f64", double2, double2,
627-
"=d", : "memory");
628-
629-
inline __device__ long __ldcv(const long *__ptr) {
630-
unsigned long __ret;
631-
if (sizeof(long) == 8) {
632-
asm("ld.global.cv.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
633-
} else {
634-
asm("ld.global.cv.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
635-
}
636-
return (long)__ret;
637-
}
638-
639-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", char, unsigned int, "=r", );
640-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", signed char, signed int, "=r", );
641-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s16", short, unsigned short, "=h", );
642-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s32", int, unsigned int, "=r", );
643-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s64", long long, unsigned long long,
644-
"=l", );
645-
646-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s8", char2, int2, "=r", );
647-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s8", char4, int4, "=r", );
648-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s16", short2, short2, "=h", );
649-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s16", short4, short4, "=h", );
650-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s32", int2, int2, "=r", );
651-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s32", int4, int4, "=r", );
652-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s64", longlong2, longlong2, "=l", );
653-
654-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u8", unsigned char, unsigned int,
655-
"=r", );
656-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u16", unsigned short, unsigned short,
657-
"=h", );
658-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u32", unsigned int, unsigned int,
659-
"=r", );
660-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u64", unsigned long long,
661-
unsigned long long, "=l", );
662-
663-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u8", uchar2, uint2, "=r", );
664-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u8", uchar4, uint4, "=r", );
665-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u16", ushort2, ushort2, "=h", );
666-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u16", ushort4, ushort4, "=h", );
667-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u32", uint2, uint2, "=r", );
668-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u32", uint4, uint4, "=r", );
669-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u64", ulonglong2, ulonglong2,
670-
"=l", );
671-
672-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f32", float, float, "=f", );
673-
__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f64", double, double, "=d", );
674-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f32", float2, float2, "=f", );
675-
__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.f32", float4, float4, "=f", );
676-
__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f64", double2, double2, "=d", );
677-
678-
#pragma pop_macro("__INTRINSIC_LOAD")
679-
#pragma pop_macro("__INTRINSIC_LOAD2")
680-
#pragma pop_macro("__INTRINSIC_LOAD4")
681-
682-
inline __device__ long __ldcs(const long *__ptr) {
683-
unsigned long __ret;
684-
if (sizeof(long) == 8) {
685-
asm("ld.global.cs.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr));
686-
} else {
687-
asm("ld.global.cs.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr));
688-
}
689-
return (long)__ret;
690-
}
691-
692-
#pragma push_macro("__INTRINSIC_STORE")
693-
#define __INTRINSIC_STORE(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType) \
694-
inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \
695-
__TmpType __tmp = (__TmpType)__value; \
696-
asm(__AsmOp " [%0], %1;" ::"l"(__ptr), __AsmType(__tmp) : "memory"); \
697-
}
698-
699-
#pragma push_macro("__INTRINSIC_STORE2")
700-
#define __INTRINSIC_STORE2(__FnName, __AsmOp, __DeclType, __TmpType, \
701-
__AsmType) \
702-
inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \
703-
__TmpType __tmp; \
704-
using __ElementType = decltype(__tmp.x); \
705-
__tmp.x = (__ElementType)(__value.x); \
706-
__tmp.y = (__ElementType)(__value.y); \
707-
asm(__AsmOp " [%0], {%1,%2};" ::"l"(__ptr), __AsmType(__tmp.x), \
708-
__AsmType(__tmp.y) \
709-
: "memory"); \
710-
}
711-
712-
#pragma push_macro("__INTRINSIC_STORE4")
713-
#define __INTRINSIC_STORE4(__FnName, __AsmOp, __DeclType, __TmpType, \
714-
__AsmType) \
715-
inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \
716-
__TmpType __tmp; \
717-
using __ElementType = decltype(__tmp.x); \
718-
__tmp.x = (__ElementType)(__value.x); \
719-
__tmp.y = (__ElementType)(__value.y); \
720-
__tmp.z = (__ElementType)(__value.z); \
721-
__tmp.w = (__ElementType)(__value.w); \
722-
asm(__AsmOp " [%0], {%1,%2,%3,%4};" ::"l"(__ptr), __AsmType(__tmp.x), \
723-
__AsmType(__tmp.y), __AsmType(__tmp.z), __AsmType(__tmp.w) \
724-
: "memory"); \
725-
}
726-
727-
__INTRINSIC_STORE(__stwt, "st.global.wt.s8", char, int, "r");
728-
__INTRINSIC_STORE(__stwt, "st.global.wt.s8", signed char, int, "r");
729-
__INTRINSIC_STORE(__stwt, "st.global.wt.s16", short, short, "h");
730-
__INTRINSIC_STORE(__stwt, "st.global.wt.s32", int, int, "r");
731-
__INTRINSIC_STORE(__stwt, "st.global.wt.s64", long long, long long, "l");
732-
733-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s8", char2, int2, "r");
734-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s8", char4, int4, "r");
735-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s16", short2, short2, "h");
736-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s16", short4, short4, "h");
737-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s32", int2, int2, "r");
738-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s32", int4, int4, "r");
739-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s64", longlong2, longlong2, "l");
740-
741-
__INTRINSIC_STORE(__stwt, "st.global.wt.u8", unsigned char, int, "r");
742-
__INTRINSIC_STORE(__stwt, "st.global.wt.u16", unsigned short, unsigned short,
743-
"h");
744-
__INTRINSIC_STORE(__stwt, "st.global.wt.u32", unsigned int, unsigned int, "r");
745-
__INTRINSIC_STORE(__stwt, "st.global.wt.u64", unsigned long long,
746-
unsigned long long, "l");
747-
748-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u8", uchar2, uchar2, "r");
749-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u8", uchar4, uint4, "r");
750-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u16", ushort2, ushort2, "h");
751-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u16", ushort4, ushort4, "h");
752-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u32", uint2, uint2, "r");
753-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u32", uint4, uint4, "r");
754-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u64", ulonglong2, ulonglong2, "l");
755-
756-
__INTRINSIC_STORE(__stwt, "st.global.wt.f32", float, float, "f");
757-
__INTRINSIC_STORE(__stwt, "st.global.wt.f64", double, double, "d");
758-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f32", float2, float2, "f");
759-
__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.f32", float4, float4, "f");
760-
__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f64", double2, double2, "d");
761-
762-
#pragma pop_macro("__INTRINSIC_STORE")
763-
#pragma pop_macro("__INTRINSIC_STORE2")
764-
#pragma pop_macro("__INTRINSIC_STORE4")
765-
766482
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
767483

768484
#if CUDA_VERSION >= 11000

0 commit comments

Comments
 (0)