-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[Clang][SME] Refactor checkArmStreamingBuiltin. #145941
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[Clang][SME] Refactor checkArmStreamingBuiltin. #145941
Conversation
Rather than filtering the calling function's features the PR splits the builtin guard into distinct non-streaming and streaming guards that are compared to the active features in full. This has no affect on the current builtin definitions[1] but will allow us in the future to reference SVE features within streaming builtin guards and SME features within non-streaming builtin guards. [1] The change uncovered an issue whereby a couple of builtins where tagged with VerifyRuntimeMode but did not include both streaming and non-streaming guards. Some of those builtins are available in SME2p1 but to keep the PR mostly NFC I've ensured they are only available in non-streaming mode, thus matching the existing implementation.
@llvm/pr-subscribers-clang Author: Paul Walker (paulwalker-arm) ChangesRather than filtering the calling function's features the PR splits the builtin guard into distinct non-streaming and streaming guards that are compared to the active features in full. This has no affect on the current builtin definitions[1] but will allow us in the future to reference SVE features within streaming builtin guards and SME features within non-streaming builtin guards. [1] The change uncovered an issue whereby a couple of builtins where tagged with VerifyRuntimeMode but did not include both streaming and non-streaming guards. Some of those builtins are available in SME2p1 but to keep the PR mostly NFC I've ensured they are only available in non-streaming mode, thus matching the existing implementation. Full diff: https://github.com/llvm/llvm-project/pull/145941.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 1b3131be78452..e292b455ece92 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -264,22 +264,22 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
}
-multiclass StructLoad<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+multiclass StructLoad<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructLoad])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructLoad])>;
}
}
// Load N-element structure into N vectors (scalar base)
-defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret">;
-defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret">;
-defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret">;
+defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load N-element structure into N vectors (scalar base, VL displacement)
-defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret">;
-defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret">;
-defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">;
+defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load one octoword and replicate (scalar base)
let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in {
@@ -434,21 +434,21 @@ def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "v
def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">;
} // let SVETargetGuard = "sve"
-multiclass StructStore<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+multiclass StructStore<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructStore])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructStore])>;
}
}
// Store N vectors into N-element structure (scalar base)
-defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2">;
-defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3">;
-defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4">;
+defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store N vectors into N-element structure (scalar base, VL displacement)
-defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2">;
-defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">;
-defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">;
+defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store one vector, with no truncation, non-temporal (scalar base)
def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index f7965ff889bba..06f6b90ca2c08 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -569,34 +569,39 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
// * When compiling for SVE only, the caller must be in non-streaming mode.
// * When compiling for both SVE and SME, the caller can be in either mode.
if (BuiltinType == SemaARM::VerifyRuntimeMode) {
- llvm::StringMap<bool> CallerFeatureMapWithoutSVE;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSVE, FD);
- CallerFeatureMapWithoutSVE["sve"] = false;
+ llvm::StringMap<bool> CallerFeatures;
+ S.Context.getFunctionFeatureMap(CallerFeatures, FD);
// Avoid emitting diagnostics for a function that can never compile.
- if (FnType == SemaARM::ArmStreaming && !CallerFeatureMapWithoutSVE["sme"])
+ if (FnType == SemaARM::ArmStreaming && !CallerFeatures["sme"])
return false;
- llvm::StringMap<bool> CallerFeatureMapWithoutSME;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSME, FD);
- CallerFeatureMapWithoutSME["sme"] = false;
+ const auto FindTopLevelPipe = [](const char *S) {
+ unsigned Depth = 0;
+ unsigned I = 0, E = strlen(S);
+ for (; I < E; ++I) {
+ if (S[I] == '|' && Depth == 0)
+ break;
+ if (S[I] == '(')
+ ++Depth;
+ else if (S[I] == ')')
+ --Depth;
+ }
+ return I;
+ };
+
+ const char *RequiredFeatures =
+ S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID);
+ unsigned PipeIdx = FindTopLevelPipe(RequiredFeatures);
+ assert(PipeIdx != 0 && PipeIdx != strlen(RequiredFeatures) &&
+ "Expected feature string of the form 'SVE-EXPR|SME-EXPR'");
+ StringRef NonStreamingBuiltinGuard = StringRef(RequiredFeatures, PipeIdx);
+ StringRef StreamingBuiltinGuard = StringRef(RequiredFeatures + PipeIdx + 1);
- // We know the builtin requires either some combination of SVE flags, or
- // some combination of SME flags, but we need to figure out which part
- // of the required features is satisfied by the target features.
- //
- // For a builtin with target guard 'sve2p1|sme2', if we compile with
- // '+sve2p1,+sme', then we know that it satisfies the 'sve2p1' part if we
- // evaluate the features for '+sve2p1,+sme,+nosme'.
- //
- // Similarly, if we compile with '+sve2,+sme2', then we know it satisfies
- // the 'sme2' part if we evaluate the features for '+sve2,+sme2,+nosve'.
- StringRef BuiltinTargetGuards(
- S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID));
bool SatisfiesSVE = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSME);
+ NonStreamingBuiltinGuard, CallerFeatures);
bool SatisfiesSME = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSVE);
+ StreamingBuiltinGuard, CallerFeatures);
if ((SatisfiesSVE && SatisfiesSME) ||
(SatisfiesSVE && FnType == SemaARM::ArmStreamingCompatible))
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b506546110983..e2da20efaba5a 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1901,6 +1901,9 @@ void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
if (!Def->isFlagSet(VerifyRuntimeMode) && !Def->getSVEGuard().empty() &&
!Def->getSMEGuard().empty())
report_fatal_error("Missing VerifyRuntimeMode flag");
+ if (Def->isFlagSet(VerifyRuntimeMode) &&
+ (Def->getSVEGuard().empty() || Def->getSMEGuard().empty()))
+ report_fatal_error("VerifyRuntimeMode requires SVE and SME guards");
if (Def->isFlagSet(IsStreamingFlag))
StreamingMap["ArmStreaming"].insert(Def->getMangledName());
|
@llvm/pr-subscribers-backend-arm Author: Paul Walker (paulwalker-arm) ChangesRather than filtering the calling function's features the PR splits the builtin guard into distinct non-streaming and streaming guards that are compared to the active features in full. This has no affect on the current builtin definitions[1] but will allow us in the future to reference SVE features within streaming builtin guards and SME features within non-streaming builtin guards. [1] The change uncovered an issue whereby a couple of builtins where tagged with VerifyRuntimeMode but did not include both streaming and non-streaming guards. Some of those builtins are available in SME2p1 but to keep the PR mostly NFC I've ensured they are only available in non-streaming mode, thus matching the existing implementation. Full diff: https://github.com/llvm/llvm-project/pull/145941.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 1b3131be78452..e292b455ece92 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -264,22 +264,22 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
}
-multiclass StructLoad<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+multiclass StructLoad<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructLoad])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructLoad])>;
}
}
// Load N-element structure into N vectors (scalar base)
-defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret">;
-defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret">;
-defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret">;
+defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load N-element structure into N vectors (scalar base, VL displacement)
-defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret">;
-defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret">;
-defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">;
+defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load one octoword and replicate (scalar base)
let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in {
@@ -434,21 +434,21 @@ def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "v
def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">;
} // let SVETargetGuard = "sve"
-multiclass StructStore<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+multiclass StructStore<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructStore])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructStore])>;
}
}
// Store N vectors into N-element structure (scalar base)
-defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2">;
-defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3">;
-defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4">;
+defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store N vectors into N-element structure (scalar base, VL displacement)
-defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2">;
-defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">;
-defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">;
+defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store one vector, with no truncation, non-temporal (scalar base)
def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index f7965ff889bba..06f6b90ca2c08 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -569,34 +569,39 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
// * When compiling for SVE only, the caller must be in non-streaming mode.
// * When compiling for both SVE and SME, the caller can be in either mode.
if (BuiltinType == SemaARM::VerifyRuntimeMode) {
- llvm::StringMap<bool> CallerFeatureMapWithoutSVE;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSVE, FD);
- CallerFeatureMapWithoutSVE["sve"] = false;
+ llvm::StringMap<bool> CallerFeatures;
+ S.Context.getFunctionFeatureMap(CallerFeatures, FD);
// Avoid emitting diagnostics for a function that can never compile.
- if (FnType == SemaARM::ArmStreaming && !CallerFeatureMapWithoutSVE["sme"])
+ if (FnType == SemaARM::ArmStreaming && !CallerFeatures["sme"])
return false;
- llvm::StringMap<bool> CallerFeatureMapWithoutSME;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSME, FD);
- CallerFeatureMapWithoutSME["sme"] = false;
+ const auto FindTopLevelPipe = [](const char *S) {
+ unsigned Depth = 0;
+ unsigned I = 0, E = strlen(S);
+ for (; I < E; ++I) {
+ if (S[I] == '|' && Depth == 0)
+ break;
+ if (S[I] == '(')
+ ++Depth;
+ else if (S[I] == ')')
+ --Depth;
+ }
+ return I;
+ };
+
+ const char *RequiredFeatures =
+ S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID);
+ unsigned PipeIdx = FindTopLevelPipe(RequiredFeatures);
+ assert(PipeIdx != 0 && PipeIdx != strlen(RequiredFeatures) &&
+ "Expected feature string of the form 'SVE-EXPR|SME-EXPR'");
+ StringRef NonStreamingBuiltinGuard = StringRef(RequiredFeatures, PipeIdx);
+ StringRef StreamingBuiltinGuard = StringRef(RequiredFeatures + PipeIdx + 1);
- // We know the builtin requires either some combination of SVE flags, or
- // some combination of SME flags, but we need to figure out which part
- // of the required features is satisfied by the target features.
- //
- // For a builtin with target guard 'sve2p1|sme2', if we compile with
- // '+sve2p1,+sme', then we know that it satisfies the 'sve2p1' part if we
- // evaluate the features for '+sve2p1,+sme,+nosme'.
- //
- // Similarly, if we compile with '+sve2,+sme2', then we know it satisfies
- // the 'sme2' part if we evaluate the features for '+sve2,+sme2,+nosve'.
- StringRef BuiltinTargetGuards(
- S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID));
bool SatisfiesSVE = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSME);
+ NonStreamingBuiltinGuard, CallerFeatures);
bool SatisfiesSME = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSVE);
+ StreamingBuiltinGuard, CallerFeatures);
if ((SatisfiesSVE && SatisfiesSME) ||
(SatisfiesSVE && FnType == SemaARM::ArmStreamingCompatible))
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b506546110983..e2da20efaba5a 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1901,6 +1901,9 @@ void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
if (!Def->isFlagSet(VerifyRuntimeMode) && !Def->getSVEGuard().empty() &&
!Def->getSMEGuard().empty())
report_fatal_error("Missing VerifyRuntimeMode flag");
+ if (Def->isFlagSet(VerifyRuntimeMode) &&
+ (Def->getSVEGuard().empty() || Def->getSMEGuard().empty()))
+ report_fatal_error("VerifyRuntimeMode requires SVE and SME guards");
if (Def->isFlagSet(IsStreamingFlag))
StreamingMap["ArmStreaming"].insert(Def->getMangledName());
|
@llvm/pr-subscribers-backend-aarch64 Author: Paul Walker (paulwalker-arm) ChangesRather than filtering the calling function's features the PR splits the builtin guard into distinct non-streaming and streaming guards that are compared to the active features in full. This has no affect on the current builtin definitions[1] but will allow us in the future to reference SVE features within streaming builtin guards and SME features within non-streaming builtin guards. [1] The change uncovered an issue whereby a couple of builtins where tagged with VerifyRuntimeMode but did not include both streaming and non-streaming guards. Some of those builtins are available in SME2p1 but to keep the PR mostly NFC I've ensured they are only available in non-streaming mode, thus matching the existing implementation. Full diff: https://github.com/llvm/llvm-project/pull/145941.diff 3 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 1b3131be78452..e292b455ece92 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -264,22 +264,22 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
}
-multiclass StructLoad<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+multiclass StructLoad<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructLoad])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructLoad])>;
}
}
// Load N-element structure into N vectors (scalar base)
-defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret">;
-defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret">;
-defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret">;
+defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load N-element structure into N vectors (scalar base, VL displacement)
-defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret">;
-defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret">;
-defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret">;
+defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>;
+defm SVLD3_VNUM : StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>;
+defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>;
// Load one octoword and replicate (scalar base)
let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in {
@@ -434,21 +434,21 @@ def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "v
def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">;
} // let SVETargetGuard = "sve"
-multiclass StructStore<string name, string proto, string i> {
- def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+multiclass StructStore<string name, string proto, string i, list<FlagType> f = []> {
+ def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, !listconcat(f, [IsStructStore])>;
let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
- def: SInst<name, proto, "b", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+ def: SInst<name, proto, "b", MergeNone, i, !listconcat(f, [IsStructStore])>;
}
}
// Store N vectors into N-element structure (scalar base)
-defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2">;
-defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3">;
-defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4">;
+defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4 : StructStore<"svst4[_{d}]", "vPp4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store N vectors into N-element structure (scalar base, VL displacement)
-defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2">;
-defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">;
-defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">;
+defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2", [VerifyRuntimeMode]>;
+defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3", [VerifyRuntimeMode]>;
+defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4", [VerifyRuntimeMode]>;
// Store one vector, with no truncation, non-temporal (scalar base)
def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index f7965ff889bba..06f6b90ca2c08 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -569,34 +569,39 @@ static bool checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
// * When compiling for SVE only, the caller must be in non-streaming mode.
// * When compiling for both SVE and SME, the caller can be in either mode.
if (BuiltinType == SemaARM::VerifyRuntimeMode) {
- llvm::StringMap<bool> CallerFeatureMapWithoutSVE;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSVE, FD);
- CallerFeatureMapWithoutSVE["sve"] = false;
+ llvm::StringMap<bool> CallerFeatures;
+ S.Context.getFunctionFeatureMap(CallerFeatures, FD);
// Avoid emitting diagnostics for a function that can never compile.
- if (FnType == SemaARM::ArmStreaming && !CallerFeatureMapWithoutSVE["sme"])
+ if (FnType == SemaARM::ArmStreaming && !CallerFeatures["sme"])
return false;
- llvm::StringMap<bool> CallerFeatureMapWithoutSME;
- S.Context.getFunctionFeatureMap(CallerFeatureMapWithoutSME, FD);
- CallerFeatureMapWithoutSME["sme"] = false;
+ const auto FindTopLevelPipe = [](const char *S) {
+ unsigned Depth = 0;
+ unsigned I = 0, E = strlen(S);
+ for (; I < E; ++I) {
+ if (S[I] == '|' && Depth == 0)
+ break;
+ if (S[I] == '(')
+ ++Depth;
+ else if (S[I] == ')')
+ --Depth;
+ }
+ return I;
+ };
+
+ const char *RequiredFeatures =
+ S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID);
+ unsigned PipeIdx = FindTopLevelPipe(RequiredFeatures);
+ assert(PipeIdx != 0 && PipeIdx != strlen(RequiredFeatures) &&
+ "Expected feature string of the form 'SVE-EXPR|SME-EXPR'");
+ StringRef NonStreamingBuiltinGuard = StringRef(RequiredFeatures, PipeIdx);
+ StringRef StreamingBuiltinGuard = StringRef(RequiredFeatures + PipeIdx + 1);
- // We know the builtin requires either some combination of SVE flags, or
- // some combination of SME flags, but we need to figure out which part
- // of the required features is satisfied by the target features.
- //
- // For a builtin with target guard 'sve2p1|sme2', if we compile with
- // '+sve2p1,+sme', then we know that it satisfies the 'sve2p1' part if we
- // evaluate the features for '+sve2p1,+sme,+nosme'.
- //
- // Similarly, if we compile with '+sve2,+sme2', then we know it satisfies
- // the 'sme2' part if we evaluate the features for '+sve2,+sme2,+nosve'.
- StringRef BuiltinTargetGuards(
- S.Context.BuiltinInfo.getRequiredFeatures(BuiltinID));
bool SatisfiesSVE = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSME);
+ NonStreamingBuiltinGuard, CallerFeatures);
bool SatisfiesSME = Builtin::evaluateRequiredTargetFeatures(
- BuiltinTargetGuards, CallerFeatureMapWithoutSVE);
+ StreamingBuiltinGuard, CallerFeatures);
if ((SatisfiesSVE && SatisfiesSME) ||
(SatisfiesSVE && FnType == SemaARM::ArmStreamingCompatible))
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b506546110983..e2da20efaba5a 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1901,6 +1901,9 @@ void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
if (!Def->isFlagSet(VerifyRuntimeMode) && !Def->getSVEGuard().empty() &&
!Def->getSMEGuard().empty())
report_fatal_error("Missing VerifyRuntimeMode flag");
+ if (Def->isFlagSet(VerifyRuntimeMode) &&
+ (Def->getSVEGuard().empty() || Def->getSMEGuard().empty()))
+ report_fatal_error("VerifyRuntimeMode requires SVE and SME guards");
if (Def->isFlagSet(IsStreamingFlag))
StreamingMap["ArmStreaming"].insert(Def->getMangledName());
|
ping |
Rather than filtering the calling function's features the PR splits the builtin guard into distinct non-streaming and streaming guards that are compared to the active features in full.
This has no affect on the current builtin definitions[1] but will allow us in the future to reference SVE features within streaming builtin guards and SME features within non-streaming builtin guards.
[1] The change uncovered an issue whereby a couple of builtins where tagged with VerifyRuntimeMode but did not include both streaming and non-streaming guards. Some of those builtins are available in SME2p1 but to keep the PR mostly NFC I've ensured they are only available in non-streaming mode, thus matching the existing implementation.