Skip to content

Commit cf005a3

Browse files
[AArch64] Add support for -mlong-calls code generation
This patch implements backend support for -mlong-calls on AArch64 targets. When enabled, calls to external functions are lowered to an indirect call through an address computed with `adrp` and `add`, rather than a direct `bl` instruction, whose 26-bit immediate limits it to a ±128MB PC-relative offset. This is particularly useful when a callee may lie outside that range, such as in large binaries or link-time-optimized builds.

Key changes:
- In SelectionDAG lowering (`LowerCall`), detect `-mlong-calls` and emit:
  - `adrp + add` address calculation
  - `blr` indirect call instruction

This patch ensures that long calls are emitted correctly for both GlobalAddress and ExternalSymbol call targets.

Tested:
- New codegen tests under `llvm/test/CodeGen/AArch64/aarch64-long-calls.ll`
- Verified `adrp + add + blr` output in `.s` for global and external functions
1 parent 2b3e07f commit cf005a3

File tree

8 files changed

+103
-8
lines changed

8 files changed

+103
-8
lines changed

clang/lib/Driver/ToolChains/Arch/AArch64.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,12 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
466466

467467
if (Args.getLastArg(options::OPT_mno_bti_at_return_twice))
468468
Features.push_back("+no-bti-at-return-twice");
469+
470+
if (Arg *A = Args.getLastArg(options::OPT_mlong_calls,
471+
options::OPT_mno_long_calls)) {
472+
if (A->getOption().matches(options::OPT_mlong_calls))
473+
Features.push_back("+long-calls");
474+
}
469475
}
470476

471477
void aarch64::setPAuthABIInTriple(const Driver &D, const ArgList &Args,

llvm/lib/Target/AArch64/AArch64Features.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,10 @@ def FeatureDisableFastIncVL : SubtargetFeature<"disable-fast-inc-vl",
825825
"HasDisableFastIncVL", "true",
826826
"Do not prefer INC/DEC, ALL, { 1, 2, 4 } over ADDVL">;
827827

828+
def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
829+
"Generate calls via indirect call "
830+
"instructions">;
831+
828832
//===----------------------------------------------------------------------===//
829833
// Architectures.
830834
//

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9286,8 +9286,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
92869286
Callee = DAG.getTargetGlobalAddress(CalledGlobal, DL, PtrVT, 0, OpFlags);
92879287
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
92889288
} else {
9289-
const GlobalValue *GV = G->getGlobal();
9290-
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
9289+
if (Subtarget->genLongCalls())
9290+
Callee = getAddr(G, DAG, OpFlags);
9291+
else {
9292+
const GlobalValue *GV = G->getGlobal();
9293+
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
9294+
}
92919295
}
92929296
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
92939297
bool UseGot = (getTargetMachine().getCodeModel() == CodeModel::Large &&
@@ -9298,7 +9302,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
92989302
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
92999303
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
93009304
} else {
9301-
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
9305+
if (Subtarget->genLongCalls())
9306+
Callee = getAddr(S, DAG, 0);
9307+
else
9308+
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
93029309
}
93039310
}
93049311

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,6 +1351,17 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
13511351
DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
13521352
MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
13531353
Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
1354+
} else if (Subtarget.genLongCalls()) {
1355+
// If -mlong-calls is enabled, materialize the symbol/global with MO_PAGE
1356+
// to allow an ADRP+ADD address sequence for calls beyond the 128MB range.
1357+
auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
1358+
DstOp(getLLTForType(*F.getType(), DL)).addDefToMIB(MRI, MIB);
1359+
if (Info.Callee.isGlobal()) {
1360+
const GlobalValue *GV = Info.Callee.getGlobal();
1361+
MIB.addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
1362+
} else if (Info.Callee.isSymbol())
1363+
MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_PAGE);
1364+
Info.Callee = MachineOperand::CreateReg(MIB.getReg(0), false);
13541365
}
13551366
Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.PAI, MRI);
13561367
}

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2706,6 +2706,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
27062706
"Expected small code model");
27072707
auto Op1 = BaseMI->getOperand(1);
27082708
auto Op2 = I.getOperand(2);
2709+
if (Subtarget->genLongCalls() && Op1.isSymbol()) {
2710+
auto MovAddr =
2711+
MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2712+
.addExternalSymbol(Op1.getSymbolName(), Op1.getTargetFlags())
2713+
.addExternalSymbol(Op2.getSymbolName(), Op2.getTargetFlags());
2714+
I.eraseFromParent();
2715+
return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2716+
}
27092717
auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
27102718
.addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
27112719
Op1.getTargetFlags())

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,15 +1571,21 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
15711571
// By splitting this here, we can optimize accesses in the small code model by
15721572
// folding in the G_ADD_LOW into the load/store offset.
15731573
auto &GlobalOp = MI.getOperand(1);
1574-
// Don't modify an intrinsic call.
1575-
if (GlobalOp.isSymbol())
1576-
return true;
1574+
if (GlobalOp.isSymbol()) {
1575+
// For symbol operands, use long call expansion if required.
1576+
return ST->genLongCalls()
1577+
? legalizeSmallCMSymbol(MI, MRI, MIRBuilder, Observer)
1578+
: true;
1579+
}
15771580
const auto* GV = GlobalOp.getGlobal();
15781581
if (GV->isThreadLocal())
15791582
return true; // Don't want to modify TLS vars.
15801583

15811584
auto &TM = ST->getTargetLowering()->getTargetMachine();
1582-
unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM);
1585+
unsigned OpFlags = 0;
1586+
1587+
if (!ST->genLongCalls())
1588+
OpFlags = ST->ClassifyGlobalReference(GV, TM);
15831589

15841590
if (OpFlags & AArch64II::MO_GOT)
15851591
return true;
@@ -1621,6 +1627,30 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
16211627
return true;
16221628
}
16231629

1630+
bool AArch64LegalizerInfo::legalizeSmallCMSymbol(
1631+
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
1632+
GISelChangeObserver &Observer) const {
1633+
assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
1634+
// Custom-legalize a G_GLOBAL_VALUE whose operand is an external symbol into a
1635+
// target ADRP (MO_PAGE) followed by a G_ADD_LOW (MO_PAGEOFF | MO_NC).
1636+
// The result is written to MI's original destination register, MI is erased,
1637+
// and true is always returned. Observer is not used here.
1638+
auto &SymbolOp = MI.getOperand(1);
1639+
1640+
Register DstReg = MI.getOperand(0).getReg();
1641+
auto ADRP =
1642+
MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
1643+
.addExternalSymbol(SymbolOp.getSymbolName(), AArch64II::MO_PAGE);
1644+
// Give the ADRP result register the GPR64 register class.
1645+
MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
1646+
1647+
MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
1648+
.addExternalSymbol(SymbolOp.getSymbolName(),
1649+
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1650+
MI.eraseFromParent();
1651+
return true;
1652+
}
1653+
16241654
bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
16251655
MachineInstr &MI) const {
16261656
MachineIRBuilder &MIB = Helper.MIRBuilder;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ class AArch64LegalizerInfo : public LegalizerInfo {
4242
bool legalizeShlAshrLshr(MachineInstr &MI, MachineRegisterInfo &MRI,
4343
MachineIRBuilder &MIRBuilder,
4444
GISelChangeObserver &Observer) const;
45-
45+
bool legalizeSmallCMSymbol(MachineInstr &MI, MachineRegisterInfo &MRI,
46+
MachineIRBuilder &MIRBuilder,
47+
GISelChangeObserver &Observer) const;
4648
bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
4749
MachineIRBuilder &MIRBuilder,
4850
GISelChangeObserver &Observer) const;
llvm/test/CodeGen/AArch64/aarch64-long-calls.ll

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; RUN: llc -O2 -mtriple=aarch64-linux-gnu -mcpu=generic -mattr=+long-calls < %s | FileCheck %s
2+
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mcpu=generic -mattr=+long-calls < %s | FileCheck %s
3+
4+
declare void @far_func()
5+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
6+
7+
define void @test() {
8+
entry:
9+
call void @far_func()
10+
ret void
11+
}
12+
13+
define void @test2(ptr %dst, i8 %val, i64 %len) {
14+
entry:
15+
call void @llvm.memset.p0.i64(ptr %dst, i8 %val, i64 %len, i1 false)
16+
ret void
17+
}
18+
19+
; CHECK-LABEL: test:
20+
; CHECK: adrp {{x[0-9]+}}, far_func
21+
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, :lo12:far_func
22+
; CHECK: blr {{x[0-9]+}}
23+
24+
; CHECK-LABEL: test2:
25+
; CHECK: adrp {{x[0-9]+}}, memset
26+
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, :lo12:memset
27+
; CHECK: blr {{x[0-9]+}}

0 commit comments

Comments
 (0)