Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 2efa31b

Browse files
committedFeb 1, 2019
Auto merge of #57937 - denzp:nvptx, r=nagisa
NVPTX target specification This change adds a built-in `nvptx64-nvidia-cuda` GPGPU no-std target specification and basic PTX assembly smoke tests. The approach taken here and the target spec are based on `ptx-linker`, a project started about 1.5 years ago. Key feature: bitcode object files are linked with LTO into the final module on the linker's side. Prior to this change, the linker used the `ld` linker-flavor, but I think that having a dedicated CLI convention is more reliable. Questions about further progress on a reliable CUDA workflow with Rust: 1. Is it possible to create a `codegen-asm` test suite to verify end-to-end integration with the LLVM backend? 2. What is the better way to organise no-std `compile-fail` tests: add `#![no_std]` where possible and mark the others with an `ignore-nvptx` directive, or alternatively introduce a `compile-fail-no-std` test suite? 3. Can the `ptx-linker` eventually be integrated the way `rls` or `clippy` are? Hopefully, this would allow it to link statically against the LLVM used in Rust and get rid of the [current hacky solution](https://github.com/denzp/rustc-llvm-proxy). 4. Am I missing some methods from `rustc_codegen_ssa::back::linker::Linker` that can be useful for bitcode-only linking? Currently, there are no major public CUDA projects written in Rust that I'm aware of, but I expect that having a built-in target will create a solid foundation for further experiments and awesome crates. Related to #38789 Fixes #38787 Fixes #38786
2 parents 852701a + 49931fd commit 2efa31b

File tree

19 files changed

+483
-61
lines changed

19 files changed

+483
-61
lines changed
 

‎.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ matrix:
168168
if: branch = auto
169169
- env: IMAGE=i686-gnu-nopt
170170
if: branch = auto
171-
- env: IMAGE=wasm32-unknown
171+
- env: IMAGE=test-various
172172
if: branch = auto
173173
- env: IMAGE=x86_64-gnu
174174
if: branch = auto

‎src/bootstrap/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,7 @@ impl Build {
831831
!target.contains("msvc") &&
832832
!target.contains("emscripten") &&
833833
!target.contains("wasm32") &&
834+
!target.contains("nvptx") &&
834835
!target.contains("fuchsia") {
835836
Some(self.cc(target))
836837
} else {

‎src/bootstrap/sanity.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ pub fn check(build: &mut Build) {
156156
panic!("the iOS target is only supported on macOS");
157157
}
158158

159-
if target.contains("-none-") {
159+
if target.contains("-none-") || target.contains("nvptx") {
160160
if build.no_std(*target).is_none() {
161161
let target = build.config.target_config.entry(target.clone())
162162
.or_default();
@@ -165,7 +165,7 @@ pub fn check(build: &mut Build) {
165165
}
166166

167167
if build.no_std(*target) == Some(false) {
168-
panic!("All the *-none-* targets are no-std targets")
168+
panic!("All the *-none-* and nvptx* targets are no-std targets")
169169
}
170170
}
171171

‎src/ci/docker/dist-various-2/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ ENV TARGETS=$TARGETS,x86_64-sun-solaris
7070
ENV TARGETS=$TARGETS,x86_64-unknown-linux-gnux32
7171
ENV TARGETS=$TARGETS,x86_64-unknown-cloudabi
7272
ENV TARGETS=$TARGETS,x86_64-fortanix-unknown-sgx
73+
ENV TARGETS=$TARGETS,nvptx64-nvidia-cuda
7374

7475
ENV X86_FORTANIX_SGX_LIBS="/x86_64-fortanix-unknown-sgx/lib/"
7576

‎src/ci/docker/wasm32-unknown/Dockerfile renamed to ‎src/ci/docker/test-various/Dockerfile

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1313
gdb \
1414
xz-utils
1515

16+
# FIXME: build the `ptx-linker` instead.
17+
RUN curl -sL https://github.com/denzp/rust-ptx-linker/releases/download/v0.9.0-alpha.2/rust-ptx-linker.linux64.tar.gz | \
18+
tar -xzvC /usr/bin
19+
1620
RUN curl -sL https://nodejs.org/dist/v9.2.0/node-v9.2.0-linux-x64.tar.xz | \
17-
tar -xJ
21+
tar -xJ
1822

1923
COPY scripts/sccache.sh /scripts/
2024
RUN sh /scripts/sccache.sh
2125

22-
ENV TARGETS=wasm32-unknown-unknown
23-
2426
ENV RUST_CONFIGURE_ARGS \
2527
--set build.nodejs=/node-v9.2.0-linux-x64/bin/node \
2628
--set rust.lld
@@ -31,11 +33,18 @@ ENV RUST_CONFIGURE_ARGS \
3133
# other contexts as well
3234
ENV NO_DEBUG_ASSERTIONS=1
3335

34-
ENV SCRIPT python2.7 /checkout/x.py test --target $TARGETS \
36+
ENV WASM_TARGETS=wasm32-unknown-unknown
37+
ENV WASM_SCRIPT python2.7 /checkout/x.py test --target $WASM_TARGETS \
3538
src/test/run-make \
3639
src/test/ui \
3740
src/test/run-pass \
3841
src/test/compile-fail \
3942
src/test/mir-opt \
4043
src/test/codegen-units \
41-
src/libcore \
44+
src/libcore
45+
46+
ENV NVPTX_TARGETS=nvptx64-nvidia-cuda
47+
ENV NVPTX_SCRIPT python2.7 /checkout/x.py test --target $NVPTX_TARGETS \
48+
src/test/run-make
49+
50+
ENV SCRIPT $WASM_SCRIPT && $NVPTX_SCRIPT

‎src/librustc/ty/context.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,6 +1675,12 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
16751675
}
16761676
false
16771677
}
1678+
1679+
/// Determine whether identifiers in the assembly have strict naming rules.
1680+
/// Currently, only NVPTX* targets need it.
1681+
pub fn has_strict_asm_symbol_naming(&self) -> bool {
1682+
self.gcx.sess.target.target.arch.contains("nvptx")
1683+
}
16781684
}
16791685

16801686
impl<'a, 'tcx> TyCtxt<'a, 'tcx, 'tcx> {

‎src/librustc_codegen_ssa/back/link.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ pub fn linker_and_flavor(sess: &Session) -> (PathBuf, LinkerFlavor) {
149149
LinkerFlavor::Ld => "ld",
150150
LinkerFlavor::Msvc => "link.exe",
151151
LinkerFlavor::Lld(_) => "lld",
152+
LinkerFlavor::PtxLinker => "rust-ptx-linker",
152153
}), flavor)),
153154
(Some(linker), None) => {
154155
let stem = if linker.extension().and_then(|ext| ext.to_str()) == Some("exe") {

‎src/librustc_codegen_ssa/back/linker.rs

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use rustc::hir::def_id::{LOCAL_CRATE, CrateNum};
1313
use rustc::middle::dependency_format::Linkage;
1414
use rustc::session::Session;
1515
use rustc::session::config::{self, CrateType, OptLevel, DebugInfo,
16-
CrossLangLto};
16+
CrossLangLto, Lto};
1717
use rustc::ty::TyCtxt;
1818
use rustc_target::spec::{LinkerFlavor, LldFlavor};
1919
use serialize::{json, Encoder};
@@ -83,6 +83,10 @@ impl LinkerInfo {
8383
LinkerFlavor::Lld(LldFlavor::Wasm) => {
8484
Box::new(WasmLd::new(cmd, sess, self)) as Box<dyn Linker>
8585
}
86+
87+
LinkerFlavor::PtxLinker => {
88+
Box::new(PtxLinker { cmd, sess }) as Box<dyn Linker>
89+
}
8690
}
8791
}
8892
}
@@ -1080,3 +1084,129 @@ fn exported_symbols(tcx: TyCtxt, crate_type: CrateType) -> Vec<String> {
10801084

10811085
symbols
10821086
}
1087+
1088+
/// Much simplified and explicit CLI for the NVPTX linker. The linker operates
1089+
/// on bitcode and uses the LLVM backend to generate PTX assembly.
1090+
pub struct PtxLinker<'a> {
1091+
cmd: Command,
1092+
sess: &'a Session,
1093+
}
1094+
1095+
impl<'a> Linker for PtxLinker<'a> {
1096+
fn link_rlib(&mut self, path: &Path) {
1097+
self.cmd.arg("--rlib").arg(path);
1098+
}
1099+
1100+
fn link_whole_rlib(&mut self, path: &Path) {
1101+
self.cmd.arg("--rlib").arg(path);
1102+
}
1103+
1104+
fn include_path(&mut self, path: &Path) {
1105+
self.cmd.arg("-L").arg(path);
1106+
}
1107+
1108+
fn debuginfo(&mut self) {
1109+
self.cmd.arg("--debug");
1110+
}
1111+
1112+
fn add_object(&mut self, path: &Path) {
1113+
self.cmd.arg("--bitcode").arg(path);
1114+
}
1115+
1116+
fn args(&mut self, args: &[String]) {
1117+
self.cmd.args(args);
1118+
}
1119+
1120+
fn optimize(&mut self) {
1121+
match self.sess.lto() {
1122+
Lto::Thin | Lto::Fat | Lto::ThinLocal => {
1123+
self.cmd.arg("-Olto");
1124+
},
1125+
1126+
Lto::No => { },
1127+
};
1128+
}
1129+
1130+
fn output_filename(&mut self, path: &Path) {
1131+
self.cmd.arg("-o").arg(path);
1132+
}
1133+
1134+
fn finalize(&mut self) -> Command {
1135+
// Provide the linker with fallback to internal `target-cpu`.
1136+
self.cmd.arg("--fallback-arch").arg(match self.sess.opts.cg.target_cpu {
1137+
Some(ref s) => s,
1138+
None => &self.sess.target.target.options.cpu
1139+
});
1140+
1141+
::std::mem::replace(&mut self.cmd, Command::new(""))
1142+
}
1143+
1144+
fn link_dylib(&mut self, _lib: &str) {
1145+
panic!("external dylibs not supported")
1146+
}
1147+
1148+
fn link_rust_dylib(&mut self, _lib: &str, _path: &Path) {
1149+
panic!("external dylibs not supported")
1150+
}
1151+
1152+
fn link_staticlib(&mut self, _lib: &str) {
1153+
panic!("staticlibs not supported")
1154+
}
1155+
1156+
fn link_whole_staticlib(&mut self, _lib: &str, _search_path: &[PathBuf]) {
1157+
panic!("staticlibs not supported")
1158+
}
1159+
1160+
fn framework_path(&mut self, _path: &Path) {
1161+
panic!("frameworks not supported")
1162+
}
1163+
1164+
fn link_framework(&mut self, _framework: &str) {
1165+
panic!("frameworks not supported")
1166+
}
1167+
1168+
fn position_independent_executable(&mut self) {
1169+
}
1170+
1171+
fn full_relro(&mut self) {
1172+
}
1173+
1174+
fn partial_relro(&mut self) {
1175+
}
1176+
1177+
fn no_relro(&mut self) {
1178+
}
1179+
1180+
fn build_static_executable(&mut self) {
1181+
}
1182+
1183+
fn gc_sections(&mut self, _keep_metadata: bool) {
1184+
}
1185+
1186+
fn pgo_gen(&mut self) {
1187+
}
1188+
1189+
fn no_default_libraries(&mut self) {
1190+
}
1191+
1192+
fn build_dylib(&mut self, _out_filename: &Path) {
1193+
}
1194+
1195+
fn export_symbols(&mut self, _tmpdir: &Path, _crate_type: CrateType) {
1196+
}
1197+
1198+
fn subsystem(&mut self, _subsystem: &str) {
1199+
}
1200+
1201+
fn no_position_independent_executable(&mut self) {
1202+
}
1203+
1204+
fn group_start(&mut self) {
1205+
}
1206+
1207+
fn group_end(&mut self) {
1208+
}
1209+
1210+
fn cross_lang_lto(&mut self) {
1211+
}
1212+
}

‎src/librustc_codegen_utils/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#![feature(nll)]
1313
#![allow(unused_attributes)]
1414
#![feature(rustc_diagnostic_macros)]
15+
#![feature(in_band_lifetimes)]
1516

1617
#![recursion_limit="256"]
1718

‎src/librustc_codegen_utils/symbol_names.rs

Lines changed: 75 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ fn get_symbol_hash<'a, 'tcx>(
221221
}
222222

223223
fn def_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, def_id: DefId) -> ty::SymbolName {
224-
let mut buffer = SymbolPathBuffer::new();
224+
let mut buffer = SymbolPathBuffer::new(tcx);
225225
item_path::with_forced_absolute_paths(|| {
226226
tcx.push_item_path(&mut buffer, def_id, false);
227227
});
@@ -317,7 +317,7 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
317317

318318
let hash = get_symbol_hash(tcx, def_id, instance, instance_ty, substs);
319319

320-
let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id));
320+
let mut buf = SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id), tcx);
321321

322322
if instance.is_vtable_shim() {
323323
buf.push("{{vtable-shim}}");
@@ -343,22 +343,25 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
343343
struct SymbolPathBuffer {
344344
result: String,
345345
temp_buf: String,
346+
strict_naming: bool,
346347
}
347348

348349
impl SymbolPathBuffer {
349-
fn new() -> Self {
350+
fn new(tcx: TyCtxt<'_, '_, '_>) -> Self {
350351
let mut result = SymbolPathBuffer {
351352
result: String::with_capacity(64),
352353
temp_buf: String::with_capacity(16),
354+
strict_naming: tcx.has_strict_asm_symbol_naming(),
353355
};
354356
result.result.push_str("_ZN"); // _Z == Begin name-sequence, N == nested
355357
result
356358
}
357359

358-
fn from_interned(symbol: ty::SymbolName) -> Self {
360+
fn from_interned(symbol: ty::SymbolName, tcx: TyCtxt<'_, '_, '_>) -> Self {
359361
let mut result = SymbolPathBuffer {
360362
result: String::with_capacity(64),
361363
temp_buf: String::with_capacity(16),
364+
strict_naming: tcx.has_strict_asm_symbol_naming(),
362365
};
363366
result.result.push_str(&symbol.as_str());
364367
result
@@ -375,68 +378,88 @@ impl SymbolPathBuffer {
375378
let _ = write!(self.result, "17h{:016x}E", hash);
376379
self.result
377380
}
378-
}
379381

380-
impl ItemPathBuffer for SymbolPathBuffer {
381-
fn root_mode(&self) -> &RootMode {
382-
const ABSOLUTE: &RootMode = &RootMode::Absolute;
383-
ABSOLUTE
384-
}
385-
386-
fn push(&mut self, text: &str) {
382+
// Name sanitization. LLVM will happily accept identifiers with weird names, but
383+
// gas doesn't!
384+
// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
385+
// NVPTX assembly has stricter naming rules than gas, so additionally, dots
386+
// are replaced with '$' there.
387+
fn sanitize_and_append(&mut self, s: &str) {
387388
self.temp_buf.clear();
388-
let need_underscore = sanitize(&mut self.temp_buf, text);
389+
390+
for c in s.chars() {
391+
match c {
392+
// Escape these with $ sequences
393+
'@' => self.temp_buf.push_str("$SP$"),
394+
'*' => self.temp_buf.push_str("$BP$"),
395+
'&' => self.temp_buf.push_str("$RF$"),
396+
'<' => self.temp_buf.push_str("$LT$"),
397+
'>' => self.temp_buf.push_str("$GT$"),
398+
'(' => self.temp_buf.push_str("$LP$"),
399+
')' => self.temp_buf.push_str("$RP$"),
400+
',' => self.temp_buf.push_str("$C$"),
401+
402+
'-' | ':' => if self.strict_naming {
403+
// NVPTX doesn't support these characters in symbol names.
404+
self.temp_buf.push('$')
405+
}
406+
else {
407+
// '.' doesn't occur in types and functions, so reuse it
408+
// for ':' and '-'
409+
self.temp_buf.push('.')
410+
},
411+
412+
'.' => if self.strict_naming {
413+
self.temp_buf.push('$')
414+
}
415+
else {
416+
self.temp_buf.push('.')
417+
},
418+
419+
// These are legal symbols
420+
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$' => self.temp_buf.push(c),
421+
422+
_ => {
423+
self.temp_buf.push('$');
424+
for c in c.escape_unicode().skip(1) {
425+
match c {
426+
'{' => {}
427+
'}' => self.temp_buf.push('$'),
428+
c => self.temp_buf.push(c),
429+
}
430+
}
431+
}
432+
}
433+
}
434+
435+
let need_underscore = {
436+
// Underscore-qualify anything that didn't start as an ident.
437+
!self.temp_buf.is_empty()
438+
&& self.temp_buf.as_bytes()[0] != '_' as u8
439+
&& !(self.temp_buf.as_bytes()[0] as char).is_xid_start()
440+
};
441+
389442
let _ = write!(
390443
self.result,
391444
"{}",
392445
self.temp_buf.len() + (need_underscore as usize)
393446
);
447+
394448
if need_underscore {
395449
self.result.push('_');
396450
}
451+
397452
self.result.push_str(&self.temp_buf);
398453
}
399454
}
400455

401-
// Name sanitation. LLVM will happily accept identifiers with weird names, but
402-
// gas doesn't!
403-
// gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $
404-
//
405-
// returns true if an underscore must be added at the start
406-
pub fn sanitize(result: &mut String, s: &str) -> bool {
407-
for c in s.chars() {
408-
match c {
409-
// Escape these with $ sequences
410-
'@' => result.push_str("$SP$"),
411-
'*' => result.push_str("$BP$"),
412-
'&' => result.push_str("$RF$"),
413-
'<' => result.push_str("$LT$"),
414-
'>' => result.push_str("$GT$"),
415-
'(' => result.push_str("$LP$"),
416-
')' => result.push_str("$RP$"),
417-
',' => result.push_str("$C$"),
418-
419-
// '.' doesn't occur in types and functions, so reuse it
420-
// for ':' and '-'
421-
'-' | ':' => result.push('.'),
422-
423-
// These are legal symbols
424-
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.' | '$' => result.push(c),
425-
426-
_ => {
427-
result.push('$');
428-
for c in c.escape_unicode().skip(1) {
429-
match c {
430-
'{' => {}
431-
'}' => result.push('$'),
432-
c => result.push(c),
433-
}
434-
}
435-
}
436-
}
456+
impl ItemPathBuffer for SymbolPathBuffer {
457+
fn root_mode(&self) -> &RootMode {
458+
const ABSOLUTE: &RootMode = &RootMode::Absolute;
459+
ABSOLUTE
437460
}
438461

439-
// Underscore-qualify anything that didn't start as an ident.
440-
!result.is_empty() && result.as_bytes()[0] != '_' as u8
441-
&& !(result.as_bytes()[0] as char).is_xid_start()
462+
fn push(&mut self, text: &str) {
463+
self.sanitize_and_append(text);
464+
}
442465
}

‎src/librustc_target/spec/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ pub enum LinkerFlavor {
7575
Ld,
7676
Msvc,
7777
Lld(LldFlavor),
78+
PtxLinker,
7879
}
7980

8081
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash,
@@ -143,6 +144,7 @@ flavor_mappings! {
143144
((LinkerFlavor::Gcc), "gcc"),
144145
((LinkerFlavor::Ld), "ld"),
145146
((LinkerFlavor::Msvc), "msvc"),
147+
((LinkerFlavor::PtxLinker), "ptx-linker"),
146148
((LinkerFlavor::Lld(LldFlavor::Wasm)), "wasm-ld"),
147149
((LinkerFlavor::Lld(LldFlavor::Ld64)), "ld64.lld"),
148150
((LinkerFlavor::Lld(LldFlavor::Ld)), "ld.lld"),
@@ -455,6 +457,8 @@ supported_targets! {
455457
("x86_64-fortanix-unknown-sgx", x86_64_fortanix_unknown_sgx),
456458

457459
("x86_64-unknown-uefi", x86_64_unknown_uefi),
460+
461+
("nvptx64-nvidia-cuda", nvptx64_nvidia_cuda),
458462
}
459463

460464
/// Everything `rustc` knows about how to compile for a specific target.
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use spec::{LinkerFlavor, Target, TargetOptions, TargetResult, PanicStrategy, MergeFunctions};
2+
use spec::abi::Abi;
3+
4+
pub fn target() -> TargetResult {
5+
Ok(Target {
6+
arch: "nvptx64".to_string(),
7+
data_layout: "e-i64:64-i128:128-v16:16-v32:32-n16:32:64".to_string(),
8+
llvm_target: "nvptx64-nvidia-cuda".to_string(),
9+
10+
target_os: "cuda".to_string(),
11+
target_vendor: "nvidia".to_string(),
12+
target_env: String::new(),
13+
14+
linker_flavor: LinkerFlavor::PtxLinker,
15+
16+
target_endian: "little".to_string(),
17+
target_pointer_width: "64".to_string(),
18+
target_c_int_width: "32".to_string(),
19+
20+
options: TargetOptions {
21+
// The linker can be installed from `crates.io`.
22+
linker: Some("rust-ptx-linker".to_string()),
23+
24+
// With the `ptx-linker` approach, it can later be overridden via link flags.
25+
cpu: "sm_30".to_string(),
26+
27+
// FIXME: create tests for the atomics.
28+
max_atomic_width: Some(64),
29+
30+
// Unwinding on CUDA is neither feasible nor useful.
31+
panic_strategy: PanicStrategy::Abort,
32+
33+
// Needed to use `dylib` and `bin` crate types and the linker.
34+
dynamic_linking: true,
35+
executables: true,
36+
37+
// Avoid using dylib because it contains metadata not supported
38+
// by LLVM NVPTX backend.
39+
only_cdylib: true,
40+
41+
// Let the `ptx-linker` handle LLVM lowering into MC / assembly.
42+
obj_is_bitcode: true,
43+
44+
// Convenient and predictable naming scheme.
45+
dll_prefix: "".to_string(),
46+
dll_suffix: ".ptx".to_string(),
47+
exe_suffix: ".ptx".to_string(),
48+
49+
// Disable MergeFunctions LLVM optimisation pass because it can
50+
// produce kernel functions that call other kernel functions.
51+
// This behavior is not supported by PTX ISA.
52+
merge_functions: MergeFunctions::Disabled,
53+
54+
// FIXME: enable compilation tests for the target and
55+
// create the tests for this.
56+
abi_blacklist: vec![
57+
Abi::Cdecl,
58+
Abi::Stdcall,
59+
Abi::Fastcall,
60+
Abi::Vectorcall,
61+
Abi::Thiscall,
62+
Abi::Aapcs,
63+
Abi::Win64,
64+
Abi::SysV64,
65+
Abi::Msp430Interrupt,
66+
Abi::X86Interrupt,
67+
Abi::AmdGpuKernel,
68+
],
69+
70+
.. Default::default()
71+
},
72+
})
73+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
-include ../../run-make-fulldeps/tools.mk
2+
3+
ifeq ($(TARGET),nvptx64-nvidia-cuda)
4+
all:
5+
$(RUSTC) main.rs --crate-type="bin" --target $(TARGET) -O -C link-arg=--arch=sm_60 -o $(TMPDIR)/main.link_arg.ptx
6+
$(RUSTC) main.rs --crate-type="bin" --target $(TARGET) -O -C target-cpu=sm_60 -o $(TMPDIR)/main.target_cpu.ptx
7+
8+
FileCheck main.rs --input-file $(TMPDIR)/main.link_arg.ptx
9+
FileCheck main.rs --input-file $(TMPDIR)/main.target_cpu.ptx
10+
else
11+
all:
12+
endif
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#![no_std]
2+
#![no_main]
3+
#![deny(warnings)]
4+
#![feature(abi_ptx, core_intrinsics)]
5+
6+
// Check the overridden CUDA arch.
7+
// CHECK: .target sm_60
8+
// CHECK: .address_size 64
9+
10+
// Verify that no extra function declarations are present.
11+
// CHECK-NOT: .func
12+
13+
// CHECK-LABEL: .visible .entry top_kernel(
14+
#[no_mangle]
15+
pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
16+
// CHECK: add.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, 5;
17+
*b = *a + 5;
18+
}
19+
20+
// Verify that no extra function definitions are there.
21+
// CHECK-NOT: .func
22+
// CHECK-NOT: .entry
23+
24+
#[panic_handler]
25+
unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
26+
core::intrinsics::breakpoint();
27+
core::hint::unreachable_unchecked();
28+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-include ../../run-make-fulldeps/tools.mk
2+
3+
ifeq ($(TARGET),nvptx64-nvidia-cuda)
4+
all:
5+
$(RUSTC) dep.rs --crate-type="rlib" --target $(TARGET)
6+
$(RUSTC) kernel.rs --crate-type="cdylib" -O --target $(TARGET)
7+
FileCheck kernel.rs --input-file $(TMPDIR)/kernel.ptx
8+
else
9+
all:
10+
endif
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#![no_std]
2+
#![deny(warnings)]
3+
4+
#[inline(never)]
5+
#[no_mangle]
6+
pub fn wrapping_external_fn(a: u32) -> u32 {
7+
a.wrapping_mul(a)
8+
}
9+
10+
#[inline(never)]
11+
#[no_mangle]
12+
pub fn panicking_external_fn(a: u32) -> u32 {
13+
a * a
14+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#![no_std]
2+
#![deny(warnings)]
3+
#![feature(abi_ptx, core_intrinsics)]
4+
5+
extern crate dep;
6+
7+
// Verify the default CUDA arch.
8+
// CHECK: .target sm_30
9+
// CHECK: .address_size 64
10+
11+
// Make sure declarations are there.
12+
// CHECK: .func (.param .b32 func_retval0) wrapping_external_fn
13+
// CHECK: .func (.param .b32 func_retval0) panicking_external_fn
14+
// CHECK: .func [[PANIC_HANDLER:_ZN4core9panicking5panic[a-zA-Z0-9]+]]
15+
16+
// CHECK-LABEL: .visible .entry top_kernel(
17+
#[no_mangle]
18+
pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
19+
// CHECK: call.uni (retval0),
20+
// CHECK-NEXT: wrapping_external_fn
21+
// CHECK: ld.param.b32 %[[LHS:r[0-9]+]], [retval0+0];
22+
let lhs = dep::wrapping_external_fn(*a);
23+
24+
// CHECK: call.uni (retval0),
25+
// CHECK-NEXT: panicking_external_fn
26+
// CHECK: ld.param.b32 %[[RHS:r[0-9]+]], [retval0+0];
27+
let rhs = dep::panicking_external_fn(*a);
28+
29+
// CHECK: add.s32 %[[RES:r[0-9]+]], %[[RHS]], %[[LHS]];
30+
// CHECK: st.global.u32 [%{{rd[0-9]+}}], %[[RES]];
31+
*b = lhs + rhs;
32+
}
33+
34+
// Verify that external function bodies are available.
35+
// CHECK-LABEL: .func (.param .b32 func_retval0) wrapping_external_fn
36+
// CHECK: {
37+
// CHECK: st.param.b32 [func_retval0+0], %{{r[0-9]+}};
38+
// CHECK: }
39+
40+
// Also verify panic behavior.
41+
// CHECK-LABEL: .func (.param .b32 func_retval0) panicking_external_fn
42+
// CHECK: {
43+
// CHECK: %{{p[0-9]+}} bra [[PANIC_LABEL:[a-zA-Z0-9_]+]];
44+
// CHECK: [[PANIC_LABEL]]:
45+
// CHECK: call.uni
46+
// CHECK: [[PANIC_HANDLER]]
47+
// CHECK: }
48+
49+
// Verify whether our dummy panic formatter has a correct body.
50+
// CHECK: .func [[PANIC_FMT:_ZN4core9panicking9panic_fmt[a-zA-Z0-9]+]]()
51+
// CHECK: {
52+
// CHECK: trap;
53+
// CHECK: }
54+
55+
#[panic_handler]
56+
unsafe fn breakpoint_panic_handler(_: &::core::panic::PanicInfo) -> ! {
57+
core::intrinsics::breakpoint();
58+
core::hint::unreachable_unchecked();
59+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
-include ../../run-make-fulldeps/tools.mk
2+
3+
ifeq ($(TARGET),nvptx64-nvidia-cuda)
4+
all:
5+
$(RUSTC) kernel.rs --crate-type="rlib" --emit asm,llvm-ir -O --target $(TARGET)
6+
FileCheck kernel.rs --input-file $(TMPDIR)/kernel.s
7+
else
8+
all:
9+
endif
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#![no_std]
2+
#![deny(warnings)]
3+
#![feature(abi_ptx)]
4+
5+
// Verify the default CUDA arch.
6+
// CHECK: .target sm_30
7+
// CHECK: .address_size 64
8+
9+
// Verify function name doesn't contain unacceptable characters.
10+
// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN:_ZN[a-zA-Z0-9$_]+square[a-zA-Z0-9$_]+]]
11+
12+
// CHECK-LABEL: .visible .entry top_kernel(
13+
#[no_mangle]
14+
pub unsafe extern "ptx-kernel" fn top_kernel(a: *const u32, b: *mut u32) {
15+
// CHECK: call.uni (retval0),
16+
// CHECK-NEXT: [[IMPL_FN]]
17+
*b = deep::private::MyStruct::new(*a).square();
18+
}
19+
20+
pub mod deep {
21+
pub mod private {
22+
pub struct MyStruct<T>(T);
23+
24+
impl MyStruct<u32> {
25+
pub fn new(a: u32) -> Self {
26+
MyStruct(a)
27+
}
28+
29+
#[inline(never)]
30+
pub fn square(&self) -> u32 {
31+
self.0.wrapping_mul(self.0)
32+
}
33+
}
34+
}
35+
}
36+
37+
// Verify that external function bodies are available.
38+
// CHECK: .func (.param .b32 func_retval0) [[IMPL_FN]]
39+
// CHECK: {
40+
// CHECK: mul.lo.s32 %{{r[0-9]+}}, %{{r[0-9]+}}, %{{r[0-9]+}}
41+
// CHECK: }

0 commit comments

Comments
 (0)
Please sign in to comment.