Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/test/Driver/clang-linker-wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
// CHK-CMDS-AOT-NV-NEXT: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc
// CHK-CMDS-AOT-NV-NEXT: clang{{.*}} -o [[CLANGOUT:.*]] -dumpdir a.out.nvptx64.sm_50.img. --target=nvptx64-nvidia-cuda -march={{.*}}
// CHK-CMDS-AOT-NV-NEXT: ptxas{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
// CHK-CMDS-AOT-NV-NEXT: fatbinary{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
// CHK-CMDS-AOT-NV-NEXT: fatbinary{{.*}} --create [[FATBINOUT:[^ ]+]]{{.*}}[[CLANGOUT]]{{.*}}[[PTXASOUT]]
// CHK-CMDS-AOT-NV-NEXT: offload-wrapper: output: [[WRAPPEROUT:.*]].bc, input: [[FATBINOUT]]
// CHK-CMDS-AOT-NV-NEXT: clang{{.*}} -c -o [[LLCOUT:.*]] [[WRAPPEROUT]]
// CHK-CMDS-AOT-NV-NEXT: "{{.*}}/ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
Expand Down Expand Up @@ -166,7 +166,7 @@
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: clang{{.*}} -c -o [[LLCOUT1:.*]] [[WRAPPEROUT1]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: clang{{.*}} -o [[CLANGOUT:.*]] -dumpdir a.out.nvptx64.sm_50.img. --target=nvptx64-nvidia-cuda -march={{.*}}
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: ptxas{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: fatbinary{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: fatbinary{{.*}}--create [[FATBINOUT:[^ ]+]]{{.*}}[[CLANGOUT]]{{.*}}[[PTXASOUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: output: [[WRAPPEROUT:.*]].bc, input: [[FATBINOUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: clang{{.*}} -c -o [[LLCOUT2:.*]] [[WRAPPEROUT]]
// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}/ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o
Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/linker-wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ __attribute__((visibility("protected"), used)) int x;

// CUDA: clang{{.*}} -o [[IMG_SM70:.+]] -dumpdir a.out.nvptx64.sm_70.img. --target=nvptx64-nvidia-cuda -march=sm_70
// CUDA: clang{{.*}} -o [[IMG_SM52:.+]] -dumpdir a.out.nvptx64.sm_52.img. --target=nvptx64-nvidia-cuda -march=sm_52
// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]]
// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image{{(3)?}}={{(profile=sm_70,file=|kind=elf,sm=70,file=)}}[[IMG_SM70]] --image{{(3)?}}={{(profile=sm_52,file=|kind=elf,sm=52,file=)}}[[IMG_SM52]]
// CUDA: usr/bin/ld{{.*}} {{.*}}.openmp.image.{{.*}}.o {{.*}}.cuda.image.{{.*}}.o

// RUN: llvm-offload-binary -o %t.out \
Expand Down Expand Up @@ -240,7 +240,7 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-CUDA

// RELOCATABLE-LINK-CUDA: clang{{.*}} -o {{.*}}.img -dumpdir a.out.nvptx64.sm_89.img. --target=nvptx64-nvidia-cuda
// RELOCATABLE-LINK-CUDA: fatbinary{{.*}} -64 --create {{.*}}.fatbin --image=profile=sm_89,file={{.*}}.img
// RELOCATABLE-LINK-CUDA: fatbinary{{.*}} -64 --create {{.*}}.fatbin --image{{(3)?}}={{(profile=sm_89,file=|kind=elf,sm=89,file=)}}{{.*}}.img
// RELOCATABLE-LINK-CUDA: /usr/bin/ld.lld{{.*}}-r
// RELOCATABLE-LINK-CUDA: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading

Expand Down
66 changes: 49 additions & 17 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,9 +457,8 @@ void printVersion(raw_ostream &OS) {
}

namespace nvptx {
Expected<StringRef>
fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
const ArgList &Args) {
Expected<StringRef> fatbinary(ArrayRef<OffloadingImage> Images,
const ArgList &Args) {
llvm::TimeTraceScope TimeScope("NVPTX fatbinary");
// NVPTX uses the fatbinary program to bundle the linked images.
Expected<std::string> FatBinaryPath =
Expand All @@ -481,9 +480,26 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
CmdArgs.push_back(Triple.isArch64Bit() ? "-64" : "-32");
CmdArgs.push_back("--create");
CmdArgs.push_back(*TempFileOrErr);
for (const auto &[File, Arch] : InputFiles)
CmdArgs.push_back(
Args.MakeArgString("--image=profile=" + Arch + ",file=" + File));
for (const OffloadingImage &Image : Images) {
StringRef File = Image.Image->getBufferIdentifier();
StringRef Arch = Image.StringData.lookup("arch");

// Pick the fatbinary image kind: PTX images are tagged "ptx"; every other
// image kind is passed as "elf".
const char *Kind = "elf";
if (Image.TheImageKind == ImageKind::IMG_PTX)
Kind = "ptx";

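// Strip a leading "sm_" or "compute_" from the arch so only the SM value
// remains, e.g. "sm_75" and "compute_75" both become "75". An arch without
// either prefix (such as a bare "75") is passed through unchanged.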
StringRef SMValue = Arch;
if (Arch.starts_with("sm_"))
SMValue = Arch.drop_front(3);
else if (Arch.starts_with("compute_"))
SMValue = Arch.drop_front(8);

CmdArgs.push_back(Args.MakeArgString("--image3=kind=" + Twine(Kind) +
",sm=" + SMValue + ",file=" + File));
}

if (Error Err = executeCommands(*FatBinaryPath, CmdArgs))
return std::move(Err);
Expand Down Expand Up @@ -1992,12 +2008,7 @@ bundleSYCL(ArrayRef<OffloadingImage> Images) {

Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
for (const OffloadingImage &Image : Images)
InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(),
Image.StringData.lookup("arch")));

auto FileOrErr = nvptx::fatbinary(InputFiles, Args);
auto FileOrErr = nvptx::fatbinary(Images, Args);
if (!FileOrErr)
return FileOrErr.takeError();

Expand Down Expand Up @@ -2279,7 +2290,7 @@ linkAndWrapDeviceFiles(ArrayRef<SmallVector<OffloadFile>> LinkerInputFiles,
}
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) {
SmallVector<StringRef> Files = {SplitModules[I].ModuleFilePath};
SmallVector<std::pair<StringRef, StringRef>, 4> BundlerInputFiles;
SmallVector<OffloadingImage, 4> BundlerImages;
auto ClangOutputOrErr =
linkDevice(Files, LinkerArgs, true /* IsSYCLKind */,
CompileLinkOptionsOrErr->first);
Expand All @@ -2292,14 +2303,35 @@ linkAndWrapDeviceFiles(ArrayRef<SmallVector<OffloadFile>> LinkerInputFiles,
nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch);
if (!PtxasOutputOrErr)
return PtxasOutputOrErr.takeError();
BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch);
BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch);
auto BundledFileOrErr =
nvptx::fatbinary(BundlerInputFiles, LinkerArgs);

// Create OffloadingImage for PTX output
OffloadingImage PtxImage;
PtxImage.TheImageKind = ImageKind::IMG_PTX;
PtxImage.TheOffloadKind = OffloadKind::OFK_Cuda;
PtxImage.StringData["arch"] = VirtualArch;
auto PtxBuffer = MemoryBuffer::getFile(*ClangOutputOrErr);
if (!PtxBuffer)
return createFileError(*ClangOutputOrErr, PtxBuffer.getError());
PtxImage.Image = std::move(*PtxBuffer);
BundlerImages.push_back(std::move(PtxImage));

// Create OffloadingImage for Cubin output
OffloadingImage CubinImage;
CubinImage.TheImageKind = ImageKind::IMG_Cubin;
CubinImage.TheOffloadKind = OffloadKind::OFK_Cuda;
CubinImage.StringData["arch"] = Arch;
auto CubinBuffer = MemoryBuffer::getFile(*PtxasOutputOrErr);
if (!CubinBuffer)
return createFileError(*PtxasOutputOrErr, CubinBuffer.getError());
CubinImage.Image = std::move(*CubinBuffer);
BundlerImages.push_back(std::move(CubinImage));

auto BundledFileOrErr = nvptx::fatbinary(BundlerImages, LinkerArgs);
if (!BundledFileOrErr)
return BundledFileOrErr.takeError();
SplitModules[I].ModuleFilePath = *BundledFileOrErr;
} else if (Triple.isAMDGCN()) {
SmallVector<std::pair<StringRef, StringRef>, 4> BundlerInputFiles;
BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch);
auto BundledFileOrErr =
amdgcn::fatbinary(BundlerInputFiles, LinkerArgs);
Expand Down
Loading