diff --git a/clang/test/Driver/clang-linker-wrapper.cpp b/clang/test/Driver/clang-linker-wrapper.cpp index 96181605b3c7b..d0f3afdd2600d 100644 --- a/clang/test/Driver/clang-linker-wrapper.cpp +++ b/clang/test/Driver/clang-linker-wrapper.cpp @@ -121,7 +121,7 @@ // CHK-CMDS-AOT-NV-NEXT: sycl-post-link{{.*}} SYCL_POST_LINK_OPTIONS -o [[SYCLPOSTLINKOUT:.*]].table [[SECONDLLVMLINKOUT]].bc // CHK-CMDS-AOT-NV-NEXT: clang{{.*}} -o [[CLANGOUT:.*]] -dumpdir a.out.nvptx64.sm_50.img. --target=nvptx64-nvidia-cuda -march={{.*}} // CHK-CMDS-AOT-NV-NEXT: ptxas{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]] -// CHK-CMDS-AOT-NV-NEXT: fatbinary{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]] +// CHK-CMDS-AOT-NV-NEXT: fatbinary{{.*}} --create [[FATBINOUT:[^ ]+]]{{.*}}[[CLANGOUT]]{{.*}}[[PTXASOUT]] // CHK-CMDS-AOT-NV-NEXT: offload-wrapper: output: [[WRAPPEROUT:.*]].bc, input: [[FATBINOUT]] // CHK-CMDS-AOT-NV-NEXT: clang{{.*}} -c -o [[LLCOUT:.*]] [[WRAPPEROUT]] // CHK-CMDS-AOT-NV-NEXT: "{{.*}}/ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o @@ -166,7 +166,7 @@ // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: clang{{.*}} -c -o [[LLCOUT1:.*]] [[WRAPPEROUT1]] // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: clang{{.*}} -o [[CLANGOUT:.*]] -dumpdir a.out.nvptx64.sm_50.img. --target=nvptx64-nvidia-cuda -march={{.*}} // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: ptxas{{.*}} --output-file [[PTXASOUT:.*]] [[CLANGOUT]] -// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: fatbinary{{.*}} --create [[FATBINOUT:.*]] --image=profile={{.*}},file=[[CLANGOUT]] --image=profile={{.*}},file=[[PTXASOUT]] +// CHK-CMDS-AOT-NV-EMBED-IR-NEXT: fatbinary{{.*}}--create [[FATBINOUT:[^ ]+]]{{.*}}[[CLANGOUT]]{{.*}}[[PTXASOUT]] // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: offload-wrapper: output: [[WRAPPEROUT:.*]].bc, input: [[FATBINOUT]] // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: clang{{.*}} -c -o [[LLCOUT2:.*]] [[WRAPPEROUT]] // CHK-CMDS-AOT-NV-EMBED-IR-NEXT: "{{.*}}/ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[LLCOUT1]] [[LLCOUT2]] HOST_LIB_PATH HOST_STAT_LIB {{.*}}.o diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 07daac39cc7e9..275addc203c26 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -106,7 +106,7 @@ __attribute__((visibility("protected"), used)) int x; // CUDA: clang{{.*}} -o [[IMG_SM70:.+]] -dumpdir a.out.nvptx64.sm_70.img. --target=nvptx64-nvidia-cuda -march=sm_70 // CUDA: clang{{.*}} -o [[IMG_SM52:.+]] -dumpdir a.out.nvptx64.sm_52.img. --target=nvptx64-nvidia-cuda -march=sm_52 -// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]] +// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image{{(3)?}}={{(profile=sm_70,file=|kind=elf,sm=70,file=)}}[[IMG_SM70]] --image{{(3)?}}={{(profile=sm_52,file=|kind=elf,sm=52,file=)}}[[IMG_SM52]] // CUDA: usr/bin/ld{{.*}} {{.*}}.openmp.image.{{.*}}.o {{.*}}.cuda.image.{{.*}}.o // RUN: llvm-offload-binary -o %t.out \ @@ -240,7 +240,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-CUDA // RELOCATABLE-LINK-CUDA: clang{{.*}} -o {{.*}}.img -dumpdir a.out.nvptx64.sm_89.img. --target=nvptx64-nvidia-cuda -// RELOCATABLE-LINK-CUDA: fatbinary{{.*}} -64 --create {{.*}}.fatbin --image=profile=sm_89,file={{.*}}.img +// RELOCATABLE-LINK-CUDA: fatbinary{{.*}} -64 --create {{.*}}.fatbin --image{{(3)?}}={{(profile=sm_89,file=|kind=elf,sm=89,file=)}}{{.*}}.img // RELOCATABLE-LINK-CUDA: /usr/bin/ld.lld{{.*}}-r // RELOCATABLE-LINK-CUDA: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 23f94ade52d09..d81451663ab5d 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -457,9 +457,8 @@ void printVersion(raw_ostream &OS) { } namespace nvptx { -Expected -fatbinary(ArrayRef> InputFiles, - const ArgList &Args) { +Expected fatbinary(ArrayRef Images, + const ArgList &Args) { llvm::TimeTraceScope TimeScope("NVPTX fatbinary"); // NVPTX uses the fatbinary program to bundle the linked images. Expected FatBinaryPath = @@ -481,9 +480,26 @@ fatbinary(ArrayRef> InputFiles, CmdArgs.push_back(Triple.isArch64Bit() ? "-64" : "-32"); CmdArgs.push_back("--create"); CmdArgs.push_back(*TempFileOrErr); - for (const auto &[File, Arch] : InputFiles) - CmdArgs.push_back( - Args.MakeArgString("--image=profile=" + Arch + ",file=" + File)); + for (const OffloadingImage &Image : Images) { + StringRef File = Image.Image->getBufferIdentifier(); + StringRef Arch = Image.StringData.lookup("arch"); + + // Determine the kind based on image type + const char *Kind = "elf"; + if (Image.TheImageKind == ImageKind::IMG_PTX) + Kind = "ptx"; + + // Extract numeric SM value from arch + // Arch can be "sm_75", "compute_75", or just "75" + StringRef SMValue = Arch; + if (Arch.starts_with("sm_")) + SMValue = Arch.drop_front(3); + else if (Arch.starts_with("compute_")) + SMValue = Arch.drop_front(8); + + CmdArgs.push_back(Args.MakeArgString("--image3=kind=" + Twine(Kind) + + ",sm=" + SMValue + ",file=" + File)); + } if (Error Err = executeCommands(*FatBinaryPath, CmdArgs)) return std::move(Err); @@ -1992,12 +2008,7 @@ bundleSYCL(ArrayRef Images) { Expected>> bundleCuda(ArrayRef Images, const ArgList &Args) { - SmallVector, 4> InputFiles; - for (const OffloadingImage &Image : Images) - InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(), - Image.StringData.lookup("arch"))); - - auto FileOrErr = nvptx::fatbinary(InputFiles, Args); + auto FileOrErr = nvptx::fatbinary(Images, Args); if (!FileOrErr) return FileOrErr.takeError(); @@ -2279,7 +2290,7 @@ linkAndWrapDeviceFiles(ArrayRef> LinkerInputFiles, } for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { SmallVector Files = {SplitModules[I].ModuleFilePath}; - SmallVector, 4> BundlerInputFiles; + SmallVector BundlerImages; auto ClangOutputOrErr = linkDevice(Files, LinkerArgs, true /* IsSYCLKind */, CompileLinkOptionsOrErr->first); @@ -2292,14 +2303,35 @@ linkAndWrapDeviceFiles(ArrayRef> LinkerInputFiles, nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch); if (!PtxasOutputOrErr) return PtxasOutputOrErr.takeError(); - BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch); - BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch); - auto BundledFileOrErr = - nvptx::fatbinary(BundlerInputFiles, LinkerArgs); + + // Create OffloadingImage for PTX output + OffloadingImage PtxImage; + PtxImage.TheImageKind = ImageKind::IMG_PTX; + PtxImage.TheOffloadKind = OffloadKind::OFK_Cuda; + PtxImage.StringData["arch"] = VirtualArch; + auto PtxBuffer = MemoryBuffer::getFile(*ClangOutputOrErr); + if (!PtxBuffer) + return createFileError(*ClangOutputOrErr, PtxBuffer.getError()); + PtxImage.Image = std::move(*PtxBuffer); + BundlerImages.push_back(std::move(PtxImage)); + + // Create OffloadingImage for Cubin output + OffloadingImage CubinImage; + CubinImage.TheImageKind = ImageKind::IMG_Cubin; + CubinImage.TheOffloadKind = OffloadKind::OFK_Cuda; + CubinImage.StringData["arch"] = Arch; + auto CubinBuffer = MemoryBuffer::getFile(*PtxasOutputOrErr); + if (!CubinBuffer) + return createFileError(*PtxasOutputOrErr, CubinBuffer.getError()); + CubinImage.Image = std::move(*CubinBuffer); + BundlerImages.push_back(std::move(CubinImage)); + + auto BundledFileOrErr = nvptx::fatbinary(BundlerImages, LinkerArgs); if (!BundledFileOrErr) return BundledFileOrErr.takeError(); SplitModules[I].ModuleFilePath = *BundledFileOrErr; } else if (Triple.isAMDGCN()) { + SmallVector, 4> BundlerInputFiles; BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch); auto BundledFileOrErr = amdgcn::fatbinary(BundlerInputFiles, LinkerArgs);