From d2ed973482fdd800013658e83a61709b29e0a80f Mon Sep 17 00:00:00 2001 From: V3n3RiX Date: Thu, 27 Jun 2024 07:59:40 +0100 Subject: gentoo auto-resync : 27:06:2024 - 07:59:39 --- ...omgr-6.0.0-extend-isa-compatibility-check.patch | 204 +++++++++++++++++++++ .../files/rocm-comgr-6.1.0-dont-add-nogpulib.patch | 31 ++++ .../rocm-comgr-6.1.0-enforce-oop-compiler.patch | 26 +++ .../rocm-comgr-6.1.0-fix-comgr-default-flags.patch | 51 ++++++ .../files/rocm-comgr-6.1.0-llvm-18-compat.patch | 79 ++++++++ 5 files changed, 391 insertions(+) create mode 100644 dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch create mode 100644 dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-dont-add-nogpulib.patch create mode 100644 dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-enforce-oop-compiler.patch create mode 100644 dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-fix-comgr-default-flags.patch create mode 100644 dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-llvm-18-compat.patch (limited to 'dev-libs/rocm-comgr/files') diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch new file mode 100644 index 000000000000..e65400c792e4 --- /dev/null +++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch @@ -0,0 +1,204 @@ +Load kernels when compatible by ISA, e. g. if AMDGPU_TARGETS is set +to gfx1030 and some application was started on gfx1036, it loads gfx1030 kernel. 
+ +Based on Debian patch by Cordell Bloor +https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch +--- comgr.orig/src/comgr-metadata.cpp ++++ comgr/src/comgr-metadata.cpp +@@ -923,23 +923,86 @@ static constexpr const char *CLANG_OFFLOAD_BUNDLER_MAGIC = + static constexpr size_t OffloadBundleMagicLen = + strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC); + +-bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) { ++struct GfxPattern { ++ std::string root; ++ std::string suffixes; ++}; ++ ++static bool matches(const GfxPattern& p, StringRef s) { ++ if (p.root.size() + 1 != s.size()) { ++ return false; ++ } ++ if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) { ++ return false; ++ } ++ return p.suffixes.find(s[p.root.size()]) != std::string::npos; ++} ++ ++static bool isGfx900EquivalentProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx90", "029c"}, processor); ++} ++ ++static bool isGfx900SupersetProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx90", "0269c"}, processor); ++} ++ ++static bool isGfx1030EquivalentProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx103", "0123456"}, processor); ++} ++ ++static bool isGfx1010EquivalentProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx101", "0"}, processor); ++} ++ ++static bool isGfx1010SupersetProcessor(StringRef processor) { ++ return matches(GfxPattern{"gfx101", "0123"}, processor); ++} ++ ++enum CompatibilityScore { ++ CS_EXACT_MATCH = 1 << 4, ++ CS_PROCESSOR_MATCH = 1 << 3, ++ CS_PROCESSOR_COMPATIBLE = 1 << 2, ++ CS_XNACK_SPECIALIZED = 1 << 1, ++ CS_SRAM_ECC_SPECIALIZED = 1 << 0, ++ CS_INCOMPATIBLE = 0, ++}; ++ ++static int getProcessorCompatibilityScore(StringRef CodeObjectProcessor, ++ StringRef AgentProcessor) { ++ if (CodeObjectProcessor == AgentProcessor) { ++ return CS_PROCESSOR_MATCH; ++ } ++ ++ bool compatible = false; ++ if 
(isGfx900SupersetProcessor(AgentProcessor)) { ++ compatible = isGfx900EquivalentProcessor(CodeObjectProcessor); ++ } else if (isGfx1010SupersetProcessor(AgentProcessor)) { ++ compatible = isGfx1010EquivalentProcessor(CodeObjectProcessor); ++ } else if (isGfx1030EquivalentProcessor(AgentProcessor)) { ++ compatible = isGfx1030EquivalentProcessor(CodeObjectProcessor); ++ } ++ ++ return compatible ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE; ++} ++ ++static int getCompatiblityScore(StringRef IsaName, StringRef CodeObjectIsaName) { + if (IsaName == CodeObjectIsaName) { +- return true; ++ return CS_EXACT_MATCH; + } + + TargetIdentifier CodeObjectIdent; + if (parseTargetIdentifier(CodeObjectIsaName, CodeObjectIdent)) { +- return false; ++ return CS_INCOMPATIBLE; + } + + TargetIdentifier IsaIdent; + if (parseTargetIdentifier(IsaName, IsaIdent)) { +- return false; ++ return CS_INCOMPATIBLE; + } + +- if (CodeObjectIdent.Processor != IsaIdent.Processor) { +- return false; ++ int ProcessorScore = getProcessorCompatibilityScore(CodeObjectIdent.Processor, IsaIdent.Processor); ++ if (ProcessorScore == CS_INCOMPATIBLE) { ++ return CS_INCOMPATIBLE; + } + + char CodeObjectXnack = ' ', CodeObjectSramecc = ' '; +@@ -963,18 +1026,23 @@ bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) { + } + } + ++ int XnackBonus = 0; + if (CodeObjectXnack != ' ') { + if (CodeObjectXnack != IsaXnack) { +- return false; ++ return CS_INCOMPATIBLE; + } ++ XnackBonus = CS_XNACK_SPECIALIZED; + } + ++ int SrameccBonus = 0; + if (CodeObjectSramecc != ' ') { + if (CodeObjectSramecc != IsaSramecc) { +- return false; ++ return CS_INCOMPATIBLE; + } ++ SrameccBonus = CS_SRAM_ECC_SPECIALIZED; + } +- return true; ++ ++ return ProcessorScore + XnackBonus + SrameccBonus; + } + + amd_comgr_status_t +@@ -992,14 +1060,21 @@ lookUpCodeObjectInSharedObject(DataObject *DataP, + return Status; + } + ++ int MaxScore = 0; ++ unsigned MaxScoreItem; + for (unsigned J = 0; J < QueryListSize; J++) { +- if 
(isCompatibleIsaName(QueryList[J].isa, IsaName)) { +- QueryList[J].offset = 0; +- QueryList[J].size = DataP->Size; +- break; ++ int Score = getCompatiblityScore(QueryList[J].isa, IsaName); ++ if (Score > MaxScore) { ++ MaxScore = Score; ++ MaxScoreItem = J; + } + } + ++ if (MaxScore) { ++ QueryList[MaxScoreItem].offset = 0; ++ QueryList[MaxScoreItem].size = DataP->Size; ++ } ++ + return AMD_COMGR_STATUS_SUCCESS; + } + +@@ -1011,7 +1086,6 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP, + return lookUpCodeObjectInSharedObject(DataP, QueryList, QueryListSize); + } + +- int Seen = 0; + BinaryStreamReader Reader(StringRef(DataP->Data, DataP->Size), + support::little); + +@@ -1037,6 +1111,8 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP, + QueryList[I].size = 0; + } + ++ std::vector<int> QueryListScores(QueryListSize); ++ + // For each code object, extract BundleEntryID information, and check that + // against each ISA in the QueryList + for (uint64_t I = 0; I < NumOfCodeObjects; I++) { +@@ -1069,28 +1145,22 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP, + } + + for (unsigned J = 0; J < QueryListSize; J++) { +- // If this QueryList item has already been found to be compatible with ++ // If this QueryList item has exact match with + // another BundleEntryID, no need to check against the current + // BundleEntryID +- if (QueryList[J].size != 0) { ++ if (QueryListScores[J] == CS_EXACT_MATCH) { + continue; + } + + // If the QueryList Isa is compatible with the BundleEntryID, set the + // QueryList offset/size to this BundleEntryID +- if (isCompatibleIsaName(QueryList[J].isa, OffloadAndTargetId.second)) { ++ int Score = getCompatiblityScore(QueryList[J].isa, OffloadAndTargetId.second); ++ if (Score > QueryListScores[J]) { ++ QueryListScores[J] = Score; + QueryList[J].offset = BundleEntryCodeObjectOffset; + QueryList[J].size = BundleEntryCodeObjectSize; +- Seen++; +- break; + } + } +- +- // Stop iterating over BundleEntryIDs once we have populated the 
entire +- // QueryList +- if (Seen == (int) QueryListSize) { +- break; +- } + } + + return AMD_COMGR_STATUS_SUCCESS; diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-dont-add-nogpulib.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-dont-add-nogpulib.patch new file mode 100644 index 000000000000..526318f5bbd9 --- /dev/null +++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-dont-add-nogpulib.patch @@ -0,0 +1,31 @@ +From 179ec2e67bf882c6bccb27f81db3d80f7eb9946e Mon Sep 17 00:00:00 2001 +From: Jacob Lambert +Date: Fri, 12 Apr 2024 13:56:42 -0700 +Subject: [PATCH] [Comgr] Don't add -nogpulib option for assembley action + +We can omit setting -nogpulib even without a -rocm-path=. option +when calling the assembly action. This avoids the following warning: + +warning: argument unused during compilation: '-nogpulib' +Change-Id: I66d512befbafd9382f050c45a0d3950985e8ae38 +--- + amd/comgr/src/comgr-compiler.cpp | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/comgr-compiler.cpp b/src/comgr-compiler.cpp +index 143ab4e4f3db..21b233fa94b2 100644 +--- a/src/comgr-compiler.cpp ++++ b/src/comgr-compiler.cpp +@@ -1758,6 +1758,9 @@ amd_comgr_status_t AMDGPUCompiler::assembleToRelocatable() { + Args.push_back("-x"); + Args.push_back("assembler"); + ++ // -nogpulib option not needed for assembling to relocatable ++ NoGpuLib = false; ++ + return processFiles(AMD_COMGR_DATA_KIND_RELOCATABLE, ".o"); + } + +-- +2.44.0 + diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-enforce-oop-compiler.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-enforce-oop-compiler.patch new file mode 100644 index 000000000000..99cbf2f22ce6 --- /dev/null +++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-enforce-oop-compiler.patch @@ -0,0 +1,26 @@ +In-process compilation breaks the compile_source_to_executable test, as it attempts to +build .so as fatbin, and some options do not work with unpatched LLVM. 
+--- a/src/comgr-compiler.cpp ++++ b/src/comgr-compiler.cpp +@@ -1226,10 +1226,7 @@ amd_comgr_status_t AMDGPUCompiler::compileToFatBin() { + return AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT; + } + +- // This is a workaround to support HIP OOP Fatbin Compilation +- CompileOOP = true; + auto Status = processFiles(AMD_COMGR_DATA_KIND_FATBIN, ".fatbin"); +- CompileOOP = false; + + return Status; + } +--- a/src/comgr-compiler.h ++++ b/src/comgr-compiler.h +@@ -102,7 +102,7 @@ class AMDGPUCompiler { + std::string ClangIncludePath; + std::string ClangIncludePath2; + /// Perform out-of-process compilation. +- bool CompileOOP = false; ++ bool CompileOOP = true; + /// Precompiled header file paths. + llvm::SmallVector<llvm::SmallString<128>, 2> PrecompiledHeaders; + /// Arguments common to all driver invocations in the current action. diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-fix-comgr-default-flags.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-fix-comgr-default-flags.patch new file mode 100644 index 000000000000..d885da08ddae --- /dev/null +++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-fix-comgr-default-flags.patch @@ -0,0 +1,51 @@ +Remove HIP/ROCM includes ("-isystem /usr/include"), as they break inclusion of <cmath>. +Add inclusion of Clang resource dir (e.g. /usr/lib/clang/17), as it is used in hip runtime like that. +Remove hardcoded target to fix HIP on musl. + +Issues: +* https://github.com/ROCm/clr/issues/82 +* https://github.com/ROCm/llvm-project/issues/92 +--- a/src/comgr-compiler.cpp ++++ b/src/comgr-compiler.cpp +@@ -1028,9 +1028,8 @@ AMDGPUCompiler::addTargetIdentifierFlags(llvm::StringRef IdentStr, + } + + amd_comgr_status_t AMDGPUCompiler::addCompilationFlags() { +- HIPIncludePath = (Twine(env::getHIPPath()) + "/include").str(); +- // HIP headers depend on hsa.h which is in ROCM_DIR/include. 
+- ROCMIncludePath = (Twine(env::getROCMPath()) + "/include").str(); ++ // Allow to include <include/cuda_wrappers/algorithm> (used in some hip files) ++ ClangIncludePath = @CLANG_RESOURCE_DIR@; + + Args.push_back("-x"); + +@@ -1051,13 +1050,9 @@ amd_comgr_status_t AMDGPUCompiler::addCompilationFlags() { + case AMD_COMGR_LANGUAGE_HIP: + Args.push_back("hip"); + Args.push_back("-std=c++11"); +- Args.push_back("-target"); +- Args.push_back("x86_64-unknown-linux-gnu"); + Args.push_back("--cuda-device-only"); + Args.push_back("-isystem"); +- Args.push_back(ROCMIncludePath.c_str()); +- Args.push_back("-isystem"); +- Args.push_back(HIPIncludePath.c_str()); ++ Args.push_back(ClangIncludePath.c_str()); + break; + default: + return AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT; +--- a/src/comgr-compiler.h ++++ b/src/comgr-compiler.h +@@ -95,12 +95,7 @@ class AMDGPUCompiler { + /// User supplied target GPU Arch. + std::string GPUArch; + std::string OffloadArch; +- /// ROCM include Path +- std::string ROCMIncludePath; +- /// HIP and Clang Include Paths +- std::string HIPIncludePath; + std::string ClangIncludePath; +- std::string ClangIncludePath2; + /// Perform out-of-process compilation. + bool CompileOOP = false; + /// Precompiled header file paths. diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-llvm-18-compat.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-llvm-18-compat.patch new file mode 100644 index 000000000000..df008e4230d7 --- /dev/null +++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-llvm-18-compat.patch @@ -0,0 +1,79 @@ +ROCm 6.0.0 and 6.0.2 releases use a mix of LLVM 17 and 18 +forked as https://github.com/RadeonOpenCompute/llvm-project +which makes some libraries compatible with LLVM 17, +while others require LLVM 18. 
+ +Backports: +* https://github.com/ROCm/llvm-project/commit/6cbc4dc91dfeb1cf2295cb350866e0b3a07dfee4 +* https://github.com/ROCm/llvm-project/commit/179ec2e67bf882c6bccb27f81db3d80f7eb9946e +* https://github.com/ROCm/llvm-project/commit/ee123c3d1706bc4346511b1a9032020782576350 +--- a/src/comgr-compiler.cpp ++++ b/src/comgr-compiler.cpp +@@ -205,7 +205,11 @@ bool AssemblerInvocation::createFromArgs(AssemblerInvocation &Opts, + // Parse the arguments. + const OptTable &OptTbl = getDriverOptTable(); + ++#if LLVM_VERSION_MAJOR == 17 + const unsigned IncludedFlagsBitmask = options::CC1AsOption; ++#else ++ llvm::opt::Visibility IncludedFlagsBitmask(options::CC1AsOption); ++#endif + unsigned MissingArgIndex, MissingArgCount; + InputArgList Args = OptTbl.ParseArgs(Argv, MissingArgIndex, MissingArgCount, + IncludedFlagsBitmask); +@@ -1041,11 +1045,15 @@ amd_comgr_status_t AMDGPUCompiler::addCompilationFlags() { + Args.push_back("cl"); + Args.push_back("-std=cl1.2"); + Args.push_back("-cl-no-stdinc"); ++ Args.push_back("-mllvm"); ++ Args.push_back("-amdgpu-internalize-symbols"); + break; + case AMD_COMGR_LANGUAGE_OPENCL_2_0: + Args.push_back("cl"); + Args.push_back("-std=cl2.0"); + Args.push_back("-cl-no-stdinc"); ++ Args.push_back("-mllvm"); ++ Args.push_back("-amdgpu-internalize-symbols"); + break; + case AMD_COMGR_LANGUAGE_HIP: + Args.push_back("hip"); +@@ -1605,6 +1613,9 @@ amd_comgr_status_t AMDGPUCompiler::assembleToRelocatable() { + Args.push_back("-x"); + Args.push_back("assembler"); + ++ // -nogpulib option not needed for assembling to relocatable ++ NoGpuLib = false; ++ + return processFiles(AMD_COMGR_DATA_KIND_RELOCATABLE, ".o"); + } + +--- a/src/comgr-metadata.cpp ++++ b/src/comgr-metadata.cpp +@@ -1087,7 +1087,12 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP, + } + + BinaryStreamReader Reader(StringRef(DataP->Data, DataP->Size), +- support::little); ++#if LLVM_VERSION_MAJOR == 17 ++ support::little ++#else ++ llvm::endianness::little ++#endif ++ ); + 
+ StringRef Magic; + if (auto EC = Reader.readFixedString(Magic, OffloadBundleMagicLen)) { +--- a/test/compile_log_remarks_test.c ++++ b/test/compile_log_remarks_test.c +@@ -107,7 +107,11 @@ int main(int argc, char *argv[]) { + AMD_COMGR_DATA_KIND_SOURCE, 1); + + checkLogs("AMD_COMGR_ACTION_CODEGEN_BC_TO_ASSEMBLY", DataSetAsm, ++#if LLVM_VERSION_MAJOR == 17 + "remark: <unknown>:0:0: 8 stack bytes in function " ++#else ++ "remark: <unknown>:0:0: 8 stack bytes in function 'f' " ++#endif + "[-Rpass-analysis=prologepilog]"); + + Status = amd_comgr_destroy_data_set(DataSetCl); -- cgit v1.2.3