summaryrefslogtreecommitdiff
path: root/dev-libs/rocm-comgr/files
diff options
context:
space:
mode:
authorV3n3RiX <venerix@koprulu.sector>2024-06-27 07:59:40 +0100
committerV3n3RiX <venerix@koprulu.sector>2024-06-27 07:59:40 +0100
commitd2ed973482fdd800013658e83a61709b29e0a80f (patch)
tree57ea7666a57b5a05a4c8866e4915e90b4a6e7c94 /dev-libs/rocm-comgr/files
parent9f6a82a85d400d6ae7de04c43cee88dbc6bc4da0 (diff)
gentoo auto-resync : 27:06:2024 - 07:59:39
Diffstat (limited to 'dev-libs/rocm-comgr/files')
-rw-r--r--dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch204
-rw-r--r--dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-dont-add-nogpulib.patch31
-rw-r--r--dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-enforce-oop-compiler.patch26
-rw-r--r--dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-fix-comgr-default-flags.patch51
-rw-r--r--dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-llvm-18-compat.patch79
5 files changed, 391 insertions, 0 deletions
diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch
new file mode 100644
index 000000000000..e65400c792e4
--- /dev/null
+++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.0.0-extend-isa-compatibility-check.patch
@@ -0,0 +1,204 @@
+Load kernels when compatible by ISA, e.g. if AMDGPU_TARGETS is set to gfx1030
+and an application is started on gfx1036, the gfx1030 kernel is loaded.
+
+Based on Debian patch by Cordell Bloor <cgmb@slerp.xyz>
+https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch
+--- comgr.orig/src/comgr-metadata.cpp
++++ comgr/src/comgr-metadata.cpp
+@@ -923,23 +923,86 @@ static constexpr const char *CLANG_OFFLOAD_BUNDLER_MAGIC =
+ static constexpr size_t OffloadBundleMagicLen =
+ strLiteralLength(CLANG_OFFLOAD_BUNDLER_MAGIC);
+
+-bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) {
++struct GfxPattern {
++ std::string root;
++ std::string suffixes;
++};
++
++static bool matches(const GfxPattern& p, StringRef s) {
++ if (p.root.size() + 1 != s.size()) {
++ return false;
++ }
++ if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) {
++ return false;
++ }
++ return p.suffixes.find(s[p.root.size()]) != std::string::npos;
++}
++
++static bool isGfx900EquivalentProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx90", "029c"}, processor);
++}
++
++static bool isGfx900SupersetProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx90", "0269c"}, processor);
++}
++
++static bool isGfx1030EquivalentProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx103", "0123456"}, processor);
++}
++
++static bool isGfx1010EquivalentProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx101", "0"}, processor);
++}
++
++static bool isGfx1010SupersetProcessor(StringRef processor) {
++ return matches(GfxPattern{"gfx101", "0123"}, processor);
++}
++
++enum CompatibilityScore {
++ CS_EXACT_MATCH = 1 << 4,
++ CS_PROCESSOR_MATCH = 1 << 3,
++ CS_PROCESSOR_COMPATIBLE = 1 << 2,
++ CS_XNACK_SPECIALIZED = 1 << 1,
++ CS_SRAM_ECC_SPECIALIZED = 1 << 0,
++ CS_INCOMPATIBLE = 0,
++};
++
++static int getProcessorCompatibilityScore(StringRef CodeObjectProcessor,
++ StringRef AgentProcessor) {
++ if (CodeObjectProcessor == AgentProcessor) {
++ return CS_PROCESSOR_MATCH;
++ }
++
++ bool compatible = false;
++ if (isGfx900SupersetProcessor(AgentProcessor)) {
++ compatible = isGfx900EquivalentProcessor(CodeObjectProcessor);
++ } else if (isGfx1010SupersetProcessor(AgentProcessor)) {
++ compatible = isGfx1010EquivalentProcessor(CodeObjectProcessor);
++ } else if (isGfx1030EquivalentProcessor(AgentProcessor)) {
++ compatible = isGfx1030EquivalentProcessor(CodeObjectProcessor);
++ }
++
++ return compatible ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
++}
++
++static int getCompatiblityScore(StringRef IsaName, StringRef CodeObjectIsaName) {
+ if (IsaName == CodeObjectIsaName) {
+- return true;
++ return CS_EXACT_MATCH;
+ }
+
+ TargetIdentifier CodeObjectIdent;
+ if (parseTargetIdentifier(CodeObjectIsaName, CodeObjectIdent)) {
+- return false;
++ return CS_INCOMPATIBLE;
+ }
+
+ TargetIdentifier IsaIdent;
+ if (parseTargetIdentifier(IsaName, IsaIdent)) {
+- return false;
++ return CS_INCOMPATIBLE;
+ }
+
+- if (CodeObjectIdent.Processor != IsaIdent.Processor) {
+- return false;
++ int ProcessorScore = getProcessorCompatibilityScore(CodeObjectIdent.Processor, IsaIdent.Processor);
++ if (ProcessorScore == CS_INCOMPATIBLE) {
++ return CS_INCOMPATIBLE;
+ }
+
+ char CodeObjectXnack = ' ', CodeObjectSramecc = ' ';
+@@ -963,18 +1026,23 @@ bool isCompatibleIsaName(StringRef IsaName, StringRef CodeObjectIsaName) {
+ }
+ }
+
++ int XnackBonus = 0;
+ if (CodeObjectXnack != ' ') {
+ if (CodeObjectXnack != IsaXnack) {
+- return false;
++ return CS_INCOMPATIBLE;
+ }
++ XnackBonus = CS_XNACK_SPECIALIZED;
+ }
+
++ int SrameccBonus = 0;
+ if (CodeObjectSramecc != ' ') {
+ if (CodeObjectSramecc != IsaSramecc) {
+- return false;
++ return CS_INCOMPATIBLE;
+ }
++ SrameccBonus = CS_SRAM_ECC_SPECIALIZED;
+ }
+- return true;
++
++ return ProcessorScore + XnackBonus + SrameccBonus;
+ }
+
+ amd_comgr_status_t
+@@ -992,14 +1060,21 @@ lookUpCodeObjectInSharedObject(DataObject *DataP,
+ return Status;
+ }
+
++ int MaxScore = 0;
++ unsigned MaxScoreItem;
+ for (unsigned J = 0; J < QueryListSize; J++) {
+- if (isCompatibleIsaName(QueryList[J].isa, IsaName)) {
+- QueryList[J].offset = 0;
+- QueryList[J].size = DataP->Size;
+- break;
++ int Score = getCompatiblityScore(QueryList[J].isa, IsaName);
++ if (Score > MaxScore) {
++ MaxScore = Score;
++ MaxScoreItem = J;
+ }
+ }
+
++ if (MaxScore) {
++ QueryList[MaxScoreItem].offset = 0;
++ QueryList[MaxScoreItem].size = DataP->Size;
++ }
++
+ return AMD_COMGR_STATUS_SUCCESS;
+ }
+
+@@ -1011,7 +1086,6 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
+ return lookUpCodeObjectInSharedObject(DataP, QueryList, QueryListSize);
+ }
+
+- int Seen = 0;
+ BinaryStreamReader Reader(StringRef(DataP->Data, DataP->Size),
+ support::little);
+
+@@ -1037,6 +1111,8 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
+ QueryList[I].size = 0;
+ }
+
++ std::vector<int> QueryListScores(QueryListSize);
++
+ // For each code object, extract BundleEntryID information, and check that
+ // against each ISA in the QueryList
+ for (uint64_t I = 0; I < NumOfCodeObjects; I++) {
+@@ -1069,28 +1145,22 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
+ }
+
+ for (unsigned J = 0; J < QueryListSize; J++) {
+- // If this QueryList item has already been found to be compatible with
++ // If this QueryList item already has an exact match with
+ // another BundleEntryID, no need to check against the current
+ // BundleEntryID
+- if (QueryList[J].size != 0) {
++ if (QueryListScores[J] == CS_EXACT_MATCH) {
+ continue;
+ }
+
+ // If the QueryList Isa is compatible with the BundleEntryID, set the
+ // QueryList offset/size to this BundleEntryID
+- if (isCompatibleIsaName(QueryList[J].isa, OffloadAndTargetId.second)) {
++ int Score = getCompatiblityScore(QueryList[J].isa, OffloadAndTargetId.second);
++ if (Score > QueryListScores[J]) {
++ QueryListScores[J] = Score;
+ QueryList[J].offset = BundleEntryCodeObjectOffset;
+ QueryList[J].size = BundleEntryCodeObjectSize;
+- Seen++;
+- break;
+ }
+ }
+-
+- // Stop iterating over BundleEntryIDs once we have populated the entire
+- // QueryList
+- if (Seen == (int) QueryListSize) {
+- break;
+- }
+ }
+
+ return AMD_COMGR_STATUS_SUCCESS;
diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-dont-add-nogpulib.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-dont-add-nogpulib.patch
new file mode 100644
index 000000000000..526318f5bbd9
--- /dev/null
+++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-dont-add-nogpulib.patch
@@ -0,0 +1,31 @@
+From 179ec2e67bf882c6bccb27f81db3d80f7eb9946e Mon Sep 17 00:00:00 2001
+From: Jacob Lambert <jacob.lambert@amd.com>
+Date: Fri, 12 Apr 2024 13:56:42 -0700
+Subject: [PATCH] [Comgr] Don't add -nogpulib option for assembley action
+
+We can omit setting -nogpulib even without a -rocm-path=. option
+when calling the assembly action. This avoids the following warning:
+
+warning: argument unused during compilation: '-nogpulib'
+Change-Id: I66d512befbafd9382f050c45a0d3950985e8ae38
+---
+ amd/comgr/src/comgr-compiler.cpp | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/src/comgr-compiler.cpp b/src/comgr-compiler.cpp
+index 143ab4e4f3db..21b233fa94b2 100644
+--- a/src/comgr-compiler.cpp
++++ b/src/comgr-compiler.cpp
+@@ -1758,6 +1758,9 @@ amd_comgr_status_t AMDGPUCompiler::assembleToRelocatable() {
+ Args.push_back("-x");
+ Args.push_back("assembler");
+
++ // -nogpulib option not needed for assembling to relocatable
++ NoGpuLib = false;
++
+ return processFiles(AMD_COMGR_DATA_KIND_RELOCATABLE, ".o");
+ }
+
+--
+2.44.0
+
diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-enforce-oop-compiler.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-enforce-oop-compiler.patch
new file mode 100644
index 000000000000..99cbf2f22ce6
--- /dev/null
+++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-enforce-oop-compiler.patch
@@ -0,0 +1,26 @@
+In-process compilation breaks the compile_source_to_executable test, as it attempts to
+build a .so as a fatbin, and some options do not work with unpatched LLVM.
+--- a/src/comgr-compiler.cpp
++++ b/src/comgr-compiler.cpp
+@@ -1226,10 +1226,7 @@ amd_comgr_status_t AMDGPUCompiler::compileToFatBin() {
+ return AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT;
+ }
+
+- // This is a workaround to support HIP OOP Fatbin Compilation
+- CompileOOP = true;
+ auto Status = processFiles(AMD_COMGR_DATA_KIND_FATBIN, ".fatbin");
+- CompileOOP = false;
+
+ return Status;
+ }
+--- a/src/comgr-compiler.h
++++ b/src/comgr-compiler.h
+@@ -102,7 +102,7 @@ class AMDGPUCompiler {
+ std::string ClangIncludePath;
+ std::string ClangIncludePath2;
+ /// Perform out-of-process compilation.
+- bool CompileOOP = false;
++ bool CompileOOP = true;
+ /// Precompiled header file paths.
+ llvm::SmallVector<llvm::SmallString<128>, 2> PrecompiledHeaders;
+ /// Arguments common to all driver invocations in the current action.
diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-fix-comgr-default-flags.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-fix-comgr-default-flags.patch
new file mode 100644
index 000000000000..d885da08ddae
--- /dev/null
+++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-fix-comgr-default-flags.patch
@@ -0,0 +1,51 @@
+Remove HIP/ROCM includes ("-isystem /usr/include"), as they break inclusion of <math.h>.
+Add the Clang resource dir (e.g. /usr/lib/clang/17) to the include path, as the HIP runtime uses it that way.
+Remove hardcoded target to fix HIP on musl.
+
+Issues:
+* https://github.com/ROCm/clr/issues/82
+* https://github.com/ROCm/llvm-project/issues/92
+--- a/src/comgr-compiler.cpp
++++ b/src/comgr-compiler.cpp
+@@ -1028,9 +1028,8 @@ AMDGPUCompiler::addTargetIdentifierFlags(llvm::StringRef IdentStr,
+ }
+
+ amd_comgr_status_t AMDGPUCompiler::addCompilationFlags() {
+- HIPIncludePath = (Twine(env::getHIPPath()) + "/include").str();
+- // HIP headers depend on hsa.h which is in ROCM_DIR/include.
+- ROCMIncludePath = (Twine(env::getROCMPath()) + "/include").str();
++ // Allow to include <include/cuda_wrappers/algorithm> (used in some hip files)
++ ClangIncludePath = @CLANG_RESOURCE_DIR@;
+
+ Args.push_back("-x");
+
+@@ -1051,13 +1050,9 @@ amd_comgr_status_t AMDGPUCompiler::addCompilationFlags() {
+ case AMD_COMGR_LANGUAGE_HIP:
+ Args.push_back("hip");
+ Args.push_back("-std=c++11");
+- Args.push_back("-target");
+- Args.push_back("x86_64-unknown-linux-gnu");
+ Args.push_back("--cuda-device-only");
+ Args.push_back("-isystem");
+- Args.push_back(ROCMIncludePath.c_str());
+- Args.push_back("-isystem");
+- Args.push_back(HIPIncludePath.c_str());
++ Args.push_back(ClangIncludePath.c_str());
+ break;
+ default:
+ return AMD_COMGR_STATUS_ERROR_INVALID_ARGUMENT;
+--- a/src/comgr-compiler.h
++++ b/src/comgr-compiler.h
+@@ -95,12 +95,7 @@ class AMDGPUCompiler {
+ /// User supplied target GPU Arch.
+ std::string GPUArch;
+ std::string OffloadArch;
+- /// ROCM include Path
+- std::string ROCMIncludePath;
+- /// HIP and Clang Include Paths
+- std::string HIPIncludePath;
+ std::string ClangIncludePath;
+- std::string ClangIncludePath2;
+ /// Perform out-of-process compilation.
+ bool CompileOOP = false;
+ /// Precompiled header file paths.
diff --git a/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-llvm-18-compat.patch b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-llvm-18-compat.patch
new file mode 100644
index 000000000000..df008e4230d7
--- /dev/null
+++ b/dev-libs/rocm-comgr/files/rocm-comgr-6.1.0-llvm-18-compat.patch
@@ -0,0 +1,79 @@
+ROCm 6.0.0 and 6.0.2 releases use a mix of LLVM 17 and 18
+forked as https://github.com/RadeonOpenCompute/llvm-project
+which makes some libraries compatible with LLVM 17,
+while others require LLVM 18.
+
+Backports:
+* https://github.com/ROCm/llvm-project/commit/6cbc4dc91dfeb1cf2295cb350866e0b3a07dfee4
+* https://github.com/ROCm/llvm-project/commit/179ec2e67bf882c6bccb27f81db3d80f7eb9946e
+* https://github.com/ROCm/llvm-project/commit/ee123c3d1706bc4346511b1a9032020782576350
+--- a/src/comgr-compiler.cpp
++++ b/src/comgr-compiler.cpp
+@@ -205,7 +205,11 @@ bool AssemblerInvocation::createFromArgs(AssemblerInvocation &Opts,
+ // Parse the arguments.
+ const OptTable &OptTbl = getDriverOptTable();
+
++#if LLVM_VERSION_MAJOR == 17
+ const unsigned IncludedFlagsBitmask = options::CC1AsOption;
++#else
++ llvm::opt::Visibility IncludedFlagsBitmask(options::CC1AsOption);
++#endif
+ unsigned MissingArgIndex, MissingArgCount;
+ InputArgList Args = OptTbl.ParseArgs(Argv, MissingArgIndex, MissingArgCount,
+ IncludedFlagsBitmask);
+@@ -1041,11 +1045,15 @@ amd_comgr_status_t AMDGPUCompiler::addCompilationFlags() {
+ Args.push_back("cl");
+ Args.push_back("-std=cl1.2");
+ Args.push_back("-cl-no-stdinc");
++ Args.push_back("-mllvm");
++ Args.push_back("-amdgpu-internalize-symbols");
+ break;
+ case AMD_COMGR_LANGUAGE_OPENCL_2_0:
+ Args.push_back("cl");
+ Args.push_back("-std=cl2.0");
+ Args.push_back("-cl-no-stdinc");
++ Args.push_back("-mllvm");
++ Args.push_back("-amdgpu-internalize-symbols");
+ break;
+ case AMD_COMGR_LANGUAGE_HIP:
+ Args.push_back("hip");
+@@ -1605,6 +1613,9 @@ amd_comgr_status_t AMDGPUCompiler::assembleToRelocatable() {
+ Args.push_back("-x");
+ Args.push_back("assembler");
+
++ // -nogpulib option not needed for assembling to relocatable
++ NoGpuLib = false;
++
+ return processFiles(AMD_COMGR_DATA_KIND_RELOCATABLE, ".o");
+ }
+
+--- a/src/comgr-metadata.cpp
++++ b/src/comgr-metadata.cpp
+@@ -1087,7 +1087,12 @@ amd_comgr_status_t lookUpCodeObject(DataObject *DataP,
+ }
+
+ BinaryStreamReader Reader(StringRef(DataP->Data, DataP->Size),
+- support::little);
++#if LLVM_VERSION_MAJOR == 17
++ support::little
++#else
++ llvm::endianness::little
++#endif
++ );
+
+ StringRef Magic;
+ if (auto EC = Reader.readFixedString(Magic, OffloadBundleMagicLen)) {
+--- a/test/compile_log_remarks_test.c
++++ b/test/compile_log_remarks_test.c
+@@ -107,7 +107,11 @@ int main(int argc, char *argv[]) {
+ AMD_COMGR_DATA_KIND_SOURCE, 1);
+
+ checkLogs("AMD_COMGR_ACTION_CODEGEN_BC_TO_ASSEMBLY", DataSetAsm,
++#if LLVM_VERSION_MAJOR == 17
+ "remark: <unknown>:0:0: 8 stack bytes in function "
++#else
++ "remark: <unknown>:0:0: 8 stack bytes in function 'f' "
++#endif
+ "[-Rpass-analysis=prologepilog]");
+
+ Status = amd_comgr_destroy_data_set(DataSetCl);