diff options
Diffstat (limited to 'dev-libs/rocm-device-libs')
-rw-r--r-- | dev-libs/rocm-device-libs/Manifest | 3 | ||||
-rw-r--r-- | dev-libs/rocm-device-libs/files/rocm-device-libs-6.1.2-fix-build.patch | 152 | ||||
-rw-r--r-- | dev-libs/rocm-device-libs/rocm-device-libs-6.1.2.ebuild | 84 |
3 files changed, 239 insertions, 0 deletions
diff --git a/dev-libs/rocm-device-libs/Manifest b/dev-libs/rocm-device-libs/Manifest index 9a1c83c8664b..c72b5c47799d 100644 --- a/dev-libs/rocm-device-libs/Manifest +++ b/dev-libs/rocm-device-libs/Manifest @@ -7,7 +7,9 @@ AUX rocm-device-libs-6.0.0-add-gws-attribute.patch 581 BLAKE2B 40a8b66e44109a0d3 AUX rocm-device-libs-6.1.0-fix-llvm-link.patch 1089 BLAKE2B c14aa8fe06d351ba5ec75ae78a4fdd6a8c06bc4a2fff64214f2c28e538d33a07ad986743b5f7278fbaf771841766a368f44a0da17815461891fc09de81346842 SHA512 7ff795ff0db46331c001e5bb7cc7c0e671038f41596d0c3ed6da817146d169d9577c238cb2ecca3998e8a21fff0840e81760f3dd549dfaf1dbcb89ef8d9340a7 AUX rocm-device-libs-6.1.0-fix-test-failures.patch 3232 BLAKE2B ae76a41e48192bc5cc940a7769b9f264cda742eab52870eeba730403440aa52580dde5d3c735e79e261c6d0b7a5ddac96dc590e547f7fbe1a3f369d08ab38229 SHA512 10905ddca80c6013561849920382f9716ad7d72080241a84aca568dd4ae43845e22627d309f6423f994f068dd64af1db52d64896aa66a79e4c803c093d44dcda AUX rocm-device-libs-6.1.0-fix-test-failures2.patch 1492 BLAKE2B a75aa720c1573111280c6fc07c2f5fef4b152116081b6c9eb25844efa9412680c6301f6e3150bd0334265ff76db38ace8a21a9abd6db42692387f04b1d5534d1 SHA512 b265327fa21ecbabbaca2270d8a7022ca46f2890b597683b827848a3d503d26ac7eeb1edb292387694730f6e86dc46a69209d8266ea7aa156266e96052a198ed +AUX rocm-device-libs-6.1.2-fix-build.patch 4182 BLAKE2B 2fded91e25a4b0eb55c021047d7cee4d13f6f1a00da9e596bed45ad1292f1b30ffce2f08d34eb6d693a13bb5499b9e3e49e83dd7c71525ec054881965c6b1fd7 SHA512 e2d8bdaf4fde64a470d3b87134a81a1348806b757e5a0362faee4ffde34e442e52c47f233f01b1cc4129beb17c8a4378579faa3383a4d3d681b51eb719c3f38e DIST llvm-project-rocm-6.1.1.tar.gz 196027084 BLAKE2B 3ef0b6e3c47c66fd80289373e6ff8aaff44751f9b380addfae73a18dc388093c0535f230b0cc7528724bc43f6992e2ae6decd3d0d3c700893ca95a6166b7b8dc SHA512 e320d4eeaa6f61ed1cdbf653d67fe887d3ce9dc0d6743b4713502e1cb5318ab8afbe1ee71f8cba07635c54ce532df6683de40ade0e5be4a52e50ce25a9b70818 +DIST llvm-project-rocm-6.1.2.tar.gz 195992927 BLAKE2B d821f29f2f1f7c1ff414c63a710281f16d2a394b21f3365d01b86710cc09ed27e514b49fb744bf6a36b38815afa56cc26d44f0238f38479a0c2db9bf9989f389 SHA512 5f7e5dbe5976141de35e96e603624bd9d5a2c08b0690ba9fcd81d1b32f540f94bb9f4b74539e2838fd60ae1312dbe5e0b429ba80a03871782cdf3bd834940ce0 DIST rocm-device-libs-5.1.3.tar.gz 242862 BLAKE2B 68d66de897f461e9f876de5fe2214803d4c00665651dea6af0952f0ce579c6704a5ec41b08971fa613ade309a0a85cb611b56b592dc2a25e247183e634ea3378 SHA512 cc3dfb8d4b4841ba777355c537175259d0019159ff462358320674b85082cccd99f6462f60fee66228ddfb88fade043445c1bac62504aa1462ba61b7e2751de7 DIST rocm-device-libs-5.3.3.tar.gz 245690 BLAKE2B 475c0d818b8b0f090a8daeca2910cd4002e4cdf505d020327f46eb5f864a26937a6a3dfe4ff7b188ebda0f936b1c396f2163bb27b9e2a62c5976e60fa60856ac SHA512 8f6f2fc1534e348e02ba30a25cfc6017f8eab768968b5d0344a5ea7d65c4f0a874072f9e53919c74545814330602ef7c190753c7ff019137230e02f58a5d3a5d DIST rocm-device-libs-5.4.3.tar.gz 246095 BLAKE2B eb749346c96d465a5f22831968ccbd71f02749e6aa0d9c2becc0f378641ca0f65c1a131bfd3ed226f838b4208091fcc920b1e31b427adbd69a42881898668e6a SHA512 67b904363a3cff6c15bbd032cbc72cb5cd5f82acaa68c74391dbcf415266e8f35486a496b69b69e1fc0721e0e4e21fb6a6b9c180a46cb59cdcf53916be846ca4 @@ -19,4 +21,5 @@ EBUILD rocm-device-libs-5.4.3.ebuild 1250 BLAKE2B 6013dd693a8120b5eeac654c9bad19 EBUILD rocm-device-libs-5.5.1.ebuild 1206 BLAKE2B 4e40b4f468a1f3f2bba04aff69de95796f4df079c737b17812a979531a70d62da394655e00d1956d2dd55248141b255a293c1e19fd189e0df3e168f2da9d4d1c SHA512 98a5d239e61f5690bed1af56f8d38f9e188d2f9a3e5c0409dab1afeda7b825b114037e5319daf650ba75916a15c16fef8bb3e61c63ef60255bfea95b4d34d44d EBUILD rocm-device-libs-5.7.1.ebuild 1455 BLAKE2B 3185873cbed4c601d4d3b6b9142376a0af8c3d89718bf8e7a06215cfd9c25cefd956607f557cf721a3f9b68289b91212889d23888bffda7daf8d0c29b596c258 SHA512 f7db2da2818b9eaa4930dfac02e660e98a2b7dc5e326e1fde78acc98d5723a436052a628cf20254e5295a334980c9d350047503cb262ee4c9820d5ef889d170c EBUILD rocm-device-libs-6.1.1.ebuild 2249 BLAKE2B 0e118c5e35b8788affc6b20cbe2fd8473eeb8e1593c4e8b8a986cfcfe373cfdea367c267b0e4339dcb72759dadf75e742ddd696296576c7c61169c41f16440d0 SHA512 8a6a155224f845936279a0c33548d13253c66c9bc0c0b99399a82c69b611cebb76e2eea131dbcf74b6e6792e42c103f4e6f32582a2af25ccbd0567ec94ce6bea +EBUILD rocm-device-libs-6.1.2.ebuild 2292 BLAKE2B 52ba4d9b7935c5d4a6a4499429c59740dd95a169dabab27a9e530a73cd46b2b8091dccb2033ee1ecbf9e02ed82b65f9f0f8c04b5ca0942a6caba572a225debbf SHA512 43d758a86ac9c64fd3d63a9fbbbe379c6c71fe395fd7b3a6060cfe48bbcdc179a612663a063950e0a30e9a081ee65537d8654697bebd34ecec00339e075109df MISC metadata.xml 486 BLAKE2B 39c4a21b96ad471942a38d1c46b2600e3544729a87cd01c79606d495c595f271d5c04621ec581d2322686aca56de5d7c4b2b0d6125fc8db15b1236b2c093c8c9 SHA512 1a7d6570a687bb55877c6a5d9cbd7dc30a72ec0613b84e33be0982ff46b8d80d89e502e50364afdcc1022d7620fc10317a411b27c255caeafa2b45a85a43fd8c diff --git a/dev-libs/rocm-device-libs/files/rocm-device-libs-6.1.2-fix-build.patch b/dev-libs/rocm-device-libs/files/rocm-device-libs-6.1.2-fix-build.patch new file mode 100644 index 000000000000..e717627893b9 --- /dev/null +++ b/dev-libs/rocm-device-libs/files/rocm-device-libs-6.1.2-fix-build.patch @@ -0,0 +1,152 @@ +commit 83ef5b48800a47cc30b3d4bcfdf31de9c3bd0dc8 +Author: Patrick Lauer <patrick@gentoo.org> +Date: Sun Jul 28 07:43:54 2024 +0000 + + Revert "ockl: Don't use wave32 ballot builtin" + + This reverts commit 066a0b2716b7ade96a2b3e79e5ddcd0c110e9f98. + +diff --git a/ockl/src/dm.cl b/ockl/src/dm.cl +index 18efc54203b7..a3f06c448aee 100644 +--- a/ockl/src/dm.cl ++++ b/ockl/src/dm.cl +@@ -287,14 +287,9 @@ first(__global void * v) + return __builtin_astype(w2, __global void *); + } + +-// Read val from one active lane whose predicate is one. +-// If no lanes have the predicate set, return none +-// This is like first, except that first may not have its predicate set ++REQUIRES_WAVE64 + static uint +-elect_uint(int pred, uint val, uint none) +-{ +- // Pretend wave32 doesn't exist. The wave64 ballot works, and the high half +- // will fold out as 0. ++elect_uint_wave64(int pred, uint val, uint none) { + uint ret = none; + + ulong mask = __builtin_amdgcn_ballot_w64(pred != 0); +@@ -306,14 +301,51 @@ elect_uint(int pred, uint val, uint none) + return ret; + } + +-// Count the number of nonzero arguments across the wave ++REQUIRES_WAVE32 + static uint +-votes(bool b) ++elect_uint_wave32(int pred, uint val, uint none) { ++ uint ret = none; ++ uint mask = __builtin_amdgcn_ballot_w32(pred != 0); ++ if (mask != 0U) { ++ uint l = __ockl_ctz_u32(mask); ++ ret = __builtin_amdgcn_ds_bpermute(l << 2, val); ++ } ++ ++ return ret; ++} ++ ++// Read val from one active lane whose predicate is one. ++// If no lanes have the predicate set, return none ++// This is like first, except that first may not have its predicate set ++static uint ++elect_uint(int pred, uint val, uint none) ++{ ++ return __oclc_wavefrontsize64 ? elect_uint_wave64(pred, val, none) : elect_uint_wave32(pred, val, none); ++} ++ ++REQUIRES_WAVE64 ++static uint ++votes_wave64(bool b) + { + ulong mask = __builtin_amdgcn_ballot_w64(b); + return __builtin_popcountl(mask); + } + ++REQUIRES_WAVE32 ++static uint ++votes_wave32(bool b) ++{ ++ uint mask = __builtin_amdgcn_ballot_w32(b); ++ return __builtin_popcount(mask); ++} ++ ++// Count the number of nonzero arguments across the wave ++static uint ++votes(bool b) ++{ ++ return __oclc_wavefrontsize64 ? votes_wave64(b) : votes_wave32(b); ++} ++ + // The kind of the smallest block that can hold sz bytes + static uint + size_to_kind(uint sz) +diff --git a/ockl/src/wfaas.cl b/ockl/src/wfaas.cl +index 3861a5bb3eab..4dab97cea5f7 100644 +--- a/ockl/src/wfaas.cl ++++ b/ockl/src/wfaas.cl +@@ -21,25 +21,60 @@ static int optimizationBarrierHack(int in_val) + return out_val; + } + ++REQUIRES_WAVE32 ++static bool wfany_impl_w32(int e) { ++ return __builtin_amdgcn_ballot_w32(e) != 0; ++} ++ ++REQUIRES_WAVE64 ++static bool wfany_impl_w64(int e) { ++ return __builtin_amdgcn_ballot_w64(e) != 0; ++} ++ + ATTR bool + OCKL_MANGLE_I32(wfany)(int e) + { + e = optimizationBarrierHack(e); +- return __builtin_amdgcn_ballot_w64(e) != 0; ++ return __oclc_wavefrontsize64 ? ++ wfany_impl_w64(e) : wfany_impl_w32(e); ++} ++ ++REQUIRES_WAVE32 ++static bool wfall_impl_w32(int e) { ++ return __builtin_amdgcn_ballot_w32(e) == __builtin_amdgcn_read_exec_lo(); ++} ++ ++REQUIRES_WAVE64 ++static bool wfall_impl_w64(int e) { ++ return __builtin_amdgcn_ballot_w64(e) == __builtin_amdgcn_read_exec(); + } + + ATTR bool + OCKL_MANGLE_I32(wfall)(int e) + { + e = optimizationBarrierHack(e); +- return __builtin_amdgcn_ballot_w64(e) == __builtin_amdgcn_read_exec(); ++ return __oclc_wavefrontsize64 ? ++ wfall_impl_w64(e) : wfall_impl_w32(e); ++} ++ ++ ++REQUIRES_WAVE32 ++static bool wfsame_impl_w32(int e) { ++ uint u = __builtin_amdgcn_ballot_w32(e); ++ return (u == 0) | (u == __builtin_amdgcn_read_exec_lo()); ++} ++ ++REQUIRES_WAVE64 ++static bool wfsame_impl_w64(int e) { ++ ulong u = __builtin_amdgcn_ballot_w64(e); ++ return (u == 0UL) | (u == __builtin_amdgcn_read_exec()); + } + + ATTR bool + OCKL_MANGLE_I32(wfsame)(int e) + { + e = optimizationBarrierHack(e); +- ulong u = __builtin_amdgcn_ballot_w64(e); +- return (u == 0UL) | (u == __builtin_amdgcn_read_exec()); ++ return __oclc_wavefrontsize64 ? ++ wfsame_impl_w64(e) : wfsame_impl_w32(e); + } + diff --git a/dev-libs/rocm-device-libs/rocm-device-libs-6.1.2.ebuild b/dev-libs/rocm-device-libs/rocm-device-libs-6.1.2.ebuild new file mode 100644 index 000000000000..a6d5dd67438b --- /dev/null +++ b/dev-libs/rocm-device-libs/rocm-device-libs-6.1.2.ebuild @@ -0,0 +1,84 @@ +# Copyright 1999-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +LLVM_COMPAT=( 18 ) +inherit cmake llvm-r1 + +MY_P=llvm-project-rocm-${PV} +components=( "amd/device-libs" ) + +if [[ ${PV} == *9999 ]] ; then + EGIT_REPO_URI="https://github.com/ROCm/llvm-project" + inherit git-r3 + S="${WORKDIR}/${P}/${components[0]}" +else + SRC_URI="https://github.com/ROCm/llvm-project/archive/rocm-${PV}.tar.gz -> ${MY_P}.tar.gz" + S="${WORKDIR}/${MY_P}/${components[0]}" + KEYWORDS="~amd64" +fi + +DESCRIPTION="Radeon Open Compute Device Libraries" +HOMEPAGE="https://github.com/ROCm/ROCm-Device-Libs" + +LICENSE="MIT" +SLOT="0/$(ver_cut 1-2)" +IUSE="test" +RESTRICT="!test? ( test )" + +BDEPEND=" + dev-build/rocm-cmake + $(llvm_gen_dep ' + sys-devel/clang:${LLVM_SLOT} + sys-devel/lld:${LLVM_SLOT} + ') +" + +CMAKE_BUILD_TYPE=Release + +PATCHES=( + "${FILESDIR}/${PN}-5.5.0-test-bitcode-dir.patch" + "${FILESDIR}/${PN}-6.1.0-fix-llvm-link.patch" + "${FILESDIR}/${PN}-6.0.0-add-gws-attribute.patch" + "${FILESDIR}/${PN}-6.1.0-fix-test-failures.patch" + "${FILESDIR}/${PN}-6.1.0-fix-test-failures2.patch" + "${FILESDIR}/${PN}-6.1.2-fix-build.patch" +) + +src_unpack() { + if [[ ${PV} == *9999 ]] ; then + git-r3_fetch + git-r3_checkout '' . '' "${components[@]}" + else + archive="${MY_P}.tar.gz" + ebegin "Unpacking from ${archive}" + tar -x -z -o \ + -f "${DISTDIR}/${archive}" \ + "${components[@]/#/${MY_P}/}" || die + eend ${?} + fi +} + +src_prepare() { + sed -e "s:amdgcn/bitcode:lib/amdgcn/bitcode:" -i "${S}/cmake/OCL.cmake" || die + sed -e "s:amdgcn/bitcode:lib/amdgcn/bitcode:" -i "${S}/cmake/Packages.cmake" || die + cmake_src_prepare +} + +src_configure() { + local mycmakeargs=( + -DLLVM_DIR="$(get_llvm_prefix)" + ) + # do not trust CMake with autoselecting Clang, as it autoselects the latest one + # producing too modern LLVM bitcode and causing linker errors in other packages + CC="$(get_llvm_prefix)/bin/clang" CXX="$(get_llvm_prefix)/bin/clang++" cmake_src_configure +} + +src_install() { + cmake_src_install + local CLANG_EXE="$(get_llvm_prefix)/bin/clang" + # install symlink, so that clang won't ask for "--rocm-device-lib-path" flag anymore + local bitcodedir="$("${CLANG_EXE}" -print-resource-dir)/$(get_libdir)/amdgcn/bitcode" + dosym -r "/usr/lib/amdgcn/bitcode" "${bitcodedir#${EPREFIX}}" +} |