diff options
Diffstat (limited to 'sci-libs/rocBLAS')
-rw-r--r-- | sci-libs/rocBLAS/Manifest | 5 | ||||
-rw-r--r-- | sci-libs/rocBLAS/files/rocBLAS-4.3.0-remove-problematic-test-suites.patch | 22 | ||||
-rw-r--r-- | sci-libs/rocBLAS/files/rocBLAS-4.3.0-unbundle-Tensile.patch | 44 | ||||
-rw-r--r-- | sci-libs/rocBLAS/rocBLAS-4.3.0-r1.ebuild | 147 | ||||
-rw-r--r-- | sci-libs/rocBLAS/rocBLAS-4.3.0.ebuild | 11 |
5 files changed, 223 insertions, 6 deletions
diff --git a/sci-libs/rocBLAS/Manifest b/sci-libs/rocBLAS/Manifest index 2135355d9bd3..ff8f0fa12732 100644 --- a/sci-libs/rocBLAS/Manifest +++ b/sci-libs/rocBLAS/Manifest @@ -3,7 +3,10 @@ AUX Tensile-4.3.0-output-commands.patch 1629 BLAKE2B c43395d8e8aeb956d5eb84c8a34 AUX rocBLAS-4.3.0-change-default-Tensile-library-dir.patch 1533 BLAKE2B d6b48397b1df8da0325590b79fdd351bf7db39fee662dc8809da03cff1715f5b952b7dbebb6889a08ca8b67067ca0626fc6350a825433eaf1e4747eee5f8d70e SHA512 318a00649aa80771722c1128b619320d03fd66fd9375f065e2c2f69cc2353261fa104fb1a5d0991490c92cb9e3a0c78b4232764e43d262f83738663621560f8f AUX rocBLAS-4.3.0-fix-glibc-2.32-and-above.patch 1254 BLAKE2B d3bcf526f374065ed0615c89ab74cbc203d2d6b6d0a1c49072f3328aaa40aa52f0fa155b23a936a865d353db32f48f348a64f05f157eb6bb2d55c93f9da364d1 SHA512 e899b26e3dc4717cb7854c16d104b9e070067f49e308fe6a38f1be5735a83a814da46ab41752cf4431b4c378d29f31c6082e1fe63e1bf5ae72a55b54a9017c15 AUX rocBLAS-4.3.0-link-system-blas.patch 1106 BLAKE2B 83f525072dc34edec01c56353d8c87f0f967c9cf07d054da8c1f4af632e095cd2af9562dea8eab8a2ddc7a71a9570e654b4a6b0f68c95508e92cc743350c8129 SHA512 94efab3d507fc90bd0d0b2cd0096e68b7669d1de2faf73fe0bfbc8c26c83db88596664bd61f394863194ad3108985b6206c4408c6d2ec93cd30ccec13b3b37f1 +AUX rocBLAS-4.3.0-remove-problematic-test-suites.patch 2240 BLAKE2B bdaf0111083e62f15d907b3faa536b1418a7273b3733386c14625ffed4c505b7d6a2318ae24a0dda286d57d4dac6befefa8005ef58452a9eb3928b78d337e214 SHA512 565d29c193ed04d45dc179ce06bbbdb0d472d66fe989286f4ccf29ccce39c118d79225ca80d2dbcb022833347b87e7573a78dc573dfde73d368e62256526eb93 +AUX rocBLAS-4.3.0-unbundle-Tensile.patch 1934 BLAKE2B 72622949ce96381b7e0b24dd7eff28eff492bb46d800f7d47e12d092662dc258a1e758d7a20fd27753b0e50ec2348abe0c0b4bb1a7e644d9522049e2a1df19b7 SHA512 36ca6740439a4eed672df7e8c52be87ca165d0095efa147480290e0c3db117ba2d524cf2ef759074b757bbba30e470660bcbbdf1d98fb97448101eda86cd6f67 DIST rocm-Tensile-4.3.0.tar.gz 14250149 BLAKE2B 
030138eaca2a0aadd96801e6dbd72e510716dd90553ef3795c5e04e00a34a05ecae82b24f755e4033a4acfcdb1cf26291da1e7902bb090f89a010d403e832beb SHA512 126db0b413c716fba8d5be9bff7a44fd1badacbf32f3db8d0db649819177db37ebd56fd22dd3c809655f5d29675be115e698cd10bc3d0b4b23878ae3726fce47 DIST rocm-rocBLAS-4.3.0.tar.gz 11569970 BLAKE2B f11b0acf2bbd5737b036142d3b2cc1e18c38e088a8b7db58156d478dd6718befbb82bb7fd43a38fe64a5427124c5ad5241fa37977f094e6efd195d168d3e5f65 SHA512 490ab1c1e98a8b311ad5630515c448d9bb0a2bf588a08bcbebee345e2954d616b4ffe7c2e03dadca82c590438c42afccb98fe8ba00856aece101b5ffe62ef1e0 -EBUILD rocBLAS-4.3.0.ebuild 4139 BLAKE2B a089a65404724ffd406620053375ee8e62405643bd5827ff18440eaa5838d80d1bcc450fe46cdaf700e604421da1dd9e62a15e7c5a51e68cdfdb92474bbf97b4 SHA512 7c73c100050bbf4eb167f52e591d5858bcfa6d03210a8bbc3bbd572c33d3e839ef81643db3fd10403c25c4885d1c585fd4dd66e51b89c0d6308dfa88621c3c5f +EBUILD rocBLAS-4.3.0-r1.ebuild 3809 BLAKE2B 9054edbfe1b285efd53932a33e8df1de168ab8f64481e596b7d891efd838c87c64cd75093e3d738e9bc0c3499fa645c15fa3959b0492bb2eb648da1c4d2b92dd SHA512 44fa7416f2b9582f2cf7d1a5965942579350956c4a5408ef11dac725a8e248758ab1b974be768c4bcb486cc4179aa167ca17dded4d1c1b754f6efce1605d9dd0 +EBUILD rocBLAS-4.3.0.ebuild 4226 BLAKE2B 0e4814b92bf97e230dee8ff54565ca718d2482d12e3f31d194e6321a55b064f3653059dfef623d665ac5d25fadad20a8bca81ace83980c3ee8f5e62cb5c4cd24 SHA512 06528ef0cf5767ed0b9bc58794cddebfc7e247091ff78c4e0e8eeb8f75ed3cdfd5443044671837f9573cf0e576d8006ef6a7c1a96786f5d4a8f06657c5b6308c MISC metadata.xml 654 BLAKE2B b8a1d40e2ccc2aadccb9a1200e9c78fccb37544fa01c353645372a4ed01a6d153c04ff5d0d96cd7661e80b09eb6c69e8cbc59e466fef117ea1ad5ef54b1334ae SHA512 b880f8f495aee8c337b7cac4054a30f877a1204d0cda03fa90aaa25e4ae163d435c113141f2afef62967f603dea372419b228655e1b26fff53c010c8efdcebf1 diff --git a/sci-libs/rocBLAS/files/rocBLAS-4.3.0-remove-problematic-test-suites.patch b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-remove-problematic-test-suites.patch new file mode 100644 index 
000000000000..4265e999f322 --- /dev/null +++ b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-remove-problematic-test-suites.patch @@ -0,0 +1,22 @@ +Those tests will fail comparing rocblas vs openblas, because the testing program is so strict that it cannot tolerate the numerical differences which is actually OK. + +https://github.com/ROCmSoftwarePlatform/rocBLAS/issues/1202 +--- orig/clients/gtest/known_bugs.yaml ++++ rocBLAS-rocm-4.3.0/clients/gtest/known_bugs.yaml +@@ -7,6 +7,16 @@ Known bugs: + - { function: gemm_ex, a_type: i8_r, b_type: i8_r, c_type: i32_r, d_type: i32_r, compute_type: i32_r, flags: 0, known_bug_platforms: "gfx900,gfx906,gfx1010,gfx1011,gfx1012,gfx1030" } + - { function: gemm_batched_ex, a_type: i8_r, b_type: i8_r, c_type: i32_r, d_type: i32_r, compute_type: i32_r, flags: 0, known_bug_platforms: "gfx900,gfx906,gfx90a,gfx1010,gfx1011,gfx1012,gfx1030" } + - { function: gemm_strided_batched_ex, a_type: i8_r, b_type: i8_r, c_type: i32_r, d_type: i32_r, compute_type: i32_r, flags: 0, known_bug_platforms: "gfx900,gfx906,gfx1010,gfx1011,gfx1012,gfx1030" } ++# gemv openblas reference differences due to summation order dependent roundoff accumulation with large M float complex ++# 8th significant digit difference vs CPU on single precision float math, leads to expected equality test failure ++# code needs to be changed to a tolerance test or reduce M for float complex type if using equality vs. 
CPU reference ++- { function: gemv, a_type: f32_c, transA: T, M: 131071 } ++- { function: gemv, a_type: f32_c, transA: C, M: 131071 } ++- { function: gemv_batched, a_type: f32_c, transA: T, M: 131071 } ++- { function: gemv_batched, a_type: f32_c, transA: C, M: 131071 } ++- { function: gemv_strided_batched, a_type: f32_c, transA: T, M: 131071 } ++- { function: gemv_strided_batched, a_type: f32_c, transA: C, M: 131071 } ++ + + #- { function: gemm_ex, a_type: bf16_r, b_type: bf16_r, c_type: bf16_r, d_type: bf16_r, compute_type: f32_r, transA: C, transB: N, M: 512, N: 512, K: 512, lda: 512, ldb: 512, ldc: 512, ldd: 512, alpha: 5.0, alphai: 0.0, beta: 0.0, betai: 0.0, known_bug_platforms: gfx908 } + #- { function: gemm_ex, a_type: bf16_r, b_type: bf16_r, c_type: bf16_r, d_type: bf16_r, compute_type: f32_r, transA: C, transB: N, M: 512, N: 512, K: 512, lda: 512, ldb: 512, ldc: 512, ldd: 512, alpha: 0.0, alphai: 0.0, beta: 3.0, betai: 0.0, known_bug_platforms: gfx908 } diff --git a/sci-libs/rocBLAS/files/rocBLAS-4.3.0-unbundle-Tensile.patch b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-unbundle-Tensile.patch new file mode 100644 index 000000000000..f2bcff9a58db --- /dev/null +++ b/sci-libs/rocBLAS/files/rocBLAS-4.3.0-unbundle-Tensile.patch @@ -0,0 +1,44 @@ +diff --git a/cmake/virtualenv.cmake b/cmake/virtualenv.cmake +index 4c29c94..f9838c2 100644 +--- a/cmake/virtualenv.cmake ++++ b/cmake/virtualenv.cmake +@@ -11,7 +11,7 @@ set(VIRTUALENV_HOME_DIR ${CMAKE_BINARY_DIR}/virtualenv CACHE PATH "Path to virtu + function(virtualenv_create) + message("${VIRTUALENV_PYTHON_EXE} -m venv ${VIRTUALENV_HOME_DIR} --system-site-packages --clear") + execute_process( +- COMMAND ${VIRTUALENV_PYTHON_EXE} -m venv ${VIRTUALENV_HOME_DIR} --system-site-packages --clear ++ COMMAND true + ) + endfunction() + +@@ -21,7 +21,7 @@ function(virtualenv_install) + message("${VIRTUALENV_HOME_DIR}/bin/python -m pip install ${ARGN}") + execute_process( + RESULT_VARIABLE rc +- COMMAND 
${VIRTUALENV_HOME_DIR}/bin/python -m pip install ${ARGN} ++ COMMAND true + ) + if(rc) + message(FATAL_ERROR ${rc}) +diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt +index a34ee87..bf94988 100755 +--- a/library/src/CMakeLists.txt ++++ b/library/src/CMakeLists.txt +@@ -99,9 +99,6 @@ if( BUILD_WITH_TENSILE ) + # Create a unique name for TensileHost compiled for rocBLAS + set_target_properties( TensileHost PROPERTIES OUTPUT_NAME rocblas-tensile CXX_EXTENSIONS NO ) + target_compile_definitions( TensileHost PUBLIC USE_TENSILE_HOST ) +- +- # Tensile host depends on libs build target +- add_dependencies( TensileHost TENSILE_LIBRARY_TARGET ) + else() + # Create a unique name for Tensile compiled for rocBLAS + set_target_properties( Tensile PROPERTIES OUTPUT_NAME rocblas-tensile CXX_EXTENSIONS NO ) +@@ -520,7 +517,6 @@ rocm_install_targets( + if( BUILD_WITH_TENSILE ) + if( BUILD_WITH_TENSILE_HOST ) + set( ROCBLAS_TENSILE_LIBRARY_DIR "\${CPACK_PACKAGING_INSTALL_PREFIX}rocblas/lib" CACHE PATH "path to tensile library" ) +- install(DIRECTORY ${CMAKE_BINARY_DIR}/Tensile/library DESTINATION ${ROCBLAS_TENSILE_LIBRARY_DIR}) + endif() + endif() +
diff --git a/sci-libs/rocBLAS/rocBLAS-4.3.0-r1.ebuild b/sci-libs/rocBLAS/rocBLAS-4.3.0-r1.ebuild
new file mode 100644
index 000000000000..13cf41fdd900
--- /dev/null
+++ b/sci-libs/rocBLAS/rocBLAS-4.3.0-r1.ebuild
@@ -0,0 +1,161 @@
+# Copyright 1999-2022 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+PYTHON_COMPAT=( python3_{8..10} )
+DOCS_BUILDER="doxygen"
+DOCS_DIR="docs"
+DOCS_DEPEND="media-gfx/graphviz"
+inherit cmake docs prefix python-any-r1
+
+DESCRIPTION="AMD's library for BLAS on ROCm"
+HOMEPAGE="https://github.com/ROCmSoftwarePlatform/rocBLAS"
+SRC_URI="https://github.com/ROCmSoftwarePlatform/rocBLAS/archive/rocm-${PV}.tar.gz -> rocm-${P}.tar.gz"
+S="${WORKDIR}/${PN}-rocm-${PV}"
+
+LICENSE="BSD"
+KEYWORDS="~amd64"
+SLOT="0/$(ver_cut 1-2)"
+IUSE="benchmark test"
+RESTRICT="!test? ( test )"
+
+BDEPEND="${PYTHON_DEPS}
+	dev-util/rocm-cmake:${SLOT}
+	$(python_gen_any_dep '
+	dev-util/Tensile[${PYTHON_USEDEP}]
+	')
+	dev-util/Tensile:${SLOT}
+"
+
+DEPEND="
+	dev-util/hip:${SLOT}
+	dev-libs/msgpack
+	test? (
+		virtual/blas
+		dev-cpp/gtest
+		sys-libs/libomp
+	)
+	benchmark? (
+		virtual/blas
+		sys-libs/libomp
+	)
+"
+
+PATCHES=(
+	"${FILESDIR}"/${PN}-4.3.0-fix-glibc-2.32-and-above.patch
+	"${FILESDIR}"/${PN}-4.3.0-change-default-Tensile-library-dir.patch
+	"${FILESDIR}"/${PN}-4.3.0-link-system-blas.patch
+	"${FILESDIR}"/${PN}-4.3.0-remove-problematic-test-suites.patch
+	"${FILESDIR}"/${PN}-4.3.0-unbundle-Tensile.patch
+)
+
+# python-any-r1 callback: accept an interpreter only if Tensile is installed for it.
+python_check_deps() {
+	has_version "dev-util/Tensile[${PYTHON_USEDEP}]"
+}
+
+# Run the cmake eclass prepare step, then rewrite install paths in the CMake files.
+src_prepare() {
+	cmake_src_prepare
+	# Fit for Gentoo FHS rule
+	sed -e "/PREFIX rocblas/d" \
+		-e "/<INSTALL_INTERFACE/s:include:include/rocblas:" \
+		-e "s:rocblas/include:include/rocblas:" \
+		-e "s:\\\\\${CPACK_PACKAGING_INSTALL_PREFIX}rocblas/lib:${EPREFIX}/usr/$(get_libdir)/rocblas:" \
+		-e "s:share/doc/rocBLAS:share/doc/${P}:" \
+		-e "/rocm_install_symlink_subdir( rocblas )/d" -i library/src/CMakeLists.txt || die
+
+	# strip the -rpath linker option from the clients' CMake file
+	sed -e "s:,-rpath=.*\":\":" -i clients/CMakeLists.txt || die
+
+	eprefixify library/src/tensile_host.cpp
+}
+
+# Configure the CMake build with hipcc as the C++ compiler.
+src_configure() {
+	# allow access to hardware
+	addpredict /dev/kfd
+	addpredict /dev/dri/
+	addpredict /dev/random
+
+	export PATH="${EPREFIX}/usr/lib/llvm/roc/bin:${PATH}"
+	export TENSILE_SKIP_LIBRARY=1
+
+	local mycmakeargs=(
+		-DTensile_LOGIC="asm_full"
+		-DTensile_COMPILER="hipcc"
+		-DTensile_LIBRARY_FORMAT="msgpack"
+		-DTensile_CODE_OBJECT_VERSION="V3"
+		-DTensile_TEST_LOCAL_PATH="${EPREFIX}/usr/share/Tensile"
+		-DTensile_ROOT="${EPREFIX}/usr/share/Tensile"
+		-DBUILD_WITH_TENSILE=ON
+		-DBUILD_WITH_TENSILE_HOST=ON
+		-DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr"
+		-DCMAKE_INSTALL_INCLUDEDIR="include/rocblas"
+		-DCMAKE_SKIP_RPATH=TRUE
+		-DBUILD_TESTING=OFF
+		-DBUILD_CLIENTS_SAMPLES=OFF
+		-DBUILD_CLIENTS_TESTS=$(usex test ON OFF)
+		-DBUILD_CLIENTS_BENCHMARKS=$(usex benchmark ON OFF)
+		# passed to CMake only when AMDGPU_TARGETS is set
+		${AMDGPU_TARGETS+-DAMDGPU_TARGETS="${AMDGPU_TARGETS}"}
+	)
+
+	CXX="hipcc" cmake_src_configure
+
+	# do not rerun cmake and the build process in src_install
+	sed -e '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die
+}
+
+# Run the docs eclass compile step, then the CMake build.
+src_compile() {
+	docs_compile
+	cmake_src_compile
+}
+
+# Die unless every device node given as an argument is readable and writable.
+# When FEATURES contains sandbox the check is made as the "portage" user via
+# su, otherwise as the current user.
+check_rw_permission() {
+	local device error user
+	for device in "$@"; do
+		error=0
+		if has sandbox ${FEATURES}; then
+			user="portage"
+			su portage -c "[ -r '${device}' ] && [ -w '${device}' ]" || error=1
+		else
+			user="$(whoami)"
+			# Test directly: an unquoted command string would hand "&&" to "["
+			# as a plain argument, so the check could never succeed.
+			[[ -r ${device} && -w ${device} ]] || error=1
+		fi
+		if [[ "${error}" == 1 ]]; then
+			die "${user} do not have read and write permissions on ${device}! \n Make sure ${user} is in render group and check the permissions."
+		fi
+	done
+}
+
+# Check GPU device permissions, then run rocblas-test from the build tree.
+src_test() {
+	# check permissions on /dev/kfd and every /dev/dri/render* node
+	check_rw_permission /dev/kfd /dev/dri/render*
+	addwrite /dev/kfd
+	addwrite /dev/dri/
+	cd "${BUILD_DIR}/clients/staging" || die
+	./rocblas-test || die "Tests failed"
+}
+
+# Install the CMake targets; with USE=benchmark also install the bench client.
+src_install() {
+	cmake_src_install
+
+	if use benchmark; then
+		cd "${BUILD_DIR}" || die
+		dolib.so clients/librocblas_fortran_client.so
+		dobin clients/staging/rocblas-bench
+	fi
+
+	# Don't install the License (it is installed into the wrong dir)
+	rm "${ED}/usr/share/doc/${P}/LICENSE"* || die
+}
 diff --git a/sci-libs/rocBLAS/rocBLAS-4.3.0.ebuild b/sci-libs/rocBLAS/rocBLAS-4.3.0.ebuild index 48945fe72456..3b4067eb964a 100644 --- a/sci-libs/rocBLAS/rocBLAS-4.3.0.ebuild +++ b/sci-libs/rocBLAS/rocBLAS-4.3.0.ebuild @@ -1,18 +1,18 @@ -# Copyright 1999-2021 Gentoo Authors +# Copyright 1999-2022 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 EAPI=7 PYTHON_COMPAT=( python3_{6..9} ) -inherit cmake prefix python-any-r1 +inherit cmake multiprocessing prefix python-any-r1 DESCRIPTION="AMD's library for BLAS on ROCm" HOMEPAGE="https://github.com/ROCmSoftwarePlatform/rocBLAS"
SRC_URI="https://github.com/ROCmSoftwarePlatform/rocBLAS/archive/rocm-${PV}.tar.gz -> rocm-${P}.tar.gz https://github.com/ROCmSoftwarePlatform/Tensile/archive/rocm-${PV}.tar.gz -> rocm-Tensile-${PV}.tar.gz" -LICENSE="MIT" +LICENSE="BSD" KEYWORDS="~amd64" IUSE="benchmark test" SLOT="0/$(ver_cut 1-2)" @@ -46,7 +46,8 @@ S="${WORKDIR}"/${PN}-rocm-${PV} PATCHES=("${FILESDIR}"/${PN}-4.3.0-fix-glibc-2.32-and-above.patch "${FILESDIR}"/${PN}-4.3.0-change-default-Tensile-library-dir.patch - "${FILESDIR}"/${PN}-4.3.0-link-system-blas.patch ) + "${FILESDIR}"/${PN}-4.3.0-link-system-blas.patch + "${FILESDIR}"/${PN}-4.3.0-remove-problematic-test-suites.patch ) src_prepare() { eapply_user @@ -54,6 +55,7 @@ src_prepare() { pushd "${WORKDIR}"/Tensile-rocm-${PV} || die eapply "${FILESDIR}/Tensile-${PV}-hsaco-compile-specified-arch.patch" # backported from upstream, should remove after 4.3.0 eapply "${FILESDIR}/Tensile-4.3.0-output-commands.patch" + sed -e "/Number of parallel jobs to launch/s:default=-1:default=$(makeopts_jobs):" -i Tensile/TensileCreateLibrary.py || die popd || die # Fit for Gentoo FHS rule @@ -97,7 +99,6 @@ src_configure() { -DBUILD_CLIENTS_TESTS=$(usex test ON OFF) -DBUILD_CLIENTS_BENCHMARKS=$(usex benchmark ON OFF) ${AMDGPU_TARGETS+-DAMDGPU_TARGETS="${AMDGPU_TARGETS}"} - -D__skip_rocmclang="ON" ## fix cmake-3.21 configuration issue caused by officialy support programming language "HIP" ) CXX="hipcc" cmake_src_configure |